In [85]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

Import Data

In [86]:
def to_onehot(x):
    labels = np.unique(x)
    result = np.zeros(shape=(x.shape[0], labels.shape[0]))

    for i in range(len(x)):
        result[i][x[i]] = 1.0

    return result


def generate_feature_cover(X, std_tol = 500):
    x_mean = np.mean(X, axis = 0)
    x_std = np.mean(X, axis = 0)
    feature_cover = np.zeros(len(x_mean), dtype = bool)

    for i in range(len(x_mean)):
        if np.abs(x_std[i]) > std_tol:
            feature_cover[i] = True    

    return feature_cover


def split_data(X, y, test_size=0.2, val_size=0.2, random_state=42):
   
    # Splitting the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    # Further splitting the training data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_size / (1 - test_size),
                                                      random_state=random_state)

    return X_train, X_val, X_test, y_train, y_val, y_test

In [116]:
training_data = [] # Features per class
training_labels = [] # Labels
testing_data = [] # Features per class
testing_labels = [] # Labels
N = 5250 # Amount of data we want to use max: 5250

# Import the features
with open("traindata.txt", "r") as file:
    for line in file.readlines():
        features = [float(i) for i in line.split(",")]
        training_data.append(features)

with open("testdata.txt", "r") as file:
    for line in file.readlines():
        features = [float(i) for i in line.split(",")]
        testing_data.append(features)


# Import the labels
with open("trainlabels.txt", "r") as file:
    for line in file.readlines():
        label = float(line.rstrip())
        training_labels.append(label)

with open("targetlabels.txt", "r") as file:
    for line in file.readlines():
        label = float(line.rstrip())
        testing_labels.append(label)

# Convert data to numpy arrays
# X = np.array(training_data)
# y = to_onehot(np.array(training_labels, dtype=np.int64))

# feature_cover = generate_feature_cover(X, 1000)

X_train = np.load("augmented_traindata.npy")
y_train = to_onehot(np.load("augmented_trainlabels.npy"))

X_test, X_val, y_test, y_val = train_test_split(np.array(testing_data), to_onehot(np.array(testing_labels, dtype=np.int64)), test_size=0.5, random_state=42)

Preprocess Data

In [117]:
# Calculates the number of components to consider when performing pca
def num_components(X, variance_tol = 0.8):
    # Standardize each feature of the matrix
    x_mean = np.mean(X, axis = 0)
    x_std = np.std(X, axis = 0)
    Z = (X - x_mean) / x_std

    # Calculate covariance matrix
    C = np.cov(Z, rowvar=False)
    # Calculate eigenvalues and eigenvectors and sort by size
    eigenvalues, eigenvectors = np.linalg.eig(C)
    index = eigenvalues.argsort()[:: -1]
    eigenvalues = eigenvalues[index]
    eigenvectors = eigenvectors[:, index]

    # Calculate explained variance matrix 
    explained_var = np.cumsum(eigenvalues) / np.sum(eigenvalues)

    # Select number of components responsible for variance_tol% of variance
    n_components = np.argmax(explained_var >= variance_tol) + 1
    return Z, x_mean, x_std, n_components

# Parameters are trained components, trained mean, trained standard deviation and the new inputs X
# Changes to the PCA basis
def convert_to_pca(components, mean, std, X):
    Z = (X - mean)/std
    return Z @ components.transpose()

Z, mean, std, n_components = num_components(X_train, 0.70)
# Initialize prinicipal component analysis
pca = PCA(n_components, random_state=453)
pca.fit(Z)
components = pca.components_
X_train_PCA = pca.transform(Z)
temp = pca.transform(X_test)
X_test_PCA = convert_to_pca(components, mean, std, X_test)
X_val_PCA = convert_to_pca(components, mean, std, X_val)

In [118]:
np.save("pca_utils/pca_components", components)
np.save("pca_utils/X_mean", mean)
np.save("pca_utils/X_std", std)

In [119]:
print(X_train_PCA.shape)

(5922, 63)


Build Model

In [131]:
import torch as T
import torch.nn as nn


class NeuralNetwork(nn.Module):
    def __init__(self, n_inputs, n_outputs, p_dropout=0.20, save_dir="./models"):
        super(NeuralNetwork, self).__init__()
        self.save_dir = save_dir

        activation = nn.ReLU()
        dropout = nn.Dropout(p=p_dropout)

        self.network = nn.Sequential(
            nn.Linear(in_features=n_inputs, out_features=512),
            activation,
            dropout,
            nn.Linear(in_features=512, out_features=256),
            activation,
            dropout,
            nn.Linear(in_features=256, out_features=128),
            activation,
            dropout,
            nn.Linear(in_features=128, out_features=n_outputs),
        )
    
    def forward(self, X):
        logits = self.network(X)
        return logits
    
    def save(self, name):
        T.save(self.state_dict(), f"{self.save_dir}/{name}.pth")

    def load(self, name):
        self.load_state_dict(T.load(f"{self.save_dir}/{name}.pth"))

n_inputs = X_train_PCA.shape[1] # 140 inputs
n_outputs = 21 # 21 labels

# Move a tensor to the GPU
device = T.device("cuda" if T.cuda.is_available() else "cpu")

# Initialize the model
model_1 = NeuralNetwork(n_inputs=n_inputs, n_outputs=n_outputs, p_dropout=0.2).to(device)
model_2 = NeuralNetwork(n_inputs=n_inputs, n_outputs=n_outputs, p_dropout=0.2).to(device)


Train Model

In [132]:
def make_confusion_matrix(y_pred, y_true, labels):
    N = labels.shape[0]
    matrix = [[0] * (N + 1) for _ in range(N + 1)]

    matrix[0][0] = " "
    for i in range(1, N):
        matrix[i][0] = f"{i}"
        matrix[0][i] = f"{i}"

    for i in range(len(y_pred)):
        matrix[round(y_pred[i]) + 1][y_true[i] + 1] += 1

    for i in range(N):
        print(" ".join(map(str, matrix[i])))

    return sum([matrix[i + 1][i + 1] for i in range(2)]) / len(y_pred)


In [133]:
def validate_model(model1, model2, X_val, y_val, criterion):
    size = len(y_val)

    model1.eval()
    model2.eval()

    with T.no_grad():
        X = T.from_numpy(X_val).to(T.float32).to(device)
        y_true = T.Tensor(y_val).to(T.float).to(device)

        logits_1 = model1.forward(X)
        logits_2 = model2.forward(X)

        loss_1, loss_2 = criterion(logits_1, logits_2, y_true, 0.2)

        correct_1 = (logits_1.argmax(1) == y_true.argmax(1)).type(T.float).sum().item()
        correct_2 = (logits_2.argmax(1) == y_true.argmax(1)).type(T.float).sum().item()
        
        loss_1 /= size
        loss_2 /= size
        accuracy_1 = correct_1/size
        accuracy_2 = correct_2/size
        print(f"Validation Error (Model 1): \n Accuracy: {(100 * (accuracy_1)):>0.1f}%, Avg loss: {loss_1:>8f}")
        print(f"Validation Error (Model 2): \n Accuracy: {(100 * (accuracy_2)):>0.1f}%, Avg loss: {loss_2:>8f} \n")
    
    return accuracy_1, accuracy_2, loss_1, loss_2

In [134]:
def train_model(model_1, model_2, X_train, y_train, criterion, optimizer_1, optimizer_2):
    size = len(X_train)
    batch_size = 141

    #Prevents model from memorizing the position of data
    indices = np.random.randint(0, size, size)

    model_1.train()
    model_2.train()

    for i in range(size//batch_size):
        start = batch_size * i
        end = start + batch_size

        X = T.from_numpy(X_train[indices[start:end]]).to(T.float32).to(device)
        y_true = T.Tensor(y_train[indices[start:end]]).to(T.float).to(device)

        logits_1 = model_1.forward(X)
        logits_2 = model_2.forward(X)

        loss_1, loss_2 = criterion(logits_1, logits_2, y_true, 0.2)

        # Gradiant Descent using Adam optimizer for best performance
        optimizer_1.zero_grad()
        loss_1.backward()
        optimizer_1.step()

        optimizer_2.zero_grad()
        loss_2.backward()
        optimizer_2.step()

        correct_1 = (logits_1.argmax(1) == y_true.argmax(1)).type(T.float).sum().item()
        correct_2 = (logits_2.argmax(1) == y_true.argmax(1)).type(T.float).sum().item()

        accuracy_1 = correct_1/batch_size
        accuracy_2 = correct_2/batch_size

        if (i * batch_size) % 564 == 0:
            loss_1, loss_2, current = loss_1.item(), loss_2.item(), (i + 1) * batch_size
            print(f"Accuracy_1: {(100 * (accuracy_1)):>0.1f}%, Loss_1: {loss_1:>7f}, ", end="")
            print(f"Accuracy_2: {(100 * (accuracy_2)):>0.1f}% Loss_2: {loss_2:>7f}  [{current:>5d}/{size:>5d}]")

In [135]:
import torch as T
import torch.nn.functional as F

# Loss functions
def loss_coteaching(y_1, y_2, t, forget_rate):
    loss_1 = F.cross_entropy(y_1, t, reduce = False)
    ind_1_sorted = T.argsort(loss_1.data)
    loss_1_sorted = loss_1[ind_1_sorted]

    loss_2 = F.cross_entropy(y_2, t, reduce = False)
    ind_2_sorted = T.argsort(loss_2.data)
    loss_2_sorted = loss_2[ind_2_sorted]

    remember_rate = 1 - forget_rate
    num_remember = int(remember_rate * len(loss_1_sorted))


    ind_1_update=ind_1_sorted[:num_remember]
    ind_2_update=ind_2_sorted[:num_remember]
    # exchange
    loss_1_update = F.cross_entropy(y_1[ind_2_update], t[ind_2_update])
    loss_2_update = F.cross_entropy(y_2[ind_1_update], t[ind_1_update])

    return T.sum(loss_1_update)/num_remember, T.sum(loss_2_update)/num_remember



In [136]:
import torch.optim as optim

# Global Variables
epochs = 5_000
learning_rate = 1e-4

# Load Pre-Trained Models
# model_1.load("NeuralNetwork-1_acc-50.29_loss-0.000003")
# model_2.load("NeuralNetwork-2_acc-50.38_loss-0.000003")

criterion = loss_coteaching # Co-teaching loss function
optimizer_1 = optim.Adam(model_1.parameters(), lr=learning_rate)
optimizer_2 = optim.Adam(model_2.parameters(), lr=learning_rate)

best_accuracy = 0.56 # ???

for i in range(epochs):
    print(f"Epoch {i+1}\n-------------------------------")

    train_model(model_1, model_2, X_train_PCA, y_train, criterion, optimizer_1, optimizer_2)
    print('Finished training')
    
    accuracy_1, accuracy_2, loss_1, loss_2 = validate_model(model_1, model_2, X_val_PCA, y_val, criterion)

    if max(accuracy_1, accuracy_2) > best_accuracy:
        print(f"[+] Saving Model...")

        model_1.save(f"NeuralNetwork-1_acc-{accuracy_1 * 100:.2f}_loss-{loss_1:>8f}")
        model_2.save(f"NeuralNetwork-2_acc-{accuracy_2 * 100:.2f}_loss-{loss_2:>8f}")
        best_accuracy = max(accuracy_1, accuracy_2)

        print(f"[!] Models Saved.")


Epoch 1
-------------------------------
Accuracy_1: 7.1%, Loss_1: 0.027281, Accuracy_2: 5.7% Loss_2: 0.027375  [  141/ 5922]
Accuracy_1: 3.5%, Loss_1: 0.027391, Accuracy_2: 6.4% Loss_2: 0.027110  [  705/ 5922]
Accuracy_1: 5.7%, Loss_1: 0.026730, Accuracy_2: 5.7% Loss_2: 0.026963  [ 1269/ 5922]
Accuracy_1: 9.2%, Loss_1: 0.026897, Accuracy_2: 5.0% Loss_2: 0.027097  [ 1833/ 5922]
Accuracy_1: 5.7%, Loss_1: 0.026818, Accuracy_2: 3.5% Loss_2: 0.026767  [ 2397/ 5922]
Accuracy_1: 7.1%, Loss_1: 0.026595, Accuracy_2: 9.2% Loss_2: 0.026566  [ 2961/ 5922]
Accuracy_1: 7.1%, Loss_1: 0.026830, Accuracy_2: 7.1% Loss_2: 0.026811  [ 3525/ 5922]




Accuracy_1: 9.2%, Loss_1: 0.026682, Accuracy_2: 7.1% Loss_2: 0.026605  [ 4089/ 5922]
Accuracy_1: 7.1%, Loss_1: 0.026346, Accuracy_2: 7.8% Loss_2: 0.026225  [ 4653/ 5922]
Accuracy_1: 12.1%, Loss_1: 0.026430, Accuracy_2: 7.8% Loss_2: 0.026404  [ 5217/ 5922]
Accuracy_1: 7.1%, Loss_1: 0.026305, Accuracy_2: 12.8% Loss_2: 0.026101  [ 5781/ 5922]
Finished training
Validation Error (Model 1): 
 Accuracy: 12.8%, Avg loss: 0.000003
Validation Error (Model 2): 
 Accuracy: 11.5%, Avg loss: 0.000003 

Epoch 2
-------------------------------
Accuracy_1: 7.1%, Loss_1: 0.026393, Accuracy_2: 6.4% Loss_2: 0.026131  [  141/ 5922]
Accuracy_1: 9.2%, Loss_1: 0.026088, Accuracy_2: 8.5% Loss_2: 0.026092  [  705/ 5922]
Accuracy_1: 12.1%, Loss_1: 0.026259, Accuracy_2: 13.5% Loss_2: 0.025375  [ 1269/ 5922]
Accuracy_1: 14.2%, Loss_1: 0.025513, Accuracy_2: 12.1% Loss_2: 0.025007  [ 1833/ 5922]
Accuracy_1: 12.1%, Loss_1: 0.025950, Accuracy_2: 10.6% Loss_2: 0.025521  [ 2397/ 5922]
Accuracy_1: 12.8%, Loss_1: 0.025445

Testing Model

In [113]:
criterion = loss_coteaching

def test_model(model_1, model_2, X_test, y_test, criterion):
    size = len(y_test)

    model_1.eval()
    model_2.eval()
    with T.no_grad():
        X = T.from_numpy(X_test).to(T.float32).to(device)
        y_true = T.Tensor(y_test).to(T.float).to(device)

        logits_1 = model_1.forward(X)
        logits_2 = model_2.forward(X)

        loss_1, loss_2 = criterion(logits_1, logits_2, y_true, 0.2)

        correct_1 = (logits_1.argmax(1) == y_true.argmax(1)).type(T.float).sum().item()
        correct_2 = (logits_2.argmax(1) == y_true.argmax(1)).type(T.float).sum().item()
        
        loss_1 /= size
        loss_2 /= size
        accuracy_1 = correct_1/size
        accuracy_2 = correct_2/size
        
        print(f"Test Error (Model 1): \n Accuracy: {(100 * (accuracy_1)):>0.1f}%, Avg loss: {loss_1:>8f}")
        print(f"Test Error (Model 2): \n Accuracy: {(100 * (accuracy_2)):>0.1f}%, Avg loss: {loss_2:>8f} \n")

In [138]:
model_1.load("NeuralNetwork-1_acc-60.86_loss-0.000002")
model_2.load("NeuralNetwork-2_acc-58.38_loss-0.000002")
test_model(model_1, model_2, X_test_PCA, y_test, criterion)

Test Error (Model 1): 
 Accuracy: 58.8%, Avg loss: 0.000002
Test Error (Model 2): 
 Accuracy: 59.0%, Avg loss: 0.000002 

