In [1]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

Import Data

In [2]:
def to_onehot(x):
    labels = np.unique(x)
    result = np.zeros(shape=(x.shape[0], labels.shape[0]))

    for i in range(len(x)):
        result[i][x[i]] = 1.0

    return result


def generate_feature_cover(X, std_tol = 500):
    x_mean = np.mean(X, axis = 0)
    x_std = np.mean(X, axis = 0)
    feature_cover = np.zeros(len(x_mean), dtype = bool)

    for i in range(len(x_mean)):
        if np.abs(x_std[i]) > std_tol:
            feature_cover[i] = True    

    return feature_cover


def split_data(X, y, test_size=0.2, val_size=0.2, random_state=42):
   
    # Splitting the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    # Further splitting the training data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_size / (1 - test_size),
                                                      random_state=random_state)

    return X_train, X_val, X_test, y_train, y_val, y_test

In [3]:
training_data = [] # Features per class
training_labels = [] # Labels
testing_data = [] # Features per class
testing_labels = [] # Labels
N = 5250 # Amount of data we want to use max: 5250

# Import the features
with open("traindata.txt", "r") as file:
    for line in file.readlines():
        features = [float(i) for i in line.split(",")]
        training_data.append(features)

with open("testdata.txt", "r") as file:
    for line in file.readlines():
        features = [float(i) for i in line.split(",")]
        testing_data.append(features)


# Import the labels
with open("trainlabels.txt", "r") as file:
    for line in file.readlines():
        label = float(line.rstrip())
        training_labels.append(label)

with open("targetlabels.txt", "r") as file:
    for line in file.readlines():
        label = float(line.rstrip())
        testing_labels.append(label)

# Convert data to numpy arrays
# X = np.array(training_data)
# y = to_onehot(np.array(training_labels, dtype=np.int64))

# feature_cover = generate_feature_cover(X, 1000)

X_train = np.load("augmented_traindata.npy")
y_train = to_onehot(np.load("augmented_trainlabels.npy"))

X_test, X_val, y_test, y_val = train_test_split(np.array(testing_data), to_onehot(np.array(testing_labels, dtype=np.int64)), test_size=0.5, random_state=42)

Preprocess Data

In [4]:
# Calculates the number of components to consider when performing pca
def num_components(X, variance_tol = 0.8):
    # Standardize each feature of the matrix
    x_mean = np.mean(X, axis = 0)
    x_std = np.std(X, axis = 0)
    Z = (X - x_mean) / x_std

    # Calculate covariance matrix
    C = np.cov(Z, rowvar=False)
    # Calculate eigenvalues and eigenvectors and sort by size
    eigenvalues, eigenvectors = np.linalg.eig(C)
    index = eigenvalues.argsort()[:: -1]
    eigenvalues = eigenvalues[index]
    eigenvectors = eigenvectors[:, index]

    # Calculate explained variance matrix 
    explained_var = np.cumsum(eigenvalues) / np.sum(eigenvalues)

    # Select number of components responsible for variance_tol% of variance
    n_components = np.argmax(explained_var >= variance_tol) + 1
    return Z, x_mean, x_std, n_components

# Parameters are trained components, trained mean, trained standard deviation and the new inputs X
# Changes to the PCA basis
def convert_to_pca(components, mean, std, X):
    Z = (X - mean)/std
    return Z @ components.transpose()

Z, mean, std, n_components = num_components(X_train, 0.67)
# Initialize prinicipal component analysis
pca = PCA(n_components, random_state=453)
pca.fit(Z)
components = pca.components_
X_train_PCA = pca.transform(Z)
temp = pca.transform(X_test)
X_test_PCA = convert_to_pca(components, mean, std, X_test)
X_val_PCA = convert_to_pca(components, mean, std, X_val)

In [5]:
np.save("pca_utils/pca_components", components)
np.save("pca_utils/X_mean", mean)
np.save("pca_utils/X_std", std)

In [6]:
print(X_train_PCA.shape)

(5922, 49)


Build Model

In [7]:
import torch as T
import torch.nn as nn


class NeuralNetwork(nn.Module):
    def __init__(self, n_inputs, n_outputs, p_dropout=0.20, num_groups=64, save_dir="./models"):
        super(NeuralNetwork, self).__init__()
        self.save_dir = save_dir

        activation = nn.ReLU()
        dropout = nn.AlphaDropout(p=p_dropout)

        self.network = nn.Sequential(
            nn.Linear(in_features=n_inputs, out_features=1024),
            nn.GroupNorm(num_groups,1024),
            activation,
            dropout,
            nn.Linear(in_features=1024, out_features=512),
            nn.GroupNorm(num_groups,512),
            activation,
            dropout,
            nn.Linear(in_features=512, out_features=256),
            nn.GroupNorm(num_groups,256),
            activation,
            dropout,
            nn.Linear(in_features=256, out_features=n_outputs),
        )
    
    def forward(self, X):
        logits = self.network(X)
        return logits
    
    def save(self, name):
        T.save(self.state_dict(), f"{self.save_dir}/{name}.pth")

    def load(self, name):
        self.load_state_dict(T.load(f"{self.save_dir}/{name}.pth"))

n_inputs = X_train_PCA.shape[1] # 140 inputs
n_outputs = 21 # 21 labels

# Move a tensor to the GPU
device = T.device("cuda" if T.cuda.is_available() else "cpu")

# Initialize the model
model_1 = NeuralNetwork(n_inputs=n_inputs, n_outputs=n_outputs, p_dropout=0.4).to(device)
model_2 = NeuralNetwork(n_inputs=n_inputs, n_outputs=n_outputs, p_dropout=0.4).to(device)


## Ensembling

In [8]:
class Ensemble(nn.Module):
    def __init__(self, models, n_inputs, n_outputs, save_dir="./models"):
        super(Ensemble, self).__init__()
        self.save_dir = save_dir

        self.models = models

        activation = nn.ReLU()
        self.classifier = nn.Sequential(
            activation,
            nn.Linear(in_features=n_inputs * len(models), out_features=n_outputs),
        )
    
    def forward(self, x):
        outputs = []
        for model in self.models:
            model.eval()
            with T.no_grad():
                outputs.append(model(x.clone()))

        x = T.cat(outputs, dim=1)
        logits = self.classifier(x)
        return logits
    
    def save(self, name):
        T.save(self.state_dict(), f"{self.save_dir}/{name}.pth")

    def load(self, name):
        self.load_state_dict(T.load(f"{self.save_dir}/{name}.pth"))

#Freeze these models 
model_1.load("NeuralNetwork-1_acc-63.81_loss-0.000002")
for param in model_1.parameters():
    param.requires_grad_(False)

model_2.load("NeuralNetwork-2_acc-63.14_loss-0.000002")
for param in model_2.parameters():
    param.requires_grad_(False)


models = [model_1, model_2]
model = Ensemble(models, 21, 21).to(device)


Train Model

In [9]:
def make_confusion_matrix(y_pred, y_true, labels):
    N = labels.shape[0]
    matrix = [[0] * (N + 1) for _ in range(N + 1)]

    matrix[0][0] = " "
    for i in range(1, N):
        matrix[i][0] = f"{i}"
        matrix[0][i] = f"{i}"

    for i in range(len(y_pred)):
        matrix[round(y_pred[i]) + 1][y_true[i] + 1] += 1

    for i in range(N):
        print(" ".join(map(str, matrix[i])))

    return sum([matrix[i + 1][i + 1] for i in range(2)]) / len(y_pred)


In [10]:
def validate_model(model, X_val, y_val, criterion):
    size = len(y_val)

    model.eval()

    with T.no_grad():
        X = T.from_numpy(X_val).to(T.float32).to(device)
        y_true = T.Tensor(y_val).to(T.float).to(device)

        logits = model.forward(X)

        loss = criterion(logits, y_true)

        correct = (logits.argmax(1) == y_true.argmax(1)).type(T.float).sum().item()
        
        loss /= size
        accuracy = correct/size
        print(f"Validation Error: \n Accuracy: {(100 * (accuracy)):>0.1f}%, Avg loss: {loss:>8f}\n")
    
    return accuracy, loss

In [11]:
def train_model(model, X_train, y_train, criterion, optimizer):
    size = len(X_train)
    batch_size = 141

    #Prevents model from memorizing the position of data
    indices = np.random.randint(0, size, size)

    model.train()

    for i in range(size//batch_size):
        start = batch_size * i
        end = start + batch_size

        X = T.from_numpy(X_train[indices[start:end]]).to(T.float32).to(device)
        y_true = T.Tensor(y_train[indices[start:end]]).to(T.float).to(device)

        logits = model.forward(X)
        
        loss = criterion(logits, y_true)

        # Gradiant Descent using Adam optimizer for best performance
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        correct = (logits.argmax(1) == y_true.argmax(1)).type(T.float).sum().item()
        accuracy = correct/batch_size

        if (i * batch_size) % 564 == 0:
            loss, current = loss.item(), (i + 1) * batch_size
            print(f"Accuracy: {(100 * (accuracy)):>0.1f}%, Loss_1: {loss:>7f} [{current:>5d}/{size:>5d}]")

In [19]:
import torch.optim as optim

# Global Variables
epochs = 10_000
learning_rate = 1e-4

# Load Pre-Trained Models
model.load("NeuralNetwork-Ensemble_acc-62.76_loss-0.001620")

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

best_accuracy = 0.625 # ???

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}\n-------------------------------")

    train_model(model, X_train_PCA, y_train, criterion, optimizer)
    print('Finished training')
    
    accuracy, loss = validate_model(model, X_val_PCA, y_val, criterion)

    if accuracy > best_accuracy:
        print(f"[+] Saving Model...")
        model.save(f"NeuralNetwork-Ensemble_acc-{accuracy * 100:.2f}_loss-{loss:>8f}")
        best_accuracy = accuracy

        print(f"[!] Models Saved.")

    epoch += 1


Epoch 1
-------------------------------
Accuracy: 89.4%, Loss_1: 0.373986 [  141/ 5922]
Accuracy: 86.5%, Loss_1: 0.599968 [  705/ 5922]
Accuracy: 85.8%, Loss_1: 0.623146 [ 1269/ 5922]
Accuracy: 85.8%, Loss_1: 0.542281 [ 1833/ 5922]
Accuracy: 85.1%, Loss_1: 0.536167 [ 2397/ 5922]
Accuracy: 90.8%, Loss_1: 0.425996 [ 2961/ 5922]
Accuracy: 86.5%, Loss_1: 0.472095 [ 3525/ 5922]
Accuracy: 90.1%, Loss_1: 0.502522 [ 4089/ 5922]
Accuracy: 85.8%, Loss_1: 0.451313 [ 4653/ 5922]
Accuracy: 87.9%, Loss_1: 0.478804 [ 5217/ 5922]
Accuracy: 83.0%, Loss_1: 0.618623 [ 5781/ 5922]
Finished training
Validation Error: 
 Accuracy: 62.4%, Avg loss: 0.001619

Epoch 2
-------------------------------
Accuracy: 88.7%, Loss_1: 0.371718 [  141/ 5922]
Accuracy: 90.8%, Loss_1: 0.382165 [  705/ 5922]
Accuracy: 87.2%, Loss_1: 0.492059 [ 1269/ 5922]
Accuracy: 86.5%, Loss_1: 0.460212 [ 1833/ 5922]
Accuracy: 85.8%, Loss_1: 0.524624 [ 2397/ 5922]
Accuracy: 86.5%, Loss_1: 0.525899 [ 2961/ 5922]
Accuracy: 87.2%, Loss_1: 0.53

Testing Model

In [13]:
def test_model(model, X_test, y_test, criterion):
    size = len(y_test)

    model.eval()
    with T.no_grad():
        X = T.from_numpy(X_test).to(T.float32).to(device)
        y_true = T.Tensor(y_test).to(T.float).to(device)

        logits = model.forward(X)

        loss = criterion(logits, y_true)

        correct = (logits.argmax(1) == y_true.argmax(1)).type(T.float).sum().item()
        
        loss /= size
        accuracy = correct/size
        
        print(f"Test Error: \n Accuracy: {(100 * (accuracy)):>0.1f}%, Avg loss: {loss:>8f}")

In [18]:
model.load("NeuralNetwork-Ensemble_acc-62.76_loss-0.001620")
test_model(model, X_test_PCA, y_test, criterion)

Test Error: 
 Accuracy: 61.4%, Avg loss: 0.001585
