In [1]:
# Imports
import torch
import pandas as pd
from torch import nn  # All neural network modules
from torch import optim  # For optimizers like SGD, Adam, etc.
from torch.utils.data import Dataset, DataLoader  # Gives easier dataset managment
import torchvision
import torchvision.transforms as transforms  # Transformations we can perform on our dataset for augmentation

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [14]:
# importing the data
dataset = pd.read_csv('data-lab-test-13-csv/data.csv', index_col=0)
labels = pd.read_csv('data-lab-test-13-csv/labels.csv')
print(dataset.shape)
print(labels.shape)


(999, 399)
(999, 1)


In [24]:
import csv

file = open("data-lab-test-13-csv/labels.csv", "r")
labels = list(csv.reader(file, delimiter=","))
file.close()
print(labels[0:5])
print(type(labels))

# converting the labels to a list
# remove individual lists inside the main list and convert to a single list
labels = [item for sublist in labels for item in sublist]

print(labels[0:5])
print(len(labels))

[['5.000000000000000000e+00'], ['5.000000000000000000e+00'], ['2.000000000000000000e+00'], ['0.000000000000000000e+00'], ['0.000000000000000000e+00']]
<class 'list'>
['5.000000000000000000e+00', '5.000000000000000000e+00', '2.000000000000000000e+00', '0.000000000000000000e+00', '0.000000000000000000e+00']
1000


In [17]:
X = dataset.values
print (X.shape)

(999, 399)


In [25]:
# Standardize the data

X = (X - X.mean()) / X.std(ddof=0)
print(X.shape)

(999, 399)


In [27]:
# Calculating the correlation matrix of the data
X_corr = (1 / len(labels)) * X.T.dot(X)

In [29]:
# Plotting the correlation matrix
# import matplotlib.pyplot as plt
# import seaborn as sns
# plt.figure(figsize=(10,10))
# sns.heatmap(X_corr, vmax=1, square=True,annot=True)
# plt.title('Correlation matrix')

Text(0.5, 1.0, 'Correlation matrix')

Error in callback <function flush_figures at 0x7faea9968310> (for post_execute):


KeyboardInterrupt: 

In [35]:
import numpy as np
eig_values, eig_vectors = np.linalg.eig(X_corr)
print(eig_values.shape)
print(eig_vectors.shape)


(399,)
(399, 399)


In [36]:
np.sum(eig_values)

398.6010000000015

In [38]:
import matplotlib.pyplot as plt
variance=(eig_values / np.sum(eig_values))*100

In [39]:
# calculating our new axis
pc1 = X.dot(eig_vectors[:,0])
pc2 = X.dot(eig_vectors[:,1])

In [60]:
# print(pc2)
len(pc1)
print(pc1[1])
print(pc2[1])


7.390977150257942
2.1161325165360347


In [48]:
import os
import random
import numpy as np
import torch
import numpy as np
from torchvision import datasets, transforms
import math
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score

In [50]:
# creating a Multi-layer perceptron model

class FFN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = torch.nn.Linear(2, 512)
        self.layer2 = torch.nn.Linear(512, 128)
        self.layer3 = torch.nn.Linear(128, 2)
    
    def forward(self, x):
        # Change the activation function and see what effect we get
        x = torch.nn.Tanh()(self.layer1(x))
        x = torch.nn.Tanh()(self.layer2(x))
        x = self.layer3(x)
        return x
    

In [61]:
# Set the number of epochs to be used
epochs = 5
# Create the model
model = FFN().to(device)
# Define Loss
loss_function = torch.nn.CrossEntropyLoss()
# Define Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.008)

pc_list = [pc1, pc2]

# These two lists will be used to store average loss and accuracy for each epoch
total_loss, acc = list(), list()

def accuracy_score():
    for epoch in range(epochs):
        print("\n\nEpoch:", epoch+1)
        # Each batch produces a loss, predictions and target
        batch_loss, batch_preds, batch_target = 0, list(), list()
        # For each batch, train the model
        for (x, y) in pc_list:
            # Make sure that data is on the same device as the model
            x, y = x.to(device), y.to(device)
            # Remove all previous gradients
            optimizer.zero_grad()
            # Get predictions by performing a forward pass
            preds = model.forward(x.float())
            # Calculate error
            loss = loss_function(preds, y.to(torch.int64))
            # Calculate all the gradients for each layer
            loss.backward()
            # Finall, update the weights
            optimizer.step()
            # Save the loss
            batch_loss+= loss.item()
            # Save the predictions and target
            batch_preds.extend(np.argmax(preds.cpu().detach().numpy(), axis=1))
            batch_target.extend(y.cpu().detach().numpy())
        # Calculate average loss
        total_loss.append(batch_loss/len(pc1))
        # Calculate accuracy for this epoch
        acc.append(accuracy_score(batch_target, batch_preds))
        print("Loss:", total_loss[-1], "\tAcc:", acc[-1])
        return total_loss, acc

In [62]:
# 5 fold cross validation on the data

from sklearn.model_selection import cross_val_score
scores = cross_val_score(accuracy_score, X, labels, cv=5)


TypeError: estimator should be an estimator implementing 'fit' method, <function accuracy_score at 0x7faeae0d1dc0> was passed

In [63]:
# performing linear discriminant analysis


class LDA:

    def __init__(self, n_components):
        self.n_components = n_components
        self.linear_discriminants = None

    def fit(self, X, y):
        n_features = X.shape[1]
        class_labels = np.unique(y)

        # Within class scatter matrix:
        # SW = sum((X_c - mean_X_c)^2 )

        # Between class scatter:
        # SB = sum( n_c * (mean_X_c - mean_overall)^2 )

        mean_overall = np.mean(X, axis=0)
        SW = np.zeros((n_features, n_features))
        SB = np.zeros((n_features, n_features))
        for c in class_labels:
            X_c = X[y == c]
            mean_c = np.mean(X_c, axis=0)
            # (4, n_c) * (n_c, 4) = (4,4) -> transpose
            SW += (X_c - mean_c).T.dot((X_c - mean_c))

            # (4, 1) * (1, 4) = (4,4) -> reshape
            n_c = X_c.shape[0]
            mean_diff = (mean_c - mean_overall).reshape(n_features, 1)
            SB += n_c * (mean_diff).dot(mean_diff.T)

        # Determine SW^-1 * SB
        A = np.linalg.inv(SW).dot(SB)
        # Get eigenvalues and eigenvectors of SW^-1 * SB
        eigenvalues, eigenvectors = np.linalg.eig(A)
        # -> eigenvector v = [:,i] column vector, transpose for easier calculations
        # sort eigenvalues high to low
        eigenvectors = eigenvectors.T
        idxs = np.argsort(abs(eigenvalues))[::-1]
        eigenvalues = eigenvalues[idxs]
        eigenvectors = eigenvectors[idxs]
        # store first n eigenvectors
        self.linear_discriminants = eigenvectors[0:self.n_components]

    def transform(self, X):
        # project data
        return np.dot(X, self.linear_discriminants.T)


In [64]:
lda = LDA(n_components=100)
# scores = cross_val_score(lda.fit, X, labels, cv=5)

TypeError: estimator should be an estimator implementing 'fit' method, <bound method LDA.fit of <__main__.LDA object at 0x7faeadff66d0>> was passed

In [None]:

lda = LDA(n_components=200)
# test train split
# X_train, y_train = 

# scores = cross_val_score(lda.fit, X, labels, cv=5)