In [108]:
import pandas as pd 
import numpy as np
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import Tensor
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import Softmax
from torch.nn import Module
from torch.optim import SGD
from torch.nn import CrossEntropyLoss
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_
import torch.nn.functional as F
from tqdm import tqdm_notebook
from torch.nn import BatchNorm1d
from torch.nn import LeakyReLU

import warnings
warnings.filterwarnings("ignore")

In [109]:
class CSVDataset(Dataset):
    def __init__(self, path):
        df = pd.read_csv(path)
        self.X = df.values[:, :-1]
        self.y = df.values[:, -1]
        self.X = self.X.astype('float32')
        self.y = LabelEncoder().fit_transform(self.y)
 
    def __len__(self):
        return len(self.X)
 
    def __getitem__(self, idx):
        return [self.X[idx], self.y[idx]]
 
    def get_splits(self, n_test=0.2):
        test_size = round(n_test * len(self.X))
        train_size = len(self.X) - test_size
        return random_split(self, [train_size, test_size])

In [110]:
def prepare_data(path):
    dataset = CSVDataset(path)
    train, test = dataset.get_splits()
    train_dl = DataLoader(train, shuffle=True)
    test_dl = DataLoader(test, shuffle=False)
    return train_dl, test_dl

In [111]:
class MLP(Module):
    def __init__(self, n_inputs):
        super(MLP, self).__init__()

        self.hidden1 = Linear(n_inputs, 64)
        kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
        self.act1 = ReLU()

        self.hidden2 = Linear(64, 20)
        kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
        self.act2 = F.leaky_relu

        self.hidden3 = Linear(20, 2)
        xavier_uniform_(self.hidden3.weight)
        self.act3 = Sigmoid()
 
    def forward(self, X):
        X = self.hidden1(X)
        X = self.act1(X)
        X = self.hidden2(X)
        X = self.act2(X)
        X = self.hidden3(X)
        X = self.act3(X)
        return X

In [112]:
def train_model(train_dl, model):
    criterion = CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=0.001)

    for epoch in tqdm_notebook(range(10)):
        for i, (inputs, targets) in enumerate(train_dl):
            optimizer.zero_grad()
            yhat = model(inputs)
            loss = criterion(yhat, targets)
            loss.backward()
            optimizer.step()

In [113]:
def evaluate_model(test_dl, model):
    predictions, actuals = list(), list()
    for i, (inputs, targets) in enumerate(test_dl):
        yhat = model(inputs)
        yhat = yhat.detach().numpy()
        actual = targets.numpy()
        yhat = np.argmax(yhat, axis=1)
        actual = actual.reshape(-1, 1)
        yhat = yhat.reshape(-1, 1)
        predictions.append(yhat)
        actuals.append(actual)
    predictions, actuals = np.vstack(predictions), np.vstack(actuals)
    acc = accuracy_score(actuals, predictions)
    return acc

In [114]:
train_dl, test_dl = prepare_data("sampled_data.csv")

In [115]:
import time

start_time = time.time()

model = MLP(10)
train_model(train_dl, model)

end_time = time.time()
training_time = end_time - start_time

print('Finished Training')
print(f'Training time: {training_time:.2f} seconds')

  0%|          | 0/10 [00:00<?, ?it/s]

Finished Training
Training time: 96.10 seconds


In [120]:
acc = evaluate_model(test_dl, model)
print('Accuracy: %.3f' % acc)

Accuracy: 0.850
