In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from math import sqrt
import os
os.getcwd()

'/Users/clause/Desktop/ZD'

In [2]:
# Directory that holds train.csv and test.csv. The default is the original
# location; set the QML_DATA_DIR environment variable to run the notebook on
# another machine without editing this cell.
DATA_DIR = os.environ.get('QML_DATA_DIR', '/Users/clause/Desktop/QML')

# index_col=0: the first CSV column is used as the row index, not as a feature.
train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'), index_col=0)
test = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'), index_col=0)

In [3]:
# Preview the first rows of the training frame (rendered as the cell output).
train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,0.1
0,0.652778,0.0,0.0,1.237158,-0.485387,-0.186567,-0.639733,0.0,0
1,0.438889,0.0,0.0,-0.017201,-0.324686,0.515736,-0.158885,1.0,0
2,0.466667,0.0,0.0,0.484542,0.234839,-0.537718,-0.701237,1.0,0
3,0.166667,0.0,0.0,0.484542,0.047138,-0.537718,0.584752,1.0,0
4,0.177778,0.0,0.0,-1.271561,-0.488966,-0.186567,-0.530704,1.0,0


In [4]:
# Give the label column (shown as '0.1' in the preview) a descriptive name.
# rename() without inplace returns a new frame, so the object displayed by an
# earlier cell is not mutated and re-running this cell stays idempotent.
train = train.rename(columns={'0.1': 'target'})
test = test.rename(columns={'0.1': 'target'})

In [5]:
# Every column except the last is a feature; the last column is the label.
X_train = train.iloc[:, :-1]
y_train = train.iloc[:, -1]
X_test = test.iloc[:, :-1]
y_test = test.iloc[:, -1]

In [6]:
# Number of full passes over train_loader performed by the training loop.
EPOCHS = 50
# Mini-batch size handed to the training DataLoader.
BATCH_SIZE = 64
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.001

In [7]:
class trainData(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Parameters
    ----------
    X_data : sized, indexable
        Feature rows; ``X_data[i]`` is the i-th sample.
    y_data : sized, indexable
        Labels; ``y_data[i]`` is the label of ``X_data[i]``.

    Raises
    ------
    ValueError
        If ``X_data`` and ``y_data`` do not have the same length.
    """

    def __init__(self, X_data, y_data):
        # Fail fast: a length mismatch would otherwise surface only as an
        # IndexError in the middle of an epoch, or silently drop labels.
        if len(X_data) != len(y_data):
            raise ValueError(
                f'X_data and y_data must have the same length, '
                f'got {len(X_data)} and {len(y_data)}'
            )
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.X_data)
    
class testData(Dataset):
    """Features-only dataset used for inference; no labels are stored."""

    def __init__(self, X_data):
        self.X_data = X_data

    def __len__(self):
        """Return how many samples are available."""
        return len(self.X_data)

    def __getitem__(self, index):
        """Return the feature row at position ``index``."""
        sample = self.X_data[index]
        return sample


In [8]:
# Convert the pandas values to float32 tensors before wrapping them in the
# dataset classes defined above.
train_data = trainData(
    torch.tensor(X_train.values, dtype=torch.float32),
    torch.tensor(y_train.values, dtype=torch.float32),
)

test_data = testData(torch.tensor(X_test.values, dtype=torch.float32))

In [9]:
# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# One sample per batch and no shuffling, so predictions stay in dataset order.
test_loader = DataLoader(test_data, batch_size=1)

In [10]:
class MLP(nn.Module):
    """Three-hidden-layer perceptron that emits one raw logit per sample.

    Each hidden stage is Linear -> ReLU -> BatchNorm1d; dropout is applied
    once, right before the output layer. No sigmoid is applied in ``forward``,
    so the output is meant for a logit-based loss such as
    ``nn.BCEWithLogitsLoss``.

    Parameters
    ----------
    input_dim : int, default 8
        Number of input features per sample.
    hidden_dim : int, default 64
        Width of each of the three hidden layers.
    dropout : float, default 0.1
        Drop probability of the dropout layer.

    The defaults reproduce the original fixed 8 -> 64 -> 64 -> 64 -> 1 network.
    """

    def __init__(self, input_dim=8, hidden_dim=64, dropout=0.1):
        super().__init__()

        # Attribute names and creation order are kept as they were so that
        # state_dict keys, the printed module summary and the order in which
        # the Linear layers draw their initial weights do not change.
        self.layer_1 = nn.Linear(input_dim, hidden_dim)
        self.layer_2 = nn.Linear(hidden_dim, hidden_dim)
        self.layer_3 = nn.Linear(hidden_dim, hidden_dim)
        self.layer_out = nn.Linear(hidden_dim, 1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout)
        self.batchnorm1 = nn.BatchNorm1d(hidden_dim)
        self.batchnorm2 = nn.BatchNorm1d(hidden_dim)
        self.batchnorm3 = nn.BatchNorm1d(hidden_dim)

    def forward(self, inputs):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, 1)`` raw logits."""
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.relu(self.layer_3(x))
        x = self.batchnorm3(x)
        x = self.dropout(x)
        x = self.layer_out(x)

        return x

In [11]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print(device)

cpu


In [12]:
# nn.Module.to() returns the module itself, so construction and device
# placement can be chained into one statement.
model = MLP().to(device)

print(model)

MLP(
  (layer_1): Linear(in_features=8, out_features=64, bias=True)
  (layer_2): Linear(in_features=64, out_features=64, bias=True)
  (layer_3): Linear(in_features=64, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.1, inplace=False)
  (batchnorm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [13]:
# Adam updates every parameter of the model at the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Binary cross-entropy evaluated on raw logits (the model applies no sigmoid).
criterion = nn.BCEWithLogitsLoss()

In [14]:
def MLP_acc(y_pred, y_test):
    """Return the accuracy of a batch as a whole-number percentage.

    Parameters
    ----------
    y_pred : torch.Tensor
        Raw logits. A sigmoid is applied and the result is rounded to obtain
        0/1 predictions.
    y_test : torch.Tensor
        Ground-truth labels with the same number of elements as ``y_pred``.

    Returns
    -------
    torch.Tensor
        Scalar tensor holding the percentage of correct predictions, rounded
        to the nearest integer.
    """
    # Flatten both sides before comparing: (N, 1) predictions against (N,)
    # labels would otherwise broadcast to an (N, N) matrix and inflate the
    # count of "correct" results.
    y_pred_tag = torch.round(torch.sigmoid(y_pred)).reshape(-1)
    y_true = y_test.reshape(-1)

    correct_results_sum = (y_pred_tag == y_true).sum().float()
    acc = correct_results_sum / y_true.shape[0]

    return torch.round(acc * 100)

In [15]:
def MLP_mcc(y_pred, y_test):
    """Return the Matthews correlation coefficient (MCC) of a batch.

    Parameters
    ----------
    y_pred : torch.Tensor
        Raw logits. A sigmoid is applied and the result is rounded to obtain
        0/1 predictions.
    y_test : torch.Tensor
        Ground-truth 0/1 labels with the same number of elements as ``y_pred``.

    Returns
    -------
    numpy.float64
        The MCC in ``[-1, 1]``. It is ``0.0`` when the coefficient is
        undefined, i.e. when a whole row or column of the confusion matrix is
        empty (for example a batch that contains a single class).
    """
    # Count directly on the tensors: this works on any device (no .numpy() on
    # a CUDA tensor) and always yields the four cells of a 2x2 matrix, even
    # when only one class is present in the batch.
    predicted = torch.round(torch.sigmoid(y_pred.detach())).reshape(-1)
    actual = y_test.detach().reshape(-1)

    # .item() gives Python ints, so the four-way product below cannot overflow.
    tp = ((predicted == 1) & (actual == 1)).sum().item()
    tn = ((predicted == 0) & (actual == 0)).sum().item()
    fp = ((predicted == 1) & (actual == 0)).sum().item()
    fn = ((predicted == 0) & (actual == 1)).sum().item()

    denominator = sqrt((tn + fp) * (tn + fn) * (tp + fp) * (tp + fn))
    if denominator == 0:
        # MCC is undefined here; report 0 rather than NaN so that one
        # degenerate batch does not poison the epoch average.
        return np.float64(0.0)

    # Wrapped in numpy.float64 so callers can keep calling .item() on it.
    return np.float64((tn * tp - fp * fn) / denominator)

In [16]:
# Mini-batch training: one optimizer step per batch. Loss, accuracy and MCC
# are summed per batch and averaged over the batches for the progress line.
model.train()
for e in range(1, EPOCHS + 1):
    epoch_loss, epoch_acc, epoch_mcc = 0, 0, 0

    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        # Add a trailing axis so the labels line up with the model's
        # single-column output.
        y_batch = y_batch.to(device).unsqueeze(1)

        optimizer.zero_grad()
        y_pred = model(X_batch)

        # Metrics are taken on this forward pass, before the weights change.
        loss = criterion(y_pred, y_batch)
        acc = MLP_acc(y_pred, y_batch)
        mcc = MLP_mcc(y_pred, y_batch)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()
        epoch_mcc += mcc.item()

    print(f'Epoch {e+0:03}: | Loss:{epoch_loss/len(train_loader):.5f} | Acc:{epoch_acc/len(train_loader): .3f} Mcc:{epoch_mcc/len(train_loader): .3f}')

Epoch 001: | Loss:0.43748 | Acc: 80.446 Mcc: 0.613
Epoch 002: | Loss:0.40490 | Acc: 82.159 Mcc: 0.646
Epoch 003: | Loss:0.39399 | Acc: 82.783 Mcc: 0.658
Epoch 004: | Loss:0.40072 | Acc: 82.070 Mcc: 0.645
Epoch 005: | Loss:0.39230 | Acc: 82.694 Mcc: 0.656
Epoch 006: | Loss:0.38220 | Acc: 82.904 Mcc: 0.661
Epoch 007: | Loss:0.38632 | Acc: 83.089 Mcc: 0.663
Epoch 008: | Loss:0.37979 | Acc: 83.395 Mcc: 0.672
Epoch 009: | Loss:0.37775 | Acc: 83.682 Mcc: 0.678
Epoch 010: | Loss:0.37349 | Acc: 83.586 Mcc: 0.675
Epoch 011: | Loss:0.37354 | Acc: 83.446 Mcc: 0.671
Epoch 012: | Loss:0.36882 | Acc: 83.904 Mcc: 0.681
Epoch 013: | Loss:0.36927 | Acc: 84.076 Mcc: 0.686
Epoch 014: | Loss:0.36436 | Acc: 84.223 Mcc: 0.688
Epoch 015: | Loss:0.35985 | Acc: 83.994 Mcc: 0.684
Epoch 016: | Loss:0.35918 | Acc: 83.904 Mcc: 0.681
Epoch 017: | Loss:0.35891 | Acc: 84.032 Mcc: 0.685
Epoch 018: | Loss:0.35679 | Acc: 84.325 Mcc: 0.689
Epoch 019: | Loss:0.35594 | Acc: 84.382 Mcc: 0.691
Epoch 020: | Loss:0.35370 | Acc

In [21]:
def pred(x, test_loader):
    """Evaluate the global ``model`` on ``test_loader`` and print its metrics.

    Parameters
    ----------
    x : array-like
        Ground-truth 0/1 labels, in the same order as the samples yielded by
        ``test_loader``.
    test_loader : iterable of torch.Tensor
        Feature batches. Any batch size is accepted.

    Side effects
    ------------
    Switches ``model`` to eval mode (and leaves it there) and prints the 2x2
    confusion matrix, the accuracy and the Matthews correlation coefficient.
    Returns nothing.
    """
    batch_predictions = []
    model.eval()
    with torch.no_grad():
        for X_batch in test_loader:
            X_batch = X_batch.to(device)
            probabilities = torch.sigmoid(model(X_batch))
            # Flatten each batch to 1-D so batches of any size concatenate
            # into one flat prediction vector.
            batch_predictions.append(torch.round(probabilities).reshape(-1).cpu())

    y_pred_labels = torch.cat(batch_predictions).numpy().astype(int)

    # labels=[0, 1] pins the matrix to 2x2 even if one class never occurs.
    cm = confusion_matrix(x, y_pred_labels, labels=[0, 1])
    # Python ints: the product of the four marginals cannot overflow.
    tn, fp, fn, tp = (int(v) for v in cm.ravel())

    acc = (tn + tp) / (tn + fp + fn + tp)
    denominator = sqrt((tn + fp) * (tn + fn) * (tp + fp) * (tp + fn))
    # MCC is undefined when a row or column of the matrix is empty; report 0.
    mcc = (tn * tp - fp * fn) / denominator if denominator else 0.0

    print('Confusion Matrix:')
    print(cm)
    print(f' Accurency:{acc}, MCC:{mcc}')

In [22]:
# Evaluate the model on the samples in test_loader against the y_test labels.
pred(y_test, test_loader)

Confusion Matrix:
[[3139  844]
 [  88  451]]
 Accurency:0.7938965059708094, MCC:0.4478472411703866
