In [1]:
# Mount Google Drive into the runtime's filesystem at /content/drive so that
# later cells can copy files to and from it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Copy the HDF5 dataset from the mounted Drive into the current working
# directory; the data-loading cell opens it via this relative file name.
!cp /content/drive/MyDrive/extendable.hdf5 extendable.hdf5

In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import StandardScaler    
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [4]:
import tables as tb, numpy as np
# Load the feature matrix, the labels and the sample ids from the local
# HDF5 copy of the dataset.
hdf5_epath = 'extendable.hdf5'

# Open read-only inside a context manager so the file handle is always
# released, even if one of the reads below raises.
with tb.open_file(hdf5_epath, mode='r') as h5f:
    # Slicing with [:] reads each node fully into memory, so the arrays stay
    # usable after the file has been closed.
    X = h5f.root.MyData.X[:]
    Y = h5f.root.MyData.Y[:]
    ID = h5f.root.MyData.id[:]

In [5]:
# Give every array an explicit 2-D layout with one row per sample.
#
# `reshape` is used instead of `np.resize` because only `reshape` interprets
# -1 as "infer this dimension". `np.resize` does not support negative sizes:
# newer NumPy releases raise ValueError, and older ones can silently
# truncate or repeat data to fit the requested size.
X = np.asarray(X).reshape(-1, 786)   # 786 values per sample
y = np.asarray(Y).reshape(-1, 1)     # one target value per sample
ID = np.asarray(ID).reshape(-1, 1)   # one id per sample

# The three arrays are split together afterwards, so they must be row-aligned.
assert len(X) == len(y) == len(ID), (len(X), len(y), len(ID))
X.shape, y.shape, ID.shape

((146762, 786), (146762, 1), (146762, 1))

In [6]:
# Training configuration, collected in one place.
# EPOCHS: number of passes the training loop makes over the training loader.
EPOCHS = 50
# BATCH_SIZE: batch size given to both the train and the test DataLoader.
BATCH_SIZE = 32
# LEARNING_RATE: learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.001
# MODEL_PATH: local file name the trained model is written to by torch.save.
MODEL_PATH = "NSP_classification.bin"

In [7]:
# train data
class data_loader(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Despite its name this class is a ``Dataset``; it is meant to be wrapped
    by a ``DataLoader``, which does the batching.

    Args:
        X: indexable collection of feature rows (e.g. a 2-D NumPy array).
        y: indexable collection of labels aligned with ``X`` by position.

    Raises:
        ValueError: if ``X`` and ``y`` hold a different number of items.
    """

    def __init__(self, X, y):
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X_data = X
        self.y_data = y

    def __getitem__(self, index):
        """Return ``(features, label)`` for ``index`` as float32 tensors."""
        # torch.as_tensor always converts the *values* it is given. The legacy
        # torch.FloatTensor(...) constructor treats a bare integer as a size
        # and returns uninitialised memory, which would corrupt scalar labels.
        features = torch.as_tensor(self.X_data[index], dtype=torch.float32)
        label = torch.as_tensor(self.y_data[index], dtype=torch.float32)
        return features, label

    def __len__(self):
        """Return the number of samples."""
        return len(self.X_data)

# Hold out 10% of the samples for evaluation. X, y and ID go through a single
# call so their rows stay aligned; random_state makes the split repeatable.
X_train, X_test, y_train, y_test, id_train, id_test = train_test_split(
    X, y, ID, test_size=0.1, random_state=42
)

# Reshuffle the training samples every epoch so the mini-batches differ from
# one epoch to the next. The test loader keeps a fixed order so predictions
# line up row-for-row with y_test / id_test.
train_loader = DataLoader(
    data_loader(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True
)
test_loader = DataLoader(data_loader(X_test, y_test), batch_size=BATCH_SIZE)

In [8]:
# A DataLoader cannot be indexed; to inspect the shapes of one batch use:
# X_b, y_b = next(iter(train_loader)); X_b.shape, y_b.shape

In [9]:
class binaryClassification(nn.Module):
    """Fully connected binary classifier that outputs one raw logit per sample.

    Architecture: Linear -> ReLU -> BatchNorm -> Linear -> ReLU -> BatchNorm
    -> Dropout -> Linear. No sigmoid is applied to the output, so pair the
    model with ``nn.BCEWithLogitsLoss`` or call ``torch.sigmoid`` on the result.

    Args:
        in_features: number of values in each input row (default 786).
        hidden_1: width of the first hidden layer (default 256).
        hidden_2: width of the second hidden layer (default 128).
        dropout: drop probability applied before the output layer (default 0.1).
    """

    def __init__(self, in_features=786, hidden_1=256, hidden_2=128, dropout=0.1):
        super().__init__()
        # Attribute names are unchanged so the state_dict keys stay the same.
        self.layer_1 = nn.Linear(in_features, hidden_1)
        self.layer_2 = nn.Linear(hidden_1, hidden_2)
        self.layer_out = nn.Linear(hidden_2, 1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout)
        self.batchnorm1 = nn.BatchNorm1d(hidden_1)
        self.batchnorm2 = nn.BatchNorm1d(hidden_2)

    def forward(self, inputs):
        """Map a ``(batch, in_features)`` tensor to ``(batch, 1)`` logits."""
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.layer_out(x)

        return x


In [10]:
# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [11]:
# Instantiate the network and move its parameters to the selected device
# (Module.to returns the module itself, so the call can be chained).
model = binaryClassification().to(device)

# The network returns raw logits, so use the loss that applies the sigmoid
# internally.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)


In [12]:
def binary_acc(y_pred, y_test):
    """Return the accuracy of one batch in percent, rounded to an integer value.

    Args:
        y_pred: raw logits; a sigmoid is applied here and the result is
            rounded to a hard 0/1 prediction.
        y_test: ground-truth labels with as many elements as ``y_pred``. They
            are reshaped to the shape of ``y_pred`` before the comparison.

    Returns:
        A 0-dim float tensor holding the rounded percentage of predictions
        that equal their label.

    Raises:
        RuntimeError: if ``y_test`` cannot be reshaped to the shape of ``y_pred``.
    """
    y_pred_tag = torch.round(torch.sigmoid(y_pred))

    # Align the shapes explicitly. Comparing an (N, 1) tensor with an (N,)
    # tensor would otherwise broadcast to (N, N) and inflate the match count.
    y_true = y_test.reshape(y_pred_tag.shape)

    correct_results_sum = (y_pred_tag == y_true).sum().float()
    acc = correct_results_sum / y_true.shape[0]
    acc = torch.round(acc * 100)

    return acc


In [13]:
# Optimise the model for EPOCHS passes over the training loader and print the
# mean per-batch loss and accuracy after every pass.
model.train()
num_batches = len(train_loader)
for e in range(1, EPOCHS + 1):
    epoch_loss, epoch_acc = 0, 0

    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        # Clear gradients left over from the previous step before the forward pass.
        optimizer.zero_grad()
        y_pred = model(X_batch)

        loss = criterion(y_pred, y_batch)
        acc = binary_acc(y_pred, y_batch)

        loss.backward()
        optimizer.step()

        # .item() extracts plain Python floats, so no graph is kept alive.
        epoch_loss += loss.item()
        epoch_acc += acc.item()

    mean_loss = epoch_loss / num_batches
    mean_acc = epoch_acc / num_batches
    print(f'Epoch {e+0:03}: | Loss: {mean_loss:.5f} | Acc: {mean_acc:.3f}')


Epoch 001: | Loss: 0.28444 | Acc: 88.261
Epoch 002: | Loss: 0.22960 | Acc: 90.766
Epoch 003: | Loss: 0.21027 | Acc: 91.623
Epoch 004: | Loss: 0.19450 | Acc: 92.326
Epoch 005: | Loss: 0.18056 | Acc: 92.987
Epoch 006: | Loss: 0.16897 | Acc: 93.401
Epoch 007: | Loss: 0.15899 | Acc: 93.876
Epoch 008: | Loss: 0.15008 | Acc: 94.243
Epoch 009: | Loss: 0.14064 | Acc: 94.530
Epoch 010: | Loss: 0.13719 | Acc: 94.692
Epoch 011: | Loss: 0.13174 | Acc: 94.920
Epoch 012: | Loss: 0.12523 | Acc: 95.209
Epoch 013: | Loss: 0.11853 | Acc: 95.445
Epoch 014: | Loss: 0.11177 | Acc: 95.745
Epoch 015: | Loss: 0.10546 | Acc: 96.032
Epoch 016: | Loss: 0.10443 | Acc: 96.025
Epoch 017: | Loss: 0.10048 | Acc: 96.148
Epoch 018: | Loss: 0.09396 | Acc: 96.430
Epoch 019: | Loss: 0.09038 | Acc: 96.514
Epoch 020: | Loss: 0.08704 | Acc: 96.695
Epoch 021: | Loss: 0.08715 | Acc: 96.659
Epoch 022: | Loss: 0.08259 | Acc: 96.851
Epoch 023: | Loss: 0.07698 | Acc: 97.064
Epoch 024: | Loss: 0.07653 | Acc: 97.117
Epoch 025: | Los

In [None]:
# Collect the hard 0/1 predictions and the matching ground-truth labels for
# every sample served by the test loader.
y_pred_list = []
y_ = []
model.eval()  # put BatchNorm and Dropout into inference behaviour
with torch.no_grad():
    # A named loop variable is used for the labels: `_` is IPython's
    # "last output" name and should not be reused as data.
    for X_batch, y_true_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = torch.sigmoid(model(X_batch))
        y_pred_tag = torch.round(y_test_pred)
        y_pred_list.append(y_pred_tag.cpu().numpy())
        y_.append(y_true_batch.cpu().numpy())

# Flatten the per-batch arrays into one flat list each. Squeezing batch by
# batch would leave a ragged list of lists (and a bare float for a batch of
# one), which cannot be compared element-wise with the labels.
y_pred_list = np.concatenate(y_pred_list).ravel().tolist() if y_pred_list else []
y_ = np.concatenate(y_).ravel().tolist() if y_ else []

In [14]:
# Write the trained model object to MODEL_PATH.
# NOTE(review): this serialises the whole module rather than only its
# state_dict, so loading the file presumably needs the binaryClassification
# class to be available under the same name — confirm this is intended.
torch.save(model, MODEL_PATH)

In [15]:
# Copy the saved model file into the mounted Drive folder; {MODEL_PATH} is
# interpolated from the Python variable of the same name.
!cp {MODEL_PATH} /content/drive/MyDrive/NSP_classification_model.bin