In [816]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import StandardScaler    
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

df = pd.read_csv('Phenotypic_V1_0b_preprocessed1.csv')
print(df.shape)
df.head(5)

(1112, 13)


Unnamed: 0.1,Unnamed: 0,SUB_ID,X,subject,SITE_ID,FILE_ID,DX_GROUP,anat_cnr,anat_efc,anat_fber,anat_fwhm,anat_qi1,anat_snr
0,1,50002,1,50002,PITT,no_filename,1,10.201539,1.194664,16.223458,3.878,0.152711,12.072452
1,2,50003,2,50003,PITT,Pitt_0050003,1,7.165701,1.126752,10.460008,4.282238,0.161716,9.241155
2,3,50004,3,50004,PITT,Pitt_0050004,1,7.698144,1.226218,9.72575,3.881684,0.174186,9.323463
3,4,50005,4,50005,PITT,Pitt_0050005,1,9.071807,1.256278,11.198226,3.628667,0.119269,10.8142
4,5,50006,5,50006,PITT,Pitt_0050006,1,8.026798,1.407166,6.282055,3.674539,0.130647,10.123574


In [817]:
df['DX_GROUP'] = df['DX_GROUP'].astype('category')
encode_map = {
    1: 0,
    2:1
}

df['DX_GROUP'].replace(encode_map, inplace=True)
df.head(5)

Unnamed: 0.1,Unnamed: 0,SUB_ID,X,subject,SITE_ID,FILE_ID,DX_GROUP,anat_cnr,anat_efc,anat_fber,anat_fwhm,anat_qi1,anat_snr
0,1,50002,1,50002,PITT,no_filename,0,10.201539,1.194664,16.223458,3.878,0.152711,12.072452
1,2,50003,2,50003,PITT,Pitt_0050003,0,7.165701,1.126752,10.460008,4.282238,0.161716,9.241155
2,3,50004,3,50004,PITT,Pitt_0050004,0,7.698144,1.226218,9.72575,3.881684,0.174186,9.323463
3,4,50005,4,50005,PITT,Pitt_0050005,0,9.071807,1.256278,11.198226,3.628667,0.119269,10.8142
4,5,50006,5,50006,PITT,Pitt_0050006,0,8.026798,1.407166,6.282055,3.674539,0.130647,10.123574


In [818]:
#Dropping empty columns
df['DX_GROUP'].replace(2, 0, inplace=True) #So sigmoid function gives right output. if you replace sigmoid, you can skip this

df['anat_cnr'].replace('', np.nan, inplace=True)
df['anat_efc'].replace('', np.nan, inplace=True)
df['anat_fber'].replace('', np.nan, inplace=True)
df['anat_fwhm'].replace('', np.nan, inplace=True)
df['anat_qi1'].replace('', np.nan, inplace=True)
df['anat_snr'].replace('', np.nan, inplace=True)

#Replacing null values in all relevant input columns
df.dropna(subset=['anat_cnr','anat_efc', 'anat_fber', 'anat_fwhm', 'anat_qi1', 'anat_snr'], inplace=True)

#Verifying number of null rows
print("Number of null values:")
print(df.isnull().sum())

Number of null values:
Unnamed: 0    0
SUB_ID        0
X             0
subject       0
SITE_ID       0
FILE_ID       0
DX_GROUP      0
anat_cnr      0
anat_efc      0
anat_fber     0
anat_fwhm     0
anat_qi1      0
anat_snr      0
dtype: int64


In [819]:
X=df[['anat_cnr','anat_efc', 'anat_fber', 'anat_fwhm', 'anat_qi1', 'anat_snr']]
y=df['DX_GROUP']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=69)

In [820]:
EPOCHS = 50
BATCH_SIZE = 64
LEARNING_RATE = 0.001

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#y_train =  torch.from_numpy(y_train.ravel()).float()
y_train = torch.Tensor(y_train.values)
y_test = torch.Tensor(y_test.values)

print(y_train)
print(X_test)

tensor([0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 0., 0.,
        0., 1., 0., 0., 1., 1., 1., 0., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0.,
        0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0.,
        1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0.,
        1., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 1., 0., 1., 0.,
        0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 1., 0., 0., 1., 1., 0., 1., 0.,
        1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 0.,
        1., 0., 1., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0., 0.,
        1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 1., 1., 0., 0., 1., 1., 0., 0.,
        0., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 0., 1., 1., 1., 0., 0.,
        0., 0., 1., 1., 1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 0., 0., 0.,
        0., 0., 1., 1., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1., 0., 0., 0.,
        1., 1., 1., 0., 0., 0., 0., 0., 

In [821]:
#Function to train data
class TrainData(Dataset):
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data
        
    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]
        
    def __len__ (self):
        return len(self.X_data)

#Calling dataset class to train on X and y tensors
train_data = TrainData(torch.FloatTensor(X_train), 
                       torch.FloatTensor(y_train))
#Function to test data
class TestData(Dataset):
    
    def __init__(self, X_data):
        self.X_data = X_data
        
    def __getitem__(self, index):
        return self.X_data[index]
        
    def __len__ (self):
        return len(self.X_data)
    
#Calling dataset class to test on X tensor
test_data = TestData(torch.FloatTensor(X_test))

In [822]:
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=1)

In [823]:
class BinaryClassification(nn.Module):
    def __init__(self):
        super(BinaryClassification, self).__init__()
        self.layer_1 = nn.Linear(6, 3) 
        self.layer_2 = nn.Linear(3, 2)
        self.layer_out = nn.Linear(2, 1) 
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)
        self.batchnorm1 = nn.BatchNorm1d(3)
        self.batchnorm2 = nn.BatchNorm1d(2)
        
    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.layer_out(x)
        
        return x

In [824]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = BinaryClassification()
model.to(device)
print(model)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)


cpu
BinaryClassification(
  (layer_1): Linear(in_features=6, out_features=3, bias=True)
  (layer_2): Linear(in_features=3, out_features=2, bias=True)
  (layer_out): Linear(in_features=2, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.1, inplace=False)
  (batchnorm1): BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [825]:
def binary_acc(y_pred, y_test):
    y_pred_tag = torch.round(torch.sigmoid(y_pred))

    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)
    
    return acc

In [826]:
model.train()
for e in range(1, EPOCHS+1):
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        
        y_pred = model(X_batch)
        
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        acc = binary_acc(y_pred, y_batch.unsqueeze(1))
        
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        

    print(f'Epoch {e+0:03}: | Train Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')

Epoch 001: | Train Loss: 0.69287 | Acc: 51.417
Epoch 002: | Train Loss: 0.69161 | Acc: 51.750
Epoch 003: | Train Loss: 0.69175 | Acc: 51.250
Epoch 004: | Train Loss: 0.69085 | Acc: 54.000
Epoch 005: | Train Loss: 0.69091 | Acc: 52.083
Epoch 006: | Train Loss: 0.69099 | Acc: 51.333
Epoch 007: | Train Loss: 0.68763 | Acc: 53.750
Epoch 008: | Train Loss: 0.68915 | Acc: 53.833
Epoch 009: | Train Loss: 0.68798 | Acc: 52.833
Epoch 010: | Train Loss: 0.68959 | Acc: 51.667
Epoch 011: | Train Loss: 0.68934 | Acc: 53.917
Epoch 012: | Train Loss: 0.68859 | Acc: 53.333
Epoch 013: | Train Loss: 0.68822 | Acc: 54.333
Epoch 014: | Train Loss: 0.68535 | Acc: 55.833
Epoch 015: | Train Loss: 0.68874 | Acc: 54.667
Epoch 016: | Train Loss: 0.68900 | Acc: 54.167
Epoch 017: | Train Loss: 0.68486 | Acc: 54.000
Epoch 018: | Train Loss: 0.68821 | Acc: 53.833
Epoch 019: | Train Loss: 0.68365 | Acc: 56.333
Epoch 020: | Train Loss: 0.68584 | Acc: 54.583
Epoch 021: | Train Loss: 0.68143 | Acc: 57.000
Epoch 022: | 

In [827]:
y_pred_list = []
model.eval()
with torch.no_grad():
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        y_test_pred = torch.sigmoid(y_test_pred)
        y_pred_tag = torch.round(y_test_pred)
        y_pred_list.append(y_pred_tag.cpu().numpy())
        
    y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
#print(y_pred_list)    

In [828]:
model.eval()
for e in range(1, EPOCHS+1):
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        
        y_pred = model(X_batch)
        
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        acc = binary_acc(y_pred, y_batch.unsqueeze(1))
        
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        

    print(f'Epoch {e+0:03}: | Valid Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')

Epoch 001: | Valid Loss: 0.68287 | Acc: 54.917
Epoch 002: | Valid Loss: 0.68288 | Acc: 54.917
Epoch 003: | Valid Loss: 0.68157 | Acc: 55.000
Epoch 004: | Valid Loss: 0.68092 | Acc: 55.083
Epoch 005: | Valid Loss: 0.68033 | Acc: 55.417
Epoch 006: | Valid Loss: 0.68098 | Acc: 54.667
Epoch 007: | Valid Loss: 0.68156 | Acc: 54.417
Epoch 008: | Valid Loss: 0.68170 | Acc: 54.250
Epoch 009: | Valid Loss: 0.68135 | Acc: 54.583
Epoch 010: | Valid Loss: 0.68291 | Acc: 53.667
Epoch 011: | Valid Loss: 0.68104 | Acc: 54.083
Epoch 012: | Valid Loss: 0.68371 | Acc: 54.000
Epoch 013: | Valid Loss: 0.68146 | Acc: 54.333
Epoch 014: | Valid Loss: 0.67998 | Acc: 54.500
Epoch 015: | Valid Loss: 0.68041 | Acc: 54.333
Epoch 016: | Valid Loss: 0.68058 | Acc: 54.750
Epoch 017: | Valid Loss: 0.68159 | Acc: 54.500
Epoch 018: | Valid Loss: 0.68201 | Acc: 54.167
Epoch 019: | Valid Loss: 0.67969 | Acc: 55.917
Epoch 020: | Valid Loss: 0.67945 | Acc: 54.667
Epoch 021: | Valid Loss: 0.67825 | Acc: 55.917
Epoch 022: | 

In [829]:
print(classification_report(y_test, y_pred_list))

              precision    recall  f1-score   support

         0.0       0.59      0.41      0.49       176
         1.0       0.57      0.73      0.64       187

    accuracy                           0.58       363
   macro avg       0.58      0.57      0.56       363
weighted avg       0.58      0.58      0.57       363

