In [14]:
import torch
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import StandardScaler    
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# 1. Read Data File

In [16]:
# Location of the modelling table, relative to the notebook's working directory.
DATA_PATH = "Data File/final_model_data.csv"
dataframe = pd.read_csv(DATA_PATH)

In [17]:
# Rename the label column to 'target' and drop the 'Unnamed: 0' and 'index'
# columns (presumably index artefacts from earlier exports -- confirm).
dataframe = (
    dataframe
    .rename(columns={'mandeath': 'target'})
    .drop(columns=['Unnamed: 0', 'index'])
)

In [18]:
# Separate the feature matrix (every column except 'target') from the label vector.
X = dataframe.drop(columns=['target'])
y = dataframe['target']

In [19]:
# Hold out 33% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=69,
)

In [20]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information reaches the scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 3. Training

## 3.1 Neural Network (PyTorch binary classifier)


In [21]:
# Training hyper-parameters.
EPOCHS = 50            # number of full passes over the training set
BATCH_SIZE = 64        # rows per optimisation step
LEARNING_RATE = 0.001  # step size handed to the optimizer

# Seed torch's global RNG so weight initialisation, dropout and DataLoader
# shuffling are repeatable after "Restart Kernel -> Run All".
SEED = 69
torch.manual_seed(SEED)

## train data
class TrainData(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        X_data: indexable collection of feature rows (e.g. a 2-D tensor).
        y_data: indexable collection of labels, one per row of ``X_data``.

    Raises:
        ValueError: if ``X_data`` and ``y_data`` differ in length.
    """

    def __init__(self, X_data, y_data):
        # Fail fast: a length mismatch would otherwise only surface as an
        # IndexError (or silently dropped labels) in the middle of an epoch.
        if len(X_data) != len(y_data):
            raise ValueError(
                f"X_data and y_data must have the same length, "
                f"got {len(X_data)} and {len(y_data)}"
            )
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.X_data)


# Convert the labels to a NumPy array before handing them to torch: y_train is a
# pandas Series whose integer index was shuffled by train_test_split, and torch's
# generic sequence handling indexes it with [0], which pandas resolves as a label
# lookup rather than a positional one.
train_data = TrainData(torch.FloatTensor(X_train),
                       torch.FloatTensor(y_train.to_numpy(dtype="float32")))

## test data    
class TestData(Dataset):
    """Label-free map-style dataset used at prediction time."""

    def __init__(self, X_data):
        # Keep a reference to the feature rows; no copy is made.
        self.X_data = X_data

    def __len__(self):
        """Number of feature rows held by the dataset."""
        return len(self.X_data)

    def __getitem__(self, index):
        """Feature row stored at position ``index``."""
        row = self.X_data[index]
        return row
    

# Wrap the scaled test features; labels stay in y_test for the metrics cells.
test_data = TestData(torch.FloatTensor(X_test))

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# NOTE(review): batch_size=1 means one forward pass per test row; raising it
# requires the downstream flattening of predictions to handle multi-row batches.
test_loader = DataLoader(test_data, batch_size=1)

In [22]:
class BinaryClassification(nn.Module):
    """Feed-forward binary classifier with two hidden layers.

    The network returns one raw logit per sample (no sigmoid is applied), so it
    is meant to be paired with a logit-based loss such as ``BCEWithLogitsLoss``.

    Args:
        num_features: width of the input vectors. Defaults to 53, the value
            that was previously hard-coded.
        hidden_size: width of both hidden layers. Defaults to 64.
        dropout_p: dropout probability applied before the output layer.
            Defaults to 0.1.
    """

    def __init__(self, num_features=53, hidden_size=64, dropout_p=0.1):
        super().__init__()
        # Sub-modules are created in the original order so that seeded weight
        # initialisation and the printed module summary are unchanged.
        self.layer_1 = nn.Linear(num_features, hidden_size)
        self.layer_2 = nn.Linear(hidden_size, hidden_size)
        self.layer_out = nn.Linear(hidden_size, 1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_p)
        self.batchnorm1 = nn.BatchNorm1d(hidden_size)
        self.batchnorm2 = nn.BatchNorm1d(hidden_size)

    def forward(self, inputs):
        """Map a ``(batch, num_features)`` tensor to ``(batch, 1)`` logits."""
        # Each hidden block is Linear -> ReLU -> BatchNorm.
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        # Dropout is applied once, just before the output projection.
        x = self.dropout(x)
        x = self.layer_out(x)

        return x

In [23]:
# Use the first CUDA GPU when one is visible to torch; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cpu


In [24]:
# Build the network on the selected device (Module.to returns the module itself).
model = BinaryClassification().to(device)
print(model)

# BCEWithLogitsLoss takes raw logits, which is what the model's forward returns.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

BinaryClassification(
  (layer_1): Linear(in_features=53, out_features=64, bias=True)
  (layer_2): Linear(in_features=64, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.1, inplace=False)
  (batchnorm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [25]:
def binary_acc(y_pred, y_test):
    """Return batch accuracy as a whole-number percentage.

    Args:
        y_pred: tensor of raw logits, one per sample.
        y_test: tensor of 0/1 labels with the same number of elements as
            ``y_pred``. A differing shape (e.g. ``(N,)`` against ``(N, 1)``)
            is reshaped to match ``y_pred``.

    Returns:
        0-dim float tensor holding the accuracy in percent, rounded to the
        nearest integer.

    Raises:
        ValueError: if ``y_pred`` and ``y_test`` hold different numbers of
            elements.
    """
    if y_test.shape != y_pred.shape:
        if y_test.numel() != y_pred.numel():
            raise ValueError(
                f"y_pred and y_test must have the same number of elements, "
                f"got {y_pred.numel()} and {y_test.numel()}"
            )
        # Without this, (N, 1) == (N,) broadcasts to an N x N comparison and
        # the reported accuracy can exceed 100%.
        y_test = y_test.reshape(y_pred.shape)

    # sigmoid turns logits into probabilities; round thresholds them to 0/1.
    y_pred_tag = torch.round(torch.sigmoid(y_pred))

    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum / y_test.shape[0]
    acc = torch.round(acc * 100)

    return acc

In [26]:
# Put dropout and batch-norm into training behaviour before optimising.
model.train()
for e in range(1, EPOCHS + 1):
    # Running sums of the per-batch loss and accuracy for this epoch.
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        # Add a trailing dimension so the labels line up with the model output.
        targets = y_batch.unsqueeze(1)

        optimizer.zero_grad()
        y_pred = model(X_batch)

        loss = criterion(y_pred, targets)
        acc = binary_acc(y_pred, targets)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    # Report the unweighted mean over batches.
    print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')

Epoch 001: | Loss: 0.11198 | Acc: 96.296
Epoch 002: | Loss: 0.10252 | Acc: 96.518
Epoch 003: | Loss: 0.09960 | Acc: 96.596
Epoch 004: | Loss: 0.09807 | Acc: 96.621
Epoch 005: | Loss: 0.09701 | Acc: 96.642
Epoch 006: | Loss: 0.09650 | Acc: 96.668
Epoch 007: | Loss: 0.09599 | Acc: 96.670
Epoch 008: | Loss: 0.09563 | Acc: 96.669
Epoch 009: | Loss: 0.09524 | Acc: 96.676
Epoch 010: | Loss: 0.09475 | Acc: 96.691
Epoch 011: | Loss: 0.09459 | Acc: 96.708
Epoch 012: | Loss: 0.09429 | Acc: 96.719
Epoch 013: | Loss: 0.09374 | Acc: 96.730
Epoch 014: | Loss: 0.09359 | Acc: 96.727
Epoch 015: | Loss: 0.09342 | Acc: 96.731
Epoch 016: | Loss: 0.09303 | Acc: 96.746
Epoch 017: | Loss: 0.09307 | Acc: 96.743
Epoch 018: | Loss: 0.09279 | Acc: 96.745
Epoch 019: | Loss: 0.09249 | Acc: 96.766
Epoch 020: | Loss: 0.09238 | Acc: 96.755
Epoch 021: | Loss: 0.09226 | Acc: 96.752
Epoch 022: | Loss: 0.09194 | Acc: 96.778
Epoch 023: | Loss: 0.09185 | Acc: 96.793
Epoch 024: | Loss: 0.09188 | Acc: 96.782
Epoch 025: | Los

In [27]:
# Predict a hard 0/1 label for every test row.
# eval() switches dropout off and makes batch-norm use its running statistics;
# no_grad() skips autograd bookkeeping during inference.
batch_predictions = []
model.eval()
with torch.no_grad():
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        y_test_pred = torch.sigmoid(y_test_pred)
        y_pred_tag = torch.round(y_test_pred)
        batch_predictions.append(y_pred_tag.cpu())

# Concatenate and flatten so the result is a flat list of floats whatever the
# loader's batch_size is (squeezing each batch only worked for batch_size=1).
y_pred_list = (
    torch.cat(batch_predictions).flatten().tolist() if batch_predictions else []
)

In [28]:
# Rows are the true classes and columns the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred_list)

array([[ 98489,   5361],
       [  1237, 102891]])

In [29]:
# Per-class precision, recall and F1 on the held-out split.
report = classification_report(y_true=y_test, y_pred=y_pred_list)
print(report)

              precision    recall  f1-score   support

         0.0       0.99      0.95      0.97    103850
         1.0       0.95      0.99      0.97    104128

    accuracy                           0.97    207978
   macro avg       0.97      0.97      0.97    207978
weighted avg       0.97      0.97      0.97    207978

