In [7]:
import torch
from torchvision import datasets, transforms
import numpy as np
from opacus import PrivacyEngine
from tqdm import tqdm
import pandas as pd
import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from torch.utils.tensorboard import SummaryWriter
tb = SummaryWriter()

class HeartDiseaseModel(nn.Module):
    def __init__(self, input_size):
        super(HeartDiseaseModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256,128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(64, 32)
        self.relu4 = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc5 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        x = self.fc4(x)
        x = self.relu4(x)
        x = self.dropout(x)
        x = self.fc5(x)
        x = self.sigmoid(x)
        return x

input_size = 22  # number of features

model = HeartDiseaseModel(input_size)

import pandas as pd
cleveland = pd.read_csv('C:/Users/siddh/heart_statlog_cleveland_hungary_final.csv')
print('Shape of DataFrame: {}'.format(cleveland.shape))
print(cleveland.loc[1])

cleveland.head()

data = cleveland[~cleveland.isin(['?'])]
data.loc[280:]
data = data.dropna(axis=0)

#renaming columns
data=data.rename(columns={'chest pain type':'cps','resting bp s':'rbps','fasting blood sugar':'fbs','resting ecg':'recg','max heart rate':'max_heart_rate','exercise angina':'ex_angina','ST slope':'STslope'})

#dealing with categorical variables for better inference with the model

def convert_encoding(data):
    dummies=pd.get_dummies(data['sex'],prefix='sex')
    data=pd.concat([data,dummies],axis=1)
    data.drop('sex',axis=1,inplace=True)

    dummies=pd.get_dummies(data['STslope'],prefix='STslope')
    data=pd.concat([data,dummies],axis=1)
    data.drop('STslope',axis=1,inplace=True)

    dummies=pd.get_dummies(data['ex_angina'],prefix='ex_angina')
    data=pd.concat([data,dummies],axis=1)
    data.drop('ex_angina',axis=1,inplace=True)

    dummies=pd.get_dummies(data['recg'],prefix='recg')
    data=pd.concat([data,dummies],axis=1)
    data.drop('recg',axis=1,inplace=True)

    dummies=pd.get_dummies(data['cps'],prefix='cps')
    data=pd.concat([data,dummies],axis=1)
    data.drop('cps',axis=1,inplace=True)

    dummies=pd.get_dummies(data['fbs'],prefix='fbs')
    data=pd.concat([data,dummies],axis=1)
    data.drop('fbs',axis=1,inplace=True)
    
    return data

data=convert_encoding(data)

y = data['target']
X = data.drop(['target'], axis=1)
y = y.to_numpy()
X = X.to_numpy()
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=5)

def df_to_tensor(df):
    return torch.from_numpy(df).float()

X_traint = df_to_tensor(X_train)
y_traint = df_to_tensor(y_train)
X_testt = df_to_tensor(X_test)
y_testt = df_to_tensor(y_test)

from torch.utils.data import DataLoader

train_ds = TensorDataset(X_traint, y_traint)
test_ds = TensorDataset(X_testt, y_testt)

# create data loaders
batch_size = 5
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_ds, batch_size=batch_size, shuffle=True)
delta=1e-7

privacy_engine = PrivacyEngine()
loss_fn = nn.BCELoss() # Binary Cross Entropy
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
model, optimizer, dataloader = privacy_engine.make_private_with_epsilon(module=model,optimizer=optimizer,data_loader=train_dataloader,
                                                         max_grad_norm=1.0,target_epsilon=5, epochs=10, target_delta= 1e-7)
criterion = nn.BCELoss()

def train(model, train_dataloader, optimizer, epoch):
    model.train()
    losses = []
    total_loss = 0
    for batch, (data, target) in enumerate(train_dataloader):
        optimizer.zero_grad()
        output = model(data)
        target = target.unsqueeze(1).float() 
        target = target.repeat(1, output.shape[1])
        loss = criterion(output, target)
        total_loss+= loss.item()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        epsilon= float(privacy_engine.get_epsilon(delta))
    tb.add_scalar("Loss", total_loss, epoch)
#     tb.add_scalar("Correct", total_correct, epoch)
#     tb.add_scalar("Accuracy", total_correct/ len(train_set), epoch)
    print('Epoch: {}, Avg. Loss: {:.4f}, Epsilon:{:.6f}'.format(epoch, np.mean(losses), epsilon))

def test(model, test_dataloader):
    model.eval()
    predictions = []
    targets = []
    with torch.no_grad():
        for data, target in test_dataloader:
            output = model(data)
            predicted = torch.round(output)
            predictions.extend(predicted.tolist())
            targets.extend(target.tolist())
    acc = accuracy_score(targets, predictions)
    print('Test Accuracy: {:.4f}'.format(acc))
    print('Confusion Matrix:')
    print(confusion_matrix(targets, predictions))
    print('Classification Report:')
    print(classification_report(targets, predictions))

for epoch in range(1, 10):
    train(model, train_dataloader, optimizer, epoch)
    
test(model, test_dataloader)

Shape of DataFrame: (1190, 12)
age                     49.0
sex                      0.0
chest pain type          3.0
resting bp s           160.0
cholesterol            180.0
fasting blood sugar      0.0
resting ecg              0.0
max heart rate         156.0
exercise angina          0.0
oldpeak                  1.0
ST slope                 2.0
target                   1.0
Name: 1, dtype: float64


  z = np.log((np.exp(t) + q - 1) / q)


Epoch: 1, Avg. Loss: 0.6940, Epsilon:3.224408
Epoch: 2, Avg. Loss: 0.6904, Epsilon:3.547584
Epoch: 3, Avg. Loss: 0.6843, Epsilon:3.791562
Epoch: 4, Avg. Loss: 0.6745, Epsilon:4.001491
Epoch: 5, Avg. Loss: 0.6520, Epsilon:4.191486
Epoch: 6, Avg. Loss: 0.6073, Epsilon:4.367924
Epoch: 7, Avg. Loss: 0.5288, Epsilon:4.534301
Epoch: 8, Avg. Loss: 0.4475, Epsilon:4.692778
Epoch: 9, Avg. Loss: 0.4324, Epsilon:4.844799
Test Accuracy: 0.8277
Confusion Matrix:
[[ 87  20]
 [ 21 110]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.81      0.81      0.81       107
         1.0       0.85      0.84      0.84       131

    accuracy                           0.83       238
   macro avg       0.83      0.83      0.83       238
weighted avg       0.83      0.83      0.83       238

