In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F 
import seaborn as sns
import matplotlib.pyplot as plt
from torch.utils.data import Dataset


In [2]:
USE_GPU = True
num_class = 100
dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)

using device: cuda


In [3]:
df=pd.read_csv('heart.csv')

In [4]:
def convert_y(x):
    if x == "No":
        x = 0
    else:
        x = 1
    return x
df["HeartDisease"] = df["HeartDisease"].apply( lambda x: convert_y(x))
from sklearn.preprocessing import LabelEncoder
def label_transformation(x):
    le = LabelEncoder()
    le_encoded = le.fit_transform(x)
    return le_encoded
df_transformed = df.copy()
categorical_columns = ["Smoking","Stroke","DiffWalking","AlcoholDrinking","AgeCategory","Diabetic","GenHealth","Sex","Race", "PhysicalActivity","SkinCancer","KidneyDisease","Asthma"]
for x in categorical_columns:
    df_transformed[x] = label_transformation(df[x])
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df_transformed[["BMI","PhysicalHealth","MentalHealth"]] = scaler.fit_transform(df_transformed[["BMI","PhysicalHealth","MentalHealth"]])
from sklearn.model_selection import train_test_split 
#X = df_transformed.drop("HeartDisease",axis=1)

#y = df_transformed["HeartDisease"]
# Train & Test split

#x_train=df_transformed
x_train, x_test= train_test_split(df_transformed,test_size=0.25,random_state=21)

In [5]:
class heartdataset(Dataset):
    def __init__(self, df):
        x=df.iloc[1:, 1:18].values
        y=df.iloc[1:, 0].values
        sc=StandardScaler()
        x_train=sc.fit_transform(x)
        self.X_train=torch.tensor(x_train, dtype=torch.float32)
        self.y_train=torch.tensor(y)
    def __len__(self):
        return len(self.y_train)
    def __getitem__(self, idx):
        return self.X_train[idx], self.y_train[idx]
x_train_loader=heartdataset(x_train)
x_test_loader=heartdataset(x_test)


x_train_loader = torch.utils.data.DataLoader(x_train_loader, batch_size=128, shuffle=True)
x_test_loader = torch.utils.data.DataLoader(x_test_loader, batch_size=128, shuffle=True)

In [6]:
df_transformed.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0,-1.84475,1,0,0,-0.046751,3.281069,0,0,7,5,2,1,4,5.0,1,0,1
1,0,-1.256338,0,0,1,-0.42407,-0.490039,0,0,12,5,0,1,4,7.0,0,0,0
2,0,-0.274603,1,0,0,2.091388,3.281069,0,1,9,5,2,1,1,8.0,1,0,0
3,0,-0.647473,0,0,0,-0.42407,-0.490039,0,0,11,5,0,0,2,6.0,0,0,1
4,0,-0.726138,0,0,0,3.097572,-0.490039,1,0,4,5,0,1,4,8.0,0,0,0


In [7]:
class heart(nn.Module):
    def __init__(self):
        super().__init__()
      
        self.fc1 = nn.Linear(17, 1000)
        self.fc2 = nn.Linear(1000, 500)
        self.fc3 = nn.Linear(500, 2)
        self.Sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        scores = None

        x=self.relu(self.fc1(x))
        x=self.relu(self.fc2(x))

        scores=self.fc3(x)

        return scores
#319795


In [8]:
def train_heart(model, optimizer, epochs=10):
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        for t, (x,y) in enumerate(x_train_loader):

            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % print_every == 0:
                print('Epoch %d, Iteration %d, loss = %.4f' % (e, t, loss.item()))
                check_accuracy_heart(x_test_loader, model)
                print()

In [9]:
def check_accuracy_heart(x_test_loader, model):

    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x,y in x_test_loader: 
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [10]:
learning_rate=1e-5
model = heart()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

train_heart(model, optimizer)

Epoch 0, Iteration 0, loss = 0.7043
Got 32184 / 79948 correct (40.26)

Epoch 0, Iteration 100, loss = 0.7039
Got 35551 / 79948 correct (44.47)

Epoch 0, Iteration 200, loss = 0.6999
Got 38741 / 79948 correct (48.46)

Epoch 0, Iteration 300, loss = 0.6824
Got 42058 / 79948 correct (52.61)

Epoch 0, Iteration 400, loss = 0.6841
Got 45098 / 79948 correct (56.41)

Epoch 0, Iteration 500, loss = 0.6831
Got 48057 / 79948 correct (60.11)

Epoch 0, Iteration 600, loss = 0.6796
Got 50937 / 79948 correct (63.71)

Epoch 0, Iteration 700, loss = 0.6693
Got 53603 / 79948 correct (67.05)

Epoch 0, Iteration 800, loss = 0.6614
Got 56345 / 79948 correct (70.48)

Epoch 0, Iteration 900, loss = 0.6684
Got 58991 / 79948 correct (73.79)

Epoch 0, Iteration 1000, loss = 0.6642
Got 61834 / 79948 correct (77.34)

Epoch 0, Iteration 1100, loss = 0.6497
Got 64533 / 79948 correct (80.72)

Epoch 0, Iteration 1200, loss = 0.6420
Got 66919 / 79948 correct (83.70)

Epoch 0, Iteration 1300, loss = 0.6459
Got 68853 /