In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv('diabetes.csv')

In [4]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [31]:
df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [9]:
df.shape

(768, 9)

In [22]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(df, test_size=0.1)

In [53]:
train.to_csv('train.csv')
test.to_csv('test.csv')

In [23]:
X_train = train.iloc[:, 0:8].values
# df.iloc[1:3, 0:3]
y_train = train.iloc[:, 8].values

In [24]:
X_test = test.iloc[:, 0:8].values
# df.iloc[1:3, 0:3]
y_test = test.iloc[:, 8].values

In [25]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)

In [26]:
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

In [43]:
# create a PIMA classifier model

class PimaClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(8, 32)
        self.act1 = nn.ReLU()
        self.hidden2 = nn.Linear(32, 16)
        self.act2 = nn.ReLU()
        self.output = nn.Linear(16, 1)
        self.act_output = nn.Sigmoid()

    def forward(self, x):
        x = self.act1(self.hidden1(x))
        x = self.act2(self.hidden2(x))
        x = self.act_output(self.output(x))
        return x

model = PimaClassifier()
print(model)

PimaClassifier(
  (hidden1): Linear(in_features=8, out_features=32, bias=True)
  (act1): ReLU()
  (hidden2): Linear(in_features=32, out_features=16, bias=True)
  (act2): ReLU()
  (output): Linear(in_features=16, out_features=1, bias=True)
  (act_output): Sigmoid()
)


In [44]:
# loss function and optimizer
loss_fn = nn.BCELoss()  # binary cross entropy
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [45]:
def evaluate(model, X_test, y_test):
  with torch.no_grad():
    y_pred = model(X_test)
  accuracy = (y_pred.round() == y_test).float().mean()
  return accuracy


In [46]:
n_epochs = 70
batch_size = 16

for epoch in range(n_epochs):
    for i in range(0, len(X_train), batch_size):
        Xbatch = X_train[i:i+batch_size]
        y_pred = model(Xbatch)
        ybatch = y_train[i:i+batch_size]
        loss = loss_fn(y_pred, ybatch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # evaluate model on each epoch
    accuracy = evaluate(model, X_test, y_test)
    print(f'Finished epoch {epoch}, latest loss {loss}, accuracy {accuracy}')

Finished epoch 0, latest loss 0.31779998540878296, accuracy 0.7532467246055603
Finished epoch 1, latest loss 0.3522582948207855, accuracy 0.7792207598686218
Finished epoch 2, latest loss 0.4068981111049652, accuracy 0.6363636255264282
Finished epoch 3, latest loss 0.42505213618278503, accuracy 0.6103895902633667
Finished epoch 4, latest loss 0.37881383299827576, accuracy 0.6623376607894897
Finished epoch 5, latest loss 0.4114656150341034, accuracy 0.6233766078948975
Finished epoch 6, latest loss 0.41428396105766296, accuracy 0.6363636255264282
Finished epoch 7, latest loss 0.3999655544757843, accuracy 0.649350643157959
Finished epoch 8, latest loss 0.3929203450679779, accuracy 0.6233766078948975
Finished epoch 9, latest loss 0.391946405172348, accuracy 0.6623376607894897
Finished epoch 10, latest loss 0.37939175963401794, accuracy 0.6883116960525513
Finished epoch 11, latest loss 0.3877499997615814, accuracy 0.6623376607894897
Finished epoch 12, latest loss 0.3946484625339508, accuracy

In [47]:
accuracy = evaluate(model, X_test, y_test)
print(accuracy)

tensor(0.7662)


In [48]:
torch.save(model, 'model')

In [49]:
model = torch.load('model')
model.eval()

PimaClassifier(
  (hidden1): Linear(in_features=8, out_features=32, bias=True)
  (act1): ReLU()
  (hidden2): Linear(in_features=32, out_features=16, bias=True)
  (act2): ReLU()
  (output): Linear(in_features=16, out_features=1, bias=True)
  (act_output): Sigmoid()
)

In [50]:
accuracy = evaluate(model, X_test, y_test)
print(accuracy)

tensor(0.7662)


In [52]:
X_test.size()

torch.Size([77, 8])