In [1]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
data = pd.read_csv('diabetes.csv')


In [3]:
x = data.iloc[:,0:-1].values
y_string= list(data.iloc[:,-1])

In [4]:
print(x[:3])
print(y_string[:3])

[[  6.  148.   72.   35.    0.   33.6  50. ]
 [  1.   85.   66.   29.    0.   26.6  31. ]
 [  8.  183.   64.    0.    0.   23.3  32. ]]
['positive', 'negative', 'positive']


In [5]:
y_int = []
for string in y_string:
    if string == 'positive':
        y_int.append(1)
    else:
        y_int.append(0)

In [6]:

y = np.array(y_int, dtype = 'float64')

In [7]:
# Feature Normalization
sc = StandardScaler()
x = sc.fit_transform(x)

In [8]:
x = torch.tensor(x)
y = torch.tensor(y).unsqueeze(1)

In [9]:
print(x.shape)
print(y.shape)

torch.Size([768, 7])
torch.Size([768, 1])


In [10]:
class Dataset(Dataset):
    def __init__(self,x,y):
        self.x = x
        self.y = y    
    def __getitem__(self,index):   
        return self.x[index], self.y[index]
    def __len__(self):
        return len(self.x)

In [11]:
dataset = Dataset(x,y)

In [12]:
len(dataset)

768

In [13]:
train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                           batch_size=32,
                                           shuffle=True)

In [14]:
print("There is {} batches in the dataset".format(len(train_loader)))
for (x,y) in train_loader:
    print("For one iteration (batch), there is:")
    print("Data:    {}".format(x.shape))
    print("Labels:  {}".format(y.shape))
    break

There is 24 batches in the dataset
For one iteration (batch), there is:
Data:    torch.Size([32, 7])
Labels:  torch.Size([32, 1])


In [15]:
class Model(nn.Module):
    def __init__(self, input_features,output_features):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(input_features, 5)
        self.fc2 = nn.Linear(5, 4)
        self.fc3 = nn.Linear(4, 3)
        self.fc4 = nn.Linear(3, output_features)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

    def forward(self, x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out

In [16]:
net = Model(7,1)
criterion = torch.nn.BCELoss(size_average=True)   
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)



In [17]:
num_epochs = 225
for epoch in range(num_epochs):
    for inputs,labels in train_loader:
        inputs = inputs.float()
        labels = labels.float()
       
        output = net(inputs)
      
        loss = criterion(output, labels)    
        optimizer.zero_grad()
       
        loss.backward()
        optimizer.step()
    output = (output>0.5).float()
    accuracy = (output == labels).float().mean() 
    print("Epoch {}/{}, Loss: {:.3f}, Accuracy: {:.3f}".format(epoch+1,num_epochs, loss, accuracy))

Epoch 1/225, Loss: 0.532, Accuracy: 0.750
Epoch 2/225, Loss: 0.486, Accuracy: 0.812
Epoch 3/225, Loss: 0.498, Accuracy: 0.719
Epoch 4/225, Loss: 0.429, Accuracy: 0.844
Epoch 5/225, Loss: 0.511, Accuracy: 0.719
Epoch 6/225, Loss: 0.581, Accuracy: 0.719
Epoch 7/225, Loss: 0.567, Accuracy: 0.719
Epoch 8/225, Loss: 0.491, Accuracy: 0.688
Epoch 9/225, Loss: 0.486, Accuracy: 0.812
Epoch 10/225, Loss: 0.518, Accuracy: 0.781
Epoch 11/225, Loss: 0.449, Accuracy: 0.875
Epoch 12/225, Loss: 0.491, Accuracy: 0.812
Epoch 13/225, Loss: 0.454, Accuracy: 0.750
Epoch 14/225, Loss: 0.454, Accuracy: 0.781
Epoch 15/225, Loss: 0.366, Accuracy: 0.812
Epoch 16/225, Loss: 0.357, Accuracy: 0.875
Epoch 17/225, Loss: 0.551, Accuracy: 0.688
Epoch 18/225, Loss: 0.504, Accuracy: 0.688
Epoch 19/225, Loss: 0.498, Accuracy: 0.719
Epoch 20/225, Loss: 0.381, Accuracy: 0.812
Epoch 21/225, Loss: 0.396, Accuracy: 0.781
Epoch 22/225, Loss: 0.481, Accuracy: 0.781
Epoch 23/225, Loss: 0.272, Accuracy: 0.906
Epoch 24/225, Loss: 