## Logistic Regression on Diabetes Prediction Dataset

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


In [None]:
# Load the dataset. The label column and three features that are not used
# in this experiment are removed from the feature matrix.
df = pd.read_csv('diabetes.csv')
excluded_columns = ['Outcome', 'DiabetesPedigreeFunction', 'Age', 'Pregnancies']
X = df.drop(columns=excluded_columns)
y = df['Outcome']

# Number of feature columns that remain (used to size the model input)
n_features = len(X.columns)


In [None]:
# Split the data into training and testing sets.
ts = 0.2  # Test size: fraction of the samples held out for evaluation
# A fixed random_state makes the split itself repeatable, so results from
# different runs are measured on the same train/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y.to_numpy(), test_size=ts, random_state=42
)

# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split, so no statistics from the
# test data influence training.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


In [None]:
# Convert the train/test arrays to torch tensors.
# Features are cast to float32; labels keep the dtype of their source array.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Labels are built and reshaped into column tensors (one row per sample)
y_train_tensor = torch.tensor(y_train).reshape(-1, 1)
y_test_tensor = torch.tensor(y_test).reshape(-1, 1)


In [None]:
# Define the logistic regression model as an nn.Module subclass
class LogisticRegression(nn.Module):
    """A single linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of features in each input sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # The second argument (1) is the output size: one value per sample
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        return torch.sigmoid(self.linear(x))

# Build the model, sized to the number of input features
model = LogisticRegression(n_input_features=n_features)


In [None]:
# Training hyperparameters: number of iterations and SGD learning rate
n_iters = 1000
learning_rate = 0.03

# Binary cross-entropy loss, optimized with plain SGD over all model parameters
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)


In [None]:
# Training loop: every iteration uses the whole training set.

# The float cast of the labels is the same on every iteration, so do it once.
train_targets = y_train_tensor.float()

# Clear any gradients left over from an earlier (e.g. interrupted) run of this
# loop so they are not accumulated into the first update.
optimizer.zero_grad()

for epoch in range(n_iters):

    # Forward pass: make predictions and calculate the loss
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, train_targets)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Reset gradients so the next backward pass starts from zero
    optimizer.zero_grad()

    # Report progress every 100 iterations
    if epoch % 100 == 0:
        print(f'epoch: {epoch+1}, loss = {loss.item()}')
        

In [None]:
# Evaluate on the test set; gradient tracking is disabled for inference
with torch.no_grad():
    y_predicted = model(X_test_tensor)
    # Round the model outputs to obtain class labels
    y_predicted_cls = torch.round(y_predicted)
    # Accuracy = number of matching labels / number of test samples
    n_correct = (y_predicted_cls == y_test_tensor).sum()
    acc = n_correct / float(len(y_test_tensor))
    print(f'accuracy: {acc.item():.4f}')
    