In [42]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from torchmetrics.classification import BinaryAccuracy
import seaborn as sns

filepath = 'diabetes_clean.csv'
device = 'cpu'

In [43]:
def visualizeData():
    # Load the dataset
    df = pd.read_csv(filepath)
    df = df.astype(float)

    color_map = {0: 'green', 1: 'red'}
    colors = df['Outcome'].map(color_map)
    # Create pairplot
    sns.pairplot(df, hue='Outcome', palette=color_map)

    # Show plot
    plt.show()

#visualizeData()

In [44]:
class CustomDataset(Dataset):
    def __init__(self, features, labels):        
        # Extract features and labels
        self.features = features
        self.labels = labels

        # Normalize features
        self.scaler = StandardScaler()
        self.features = self.scaler.fit_transform(self.features)
        
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        sample = {
            'features': torch.tensor(self.features[idx], dtype=torch.float32),
            'label': torch.tensor(self.labels[idx], dtype=torch.float32)
        }
        return sample

# Load dataset
data = pd.read_csv(filepath)

# Split the dataset into features and labels
X = data.iloc[:, :-1]  # Features
y = data.iloc[:, -1]   # Labels

In [90]:
class NeuralNetwork3(nn.Module):
    def __init__(self, n_x, n_h1, n_h2, n_y):
        # Super init
        super(NeuralNetwork3, self).__init__()
        
        # Layers to use
        self.fc1 = nn.Linear(n_x, n_h1, dtype = torch.float32)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(n_h1, n_h2, dtype = torch.float32)
        self.fc3 = nn.Linear(n_h2, n_y, dtype = torch.float32)
        self.final_activation = nn.Sigmoid()
        
        # Loss and Accuracy metrics
        self.loss = nn.BCELoss()
        self.accuracy = BinaryAccuracy() 

    def forward(self, x):
        # All operations for forward, in order
        out1 = self.fc1(x)
        out2 = self.relu(out1)
        out3 = self.fc2(out2)
        out4 = self.relu(out3)
        out5 = self.fc3(out4)
        out6 = self.final_activation(out5)
        return out6
    
class NeuralNetwork4(nn.Module):
    def __init__(self, n_x, n_h1, n_h2, n_h3, n_y):
        # Super init
        super(NeuralNetwork4, self).__init__()
        
        # Layers to use
        self.fc1 = nn.Linear(n_x, n_h1, dtype = torch.float32)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(n_h1, n_h2, dtype = torch.float32)
        self.fc3 = nn.Linear(n_h2, n_h3, dtype = torch.float32)
        self.fc4 = nn.Linear(n_h3, n_y, dtype = torch.float32)
        self.final_activation = nn.Sigmoid()
        
        # Loss and Accuracy metrics
        self.loss = nn.BCELoss()
        self.accuracy = BinaryAccuracy() 

    def forward(self, x):
        # All operations for forward, in order
        out1 = self.fc1(x)
        out2 = self.relu(out1)
        out3 = self.fc2(out2)
        out4 = self.relu(out3)
        out5 = self.fc3(out4)
        out6 = self.relu(out5)
        out7 = self.fc4(out6)
        out8 = self.final_activation(out7)
        return out8
    
# Create Neural Network model
n_x = 8
n_h1 = 32
n_h2 = 32
n_h3 = 16
n_y = 1
model = NeuralNetwork3(n_x, n_h1, n_h2, n_y).to(device)
model = NeuralNetwork4(n_x, n_h1, n_h2, n_h3, n_y).to(device)

In [91]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Reset index after the split
X_train.reset_index(drop=True, inplace=True)
X_test.reset_index(drop=True, inplace=True)
y_train.reset_index(drop=True, inplace=True)
y_test.reset_index(drop=True, inplace=True)

# Create training and testing datasets
dataset = CustomDataset(X_train, y_train)
testset = CustomDataset(X_test, y_test)

batch_size = 64  # Choose batch size
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [96]:
# Gradient descent parameters: optimizers, repetitions, etc.
num_epochs = 32
optimizer = torch.optim.Adam(model.parameters(),
                             lr = 1e-2,
                             betas = (0.9, 0.999),
                             eps = 1e-08)
optimizer.zero_grad()

for epoch in range(num_epochs):
    for batch in data_loader:
        inputs_re = batch['features'].to(device)
        outputs_re = batch['label'].to(device).reshape(-1, 1)
        
        # Forward pass
        pred = model(inputs_re)
        loss_value = model.loss(pred, outputs_re)
        # Compute binary accuracy
        binary_accuracy_value = model.accuracy(pred, outputs_re)
    
        # Backward pass and optimization
        loss_value.backward()
        optimizer.step()
        optimizer.zero_grad()
        
    # Print loss and accuracy
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {loss_value.item():.4f}, Training Accuracy: {binary_accuracy_value.item():.4f}')

Epoch [1/32], Training Loss: 0.1298, Training Accuracy: 0.9474
Epoch [2/32], Training Loss: 0.0296, Training Accuracy: 0.9737
Epoch [3/32], Training Loss: 0.0369, Training Accuracy: 1.0000
Epoch [4/32], Training Loss: 0.0108, Training Accuracy: 1.0000
Epoch [5/32], Training Loss: 0.1254, Training Accuracy: 0.9474
Epoch [6/32], Training Loss: 0.0189, Training Accuracy: 1.0000
Epoch [7/32], Training Loss: 0.0459, Training Accuracy: 0.9737
Epoch [8/32], Training Loss: 0.0214, Training Accuracy: 1.0000
Epoch [9/32], Training Loss: 0.0685, Training Accuracy: 0.9737
Epoch [10/32], Training Loss: 0.0658, Training Accuracy: 0.9737
Epoch [11/32], Training Loss: 0.1730, Training Accuracy: 0.9737
Epoch [12/32], Training Loss: 0.0408, Training Accuracy: 0.9737
Epoch [13/32], Training Loss: 0.2331, Training Accuracy: 0.9474
Epoch [14/32], Training Loss: 0.0671, Training Accuracy: 0.9737
Epoch [15/32], Training Loss: 0.4050, Training Accuracy: 0.9474
Epoch [16/32], Training Loss: 0.0722, Training Ac

In [97]:
# Define a function for evaluating the model
def evaluate_model(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    running_loss = 0.0

    with torch.no_grad():  # No need to compute gradients during evaluation
        for batch in test_loader:
            inputs, labels = batch['features'], batch['label']
            outputs = model(inputs)
            loss = model.loss(outputs, labels.unsqueeze(1).float())  # Calculate the loss

            # Compute accuracy
            predicted = torch.round(outputs)  # Assuming sigmoid activation for binary classification
            total += labels.size(0)
            correct += (predicted == labels.unsqueeze(1)).sum().item()
            running_loss += loss.item()

    accuracy = correct / total
    avg_loss = running_loss / len(test_loader)

    print(f'Accuracy on test set: {accuracy:.4f}')
    print(f'Average Loss on test set: {avg_loss:.4f}')

test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False)

# Evaluate the model on the test set
evaluate_model(model, test_loader)

Accuracy on test set: 0.6818
Average Loss on test set: 9.2080
