In [29]:
# Import torch and neural network library:
import torch
import torch.nn as nn

# import sklearn model_selection, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# import numpy, pandas, matplotlib, seaborn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Setup device either gpu or cpu
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(f"There are {torch.cuda.device_count()} GPU(s) available.")
    print("Device name:", torch.cuda.get_device_name(0))
else:
    device = torch.device('cpu')
    print("No GPU available, using CPU instead.")

print('Device used:', device)

No GPU available, using CPU instead.
Device used: cpu


In [30]:
# read the dataset framingham.csv and display the first 5 rows.
df = pd.read_csv("framingham.csv")

# Display the first 5 rows
print(df.head())

   male  age  education  currentSmoker  cigsPerDay  BPMeds  prevalentStroke  \
0     1   39        4.0              0         0.0     0.0                0   
1     0   46        2.0              0         0.0     0.0                0   
2     1   48        1.0              1        20.0     0.0                0   
3     0   61        3.0              1        30.0     0.0                0   
4     0   46        3.0              1        23.0     0.0                0   

   prevalentHyp  diabetes  totChol  sysBP  diaBP    BMI  heartRate  glucose  \
0             0         0    195.0  106.0   70.0  26.97       80.0     77.0   
1             0         0    250.0  121.0   81.0  28.73       95.0     76.0   
2             0         0    245.0  127.5   80.0  25.34       75.0     70.0   
3             1         0    225.0  150.0   95.0  28.58       65.0    103.0   
4             0         0    285.0  130.0   84.0  23.10       85.0     85.0   

   TenYearCHD  
0           0  
1           0  
2 

In [31]:
# display the shape, null values
print("\nNumber of null values per column:")
print(df.isnull().sum())


Number of null values per column:
male                 0
age                  0
education          105
currentSmoker        0
cigsPerDay          29
BPMeds              53
prevalentStroke      0
prevalentHyp         0
diabetes             0
totChol             50
sysBP                0
diaBP                0
BMI                 19
heartRate            1
glucose            388
TenYearCHD           0
dtype: int64


In [32]:
# Fill null values with either median or mean.
numerical_cols = df.select_dtypes(include=['number']).columns
for col in numerical_cols:
        mean_val = df[col].mean()
        df[col].fillna(mean_val, inplace=True)
print("Null values in numerical columns filled with the mean.")

Null values in numerical columns filled with the mean.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(mean_val, inplace=True)


In [33]:
# get the features as X and the target column as y.
target_column = 'TenYearCHD'
if target_column in df.columns:
        X = df.drop(columns=[target_column])  # Features are all columns except the target
        y = df[target_column]                 # Target variable

        print("Shape of features (X):", X.shape)
        print("Shape of target (y):", y.shape)
        print("\nFirst 5 rows of features (X):")
        print(X.head())
        print("\nFirst 5 values of target (y):")
        print(y.head())
else:
        print(f"Error: The target column '{target_column}' was not found in the dataset.")

Shape of features (X): (4240, 15)
Shape of target (y): (4240,)

First 5 rows of features (X):
   male  age  education  currentSmoker  cigsPerDay  BPMeds  prevalentStroke  \
0     1   39        4.0              0         0.0     0.0                0   
1     0   46        2.0              0         0.0     0.0                0   
2     1   48        1.0              1        20.0     0.0                0   
3     0   61        3.0              1        30.0     0.0                0   
4     0   46        3.0              1        23.0     0.0                0   

   prevalentHyp  diabetes  totChol  sysBP  diaBP    BMI  heartRate  glucose  
0             0         0    195.0  106.0   70.0  26.97       80.0     77.0  
1             0         0    250.0  121.0   81.0  28.73       95.0     76.0  
2             0         0    245.0  127.5   80.0  25.34       75.0     70.0  
3             1         0    225.0  150.0   95.0  28.58       65.0    103.0  
4             0         0    285.0  130.0

In [34]:
# Split the data into train and test:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [35]:
# Scale the features X_train and X_test using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Convert the X_train, X_test, y_train, y_test to torch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

In [36]:
# Build your neural network
class BinaryClassifier(nn.Module):
    def __init__(self, input_dim):
        super(BinaryClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(64, 32)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=0.3)
        self.fc3 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x


In [37]:
# Instantiate the model, define the loss function and the optimizer
if 'X_train_tensor' in locals():
    # Get the input dimension (number of features)
    input_dim = X_train_tensor.shape[1]

    # Instantiate the model
    model = BinaryClassifier(input_dim)

    print("Neural Network Architecture:")
    print(model)

Neural Network Architecture:
BinaryClassifier(
  (fc1): Linear(in_features=15, out_features=64, bias=True)
  (relu1): ReLU()
  (dropout1): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (relu2): ReLU()
  (dropout2): Dropout(p=0.3, inplace=False)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [39]:
# Train the model
import torch.optim as optim
if 'model' in locals() and 'X_train_tensor' in locals() and 'y_train_tensor' in locals() and 'device' in locals():
    # Define loss function and optimizer
    criterion = nn.BCELoss()  # Binary Cross-Entropy Loss for binary classification
    optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer with a learning rate of 0.001

    # Move model and data to the device (GPU or CPU)
    model.to(device)
    X_train_tensor = X_train_tensor.to(device)
    y_train_tensor = y_train_tensor.to(device)

    # Training loop
    epochs = 100  # Number of training iterations
    for epoch in range(epochs):
        # Forward pass
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)

        # Backward and optimize
        optimizer.zero_grad()  # Clear previous gradients
        loss.backward()        # Calculate gradients
        optimizer.step()       # Update model weights

        # Print training progress
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

    print('\nFinished Training')

Epoch [10/100], Loss: 0.5967
Epoch [20/100], Loss: 0.5297
Epoch [30/100], Loss: 0.4795
Epoch [40/100], Loss: 0.4433
Epoch [50/100], Loss: 0.4252
Epoch [60/100], Loss: 0.4150
Epoch [70/100], Loss: 0.4068
Epoch [80/100], Loss: 0.4050
Epoch [90/100], Loss: 0.4034
Epoch [100/100], Loss: 0.3960

Finished Training


In [None]:
# Evaluate the model

if 'model' in locals() and 'X_test_tensor' in locals() and 'y_test_tensor' in locals() and 'device' in locals():
  
    model.eval()
    X_test_tensor = X_test_tensor.to(device)
    y_test_tensor = y_test_tensor.to(device)

  
    with torch.no_grad():
        # Forward pass
        outputs = model(X_test_tensor)
        predicted_probs = outputs.squeeze()
        predicted_labels = (predicted_probs > 0.5).float() 

        # Calculate accuracy
        correct_predictions = (predicted_labels == y_test_tensor.squeeze()).sum().item()
        total_samples = y_test_tensor.size(0)
        accuracy = correct_predictions / total_samples

        print(f'\nEvaluation:')
        print(f'Accuracy on the test set: {accuracy:.4f}')

else:
    print("Error: Make sure the model, X_test_tensor, y_test_tensor, and device are defined before evaluation.")


Evaluation:
Accuracy on the test set: 0.8550
