In [2]:
## import necessary libraries
import numpy as np
import pandas as pd

## Load the raw survey export from the project's raw_data folder
df = pd.read_csv('../raw_data//diabetes_012_health_indicators_BRFSS2015.csv')

# Keep only the first occurrence of each fully identical row
df = df.drop_duplicates()

# Cast the label and the listed feature columns to integer dtype.
# get_dummies() further down only one-hot encodes non-float columns,
# so this cast decides which features get expanded.
int_columns = ['Diabetes_012', 'GenHlth', 'Age', 'Education', 'Income']
df[int_columns] = df[int_columns].astype(int)

In [3]:
# Take an independent deep copy of the cleaned frame before slicing it
df_copy = df.copy(deep=True)

# Column 0 is the label; every remaining column is a feature
y = df_copy.iloc[:, 0]
X = df_copy.iloc[:, 1:]

#get dummies
def get_dummies(X):
    """One-hot encode the multi-category, non-float columns of ``X``.

    A column is encoded when it has more than two distinct values and its
    dtype is not ``float64``. All other columns are passed through unchanged.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table to encode.

    Returns
    -------
    pandas.DataFrame
        New frame in which each selected column is replaced by its indicator
        columns; the first level of every encoded column is dropped
        (``drop_first=True``).
    """
    # Collect the names of the columns that qualify for encoding
    multi_level_columns = []
    for name in X.columns:
        series = X[name]
        if series.nunique() > 2 and series.dtype != 'float64':
            multi_level_columns.append(name)

    # Expand only those columns into indicator variables
    return pd.get_dummies(X, columns=multi_level_columns, drop_first=True)
# Replace the feature frame with its one-hot encoded version
# (non-float columns with more than two distinct values are expanded).
X = get_dummies(X)

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc
import matplotlib.pyplot as plt

## Splitting the data into train and test sets (40% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

## Standard scaling
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training split only.
scalar = StandardScaler()
X_scaled = scalar.fit_transform(X_train)

# Apply the training statistics to the test split. Calling fit_transform here
# would re-fit the scaler on X_test, so the two splits would be standardised
# with different means/variances and test-set information would leak into
# preprocessing.
X_scaled_test = scalar.transform(X_test)

In [6]:
##neural networks
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import recall_score

from torch.utils.data import TensorDataset, DataLoader

# Turn the scaled feature matrices and the label series into PyTorch tensors.
# Features become float32 (network input); labels become int64 class indices
# (the target type used with nn.CrossEntropyLoss in the training cell).

X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_scaled, X_scaled_test)
)

y_train_tensor, y_test_tensor = (
    torch.tensor(labels.to_numpy(), dtype=torch.int64)
    for labels in (y_train, y_test)
)

# Define the neural network
class NeuralNetwork(nn.Module):
    """Fully connected classifier: in_features -> 64 -> 128 -> 64 -> num_classes.

    ReLU is applied after each of the three hidden layers. The output layer
    returns raw logits (no softmax).

    Parameters
    ----------
    in_features : int, default 45
        Width of the input feature vector. The default matches the original
        hard-coded value, so ``NeuralNetwork()`` builds the same architecture
        as before.
    num_classes : int, default 3
        Number of output logits.
    """

    def __init__(self, in_features=45, num_classes=3):
        super().__init__()
        self.layer1 = nn.Linear(in_features, 64)
        self.layer2 = nn.Linear(64, 128)
        self.layer3 = nn.Linear(128, 64)
        self.output_layer = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a ``(batch, in_features)`` float tensor to ``(batch, num_classes)`` logits."""
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        # No activation on the final layer: callers receive unnormalised scores.
        x = self.output_layer(x)
        return x

In [13]:
# Build a fresh model together with its loss function and optimiser
net = NeuralNetwork()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)


def _predicted_labels(logits):
    """Return, for each row of ``logits``, the index of its largest entry."""
    return torch.max(logits.data, 1)[1]


# Full-batch training: every epoch performs one optimiser step on the
# entire training tensor, then reports macro-recall on both splits.
num_epochs = 1000
for epoch in range(num_epochs):

    # Forward pass, loss, backward pass, parameter update
    optimizer.zero_grad()
    outputs = net(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Training macro-recall, based on the logits computed before this update
    predicted = _predicted_labels(outputs)
    train_recall = recall_score(y_train_tensor.numpy(), predicted.numpy(), average='macro')

    # Test macro-recall with the freshly updated weights; gradients are not tracked
    with torch.no_grad():
        test_outputs = net(X_test_tensor)
        predicted_test = _predicted_labels(test_outputs)
    test_recall = recall_score(y_test_tensor.numpy(), predicted_test.numpy(), average='macro')

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Training Macro-Recall: {train_recall:.4f}, Test Macro-Recall: {test_recall:.4f}')


Epoch 1/1000, Loss: 1.1419, Training Macro-Recall: 0.3332, Test Macro-Recall: 0.3324
Epoch 2/1000, Loss: 1.1167, Training Macro-Recall: 0.3325, Test Macro-Recall: 0.3197
Epoch 3/1000, Loss: 1.0928, Training Macro-Recall: 0.3220, Test Macro-Recall: 0.3347
Epoch 4/1000, Loss: 1.0693, Training Macro-Recall: 0.3307, Test Macro-Recall: 0.3333
Epoch 5/1000, Loss: 1.0457, Training Macro-Recall: 0.3330, Test Macro-Recall: 0.3333
Epoch 6/1000, Loss: 1.0215, Training Macro-Recall: 0.3333, Test Macro-Recall: 0.3333
Epoch 7/1000, Loss: 0.9961, Training Macro-Recall: 0.3333, Test Macro-Recall: 0.3333
Epoch 8/1000, Loss: 0.9693, Training Macro-Recall: 0.3333, Test Macro-Recall: 0.3333
Epoch 9/1000, Loss: 0.9406, Training Macro-Recall: 0.3333, Test Macro-Recall: 0.3333
Epoch 10/1000, Loss: 0.9100, Training Macro-Recall: 0.3333, Test Macro-Recall: 0.3333
Epoch 11/1000, Loss: 0.8773, Training Macro-Recall: 0.3333, Test Macro-Recall: 0.3333
Epoch 12/1000, Loss: 0.8427, Training Macro-Recall: 0.3333, Tes

KeyboardInterrupt: 