# ai4 for health related matters

## dependencies

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

## dataset stuff

### read the dataset

In [2]:
df = pd.read_csv('dataset.csv')
# Shuffle the rows; a fixed random_state keeps the order identical across kernel restarts
df = df.sample(frac=1, random_state=42)
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,diagnosis
117,35,0,4,138,183,0,0,182,0,1.4,1,0.0,3.0,0
136,70,1,4,145,174,0,0,125,1,2.6,3,0.0,7.0,4
86,47,1,3,138,257,0,2,156,0,0.0,1,0.0,3.0,0
154,64,1,4,120,246,0,2,96,1,2.2,3,1.0,3.0,3
224,63,0,4,108,269,0,0,169,1,1.8,2,2.0,3.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226,47,1,4,112,204,0,0,143,0,0.1,1,0.0,3.0,0
130,54,1,3,120,258,0,2,147,0,0.4,2,0.0,7.0,0
24,60,1,4,130,206,0,2,132,1,2.4,2,2.0,7.0,4
235,54,1,4,122,286,0,2,116,1,3.2,2,2.0,3.0,3


### train_test split

In [3]:
ratio = 0.9 # 90:10 train:test ratio
split_seed = 42  # fixed seed so the same rows land in each split on every run

# Draw the training rows at random (reproducibly, thanks to random_state)
train_set = df.sample(frac=ratio, random_state=split_seed)

# Every row whose index label was not drawn into train_set becomes the test set
test_set = df.drop(train_set.index)

# A cell only renders its last expression, so the shapes are printed explicitly
print(train_set.shape, test_set.shape)

test_set

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,diagnosis
117,35,0,4,138,183,0,0,182,0,1.4,1,0.0,3.0,0
273,71,0,4,112,149,0,0,125,0,1.6,2,0.0,3.0,0
29,40,1,4,110,167,0,2,114,1,2.0,2,0.0,7.0,3
159,68,1,3,118,277,0,0,151,0,1.0,1,1.0,7.0,0
105,54,1,2,108,309,0,0,156,0,0.0,1,0.0,7.0,0
94,63,0,3,135,252,0,2,172,0,0.0,1,0.0,3.0,0
17,54,1,4,140,239,0,0,160,0,1.2,1,0.0,3.0,0
56,50,1,3,140,233,0,0,163,0,0.6,2,1.0,7.0,1
162,54,0,3,110,214,0,0,158,0,1.6,2,0.0,3.0,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0,0


### formatting

In [4]:
# Prepare the training split: tidy headers, drop incomplete rows, scale, convert to tensors

# Strip stray whitespace from the column names
train_set.columns = train_set.columns.str.strip()

# '?' marks a missing value: turn it into NaN, discard every row that contains
# a NaN, then cast all remaining columns to float
train_set = (
    train_set
    .replace('?', np.nan)
    .dropna()
    .astype(float)
)

# Features are every column except the label column
X = train_set.drop(columns="diagnosis")

# Binarise the label: values above 0.5 become 1.0, everything else 0.0
y = train_set["diagnosis"].gt(0.5).astype(float)

# Normalise the features with sklearn's min-max scaling, fitted on the training features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Wrap features and labels as float32 PyTorch tensors
inputs = torch.tensor(X_scaled, dtype=torch.float32)
targets = torch.tensor(y.to_numpy(), dtype=torch.float32)

print(inputs.shape, targets.shape)

torch.Size([268, 13]) torch.Size([268])


In [5]:
# Prepare the held-out split: tidy headers, drop incomplete rows, scale, convert to tensors

# Strip stray whitespace from the column names
test_set.columns = test_set.columns.str.strip()

# '?' marks a missing value: turn it into NaN, discard every row that contains
# a NaN, then cast all remaining columns to float
test_set = (
    test_set
    .replace('?', np.nan)
    .dropna()
    .astype(float)
)

# Features are every column except the label column
X_test = test_set.drop(columns="diagnosis")

# Binarise the label: values above 0.5 become 1.0, everything else 0.0
y_test = test_set["diagnosis"].gt(0.5).astype(float)

# Reuse the existing `scaler` (transform only, it is not refitted here).
# NOTE: this rebinds X_scaled, so from this cell on it holds the test features.
X_scaled = scaler.transform(X_test)

# Wrap features and labels as float32 PyTorch tensors
test_tensor = torch.tensor(X_scaled, dtype=torch.float32)
test_target = torch.tensor(y_test.to_numpy(), dtype=torch.float32)

print(test_tensor.shape, test_target.shape)

torch.Size([29, 13]) torch.Size([29])


## AI

### setup/params

In [6]:
class SimpleNN(nn.Module):
    """Small fully connected network that emits a single raw logit per sample.

    Layer widths are ``in_features -> 26 -> 32 -> 26 -> 1`` with a ReLU after
    each of the first three linear layers.

    Args:
        in_features: Number of input features per sample. Defaults to 13,
            the width that was previously hard-coded in the first layer.
    """

    def __init__(self, in_features: int = 13):
        super().__init__()
        # Attribute names and creation order are kept as-is so that
        # print(model) and state_dict keys stay the same.
        self.fc1 = nn.Linear(in_features, 26)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(26, 32)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(32, 26)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(26, 1)  # output layer

    def forward(self, x):
        """Run the network on ``x`` and return the output of the last linear layer.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with last dimension 1 holding raw logits. No sigmoid is
            applied here; use ``BCEWithLogitsLoss`` for training.
        """
        x = self.relu(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.relu3(self.fc3(x))
        return self.fc4(x)  # No sigmoid here; use BCEWithLogitsLoss

# Seed PyTorch's global RNG so the initial weights are the same on every run
torch.manual_seed(42)

model = SimpleNN()
print(model)

SimpleNN(
  (fc1): Linear(in_features=13, out_features=26, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=26, out_features=32, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=32, out_features=26, bias=True)
  (relu3): ReLU()
  (fc4): Linear(in_features=26, out_features=1, bias=True)
)


### training

### testing

## UI