# ai with pytorch for detecting heart disease in humans

## import dependencies

In [86]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

## dataset stuff

### import dataset csv

In [87]:
df = pd.read_csv('dataset.csv')
df = df.sample(frac=1)

#show table
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,diagnosis
56,50,1,3,140,233,0,0,163,0,0.6,2,1.0,7.0,1
63,54,0,3,135,304,1,0,170,0,0.0,1,0.0,3.0,0
72,62,1,4,120,267,0,0,99,1,1.8,2,2.0,7.0,1
58,54,1,3,125,273,0,2,152,0,0.5,3,1.0,3.0,0
144,58,1,3,105,240,0,2,154,1,0.6,2,0.0,7.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,67,0,3,115,564,0,2,160,0,1.6,2,0.0,7.0,0
100,45,1,4,115,260,0,2,185,0,0.0,1,0.0,3.0,0
166,52,1,3,138,223,0,0,169,0,0.0,1,?,3.0,0
213,66,0,4,178,228,1,0,165,1,1.0,2,2.0,7.0,3


### train test split

In [88]:
ratio = 0.99
total_rows = df.shape[0]
train_size = int(total_rows*ratio)

# Split data into a test dataset and train dataset
train = df[0:train_size]
test = df[train_size:]

#convert to numpy arrays (so its autodiff compatible)
train_data = train.to_numpy()
test_data = test.to_numpy()

#check
print(df)

print(train)
print(test)

print(train_data)
print(test_data)

     age   sex   cp   trestbps   chol   fbs   restecg   thalach   exang  \
56    50     1    3        140    233     0         0       163       0   
63    54     0    3        135    304     1         0       170       0   
72    62     1    4        120    267     0         0        99       1   
58    54     1    3        125    273     0         2       152       0   
144   58     1    3        105    240     0         2       154       1   
..   ...   ...  ...        ...    ...   ...       ...       ...     ...   
152   67     0    3        115    564     0         2       160       0   
100   45     1    4        115    260     0         2       185       0   
166   52     1    3        138    223     0         0       169       0   
213   66     0    4        178    228     1         0       165       1   
121   63     0    4        150    407     0         2       154       0   

      oldpeak   slope   ca  thal   diagnosis  
56        0.6       2  1.0   7.0           1  
63   

## acc. ai stuff

### training

- using pytorch, numpy, and matplotlib (probably:) )
- using a high train test split ratio to get the best results

In [89]:
class SimpleNN(nn.Module): 
  def __init__(self): 
    super(SimpleNN, self).__init__() 
    self.fc1 = nn.Linear(13, 2)  # match 13 input features from your input
    self.relu = nn.ReLU()      # Activation function 
    self.fc2 = nn.Linear(2, 1)

  def forward(self, x): 
    x = self.fc1(x) 
    x = self.relu(x) 
    x = self.fc2(x) 
    return x 


In [90]:
model = SimpleNN() 

# outputs the struct. of the model
print(model) 

SimpleNN(
  (fc1): Linear(in_features=13, out_features=2, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=2, out_features=1, bias=True)
)


In [91]:
# sample data for training
train.columns = train.columns.str.strip()               # clean column name
inputs = train.drop("diagnosis", axis=1)                # Drop target column
inputs = inputs.apply(pd.to_numeric, errors='coerce')   # Convert all values to numeric, set errors='coerce' to handle '?'
inputs = inputs.fillna(inputs.mean())  # Fill NaNs with 0, safer than fillna(0)
inputs = torch.tensor(inputs.values)

# Process targets (assuming 'M'/'B' or similar)
targets = train["diagnosis"].map({"M": 1, "B": 0})
targets = torch.tensor(targets.values)

In [92]:
train = train.apply(pd.to_numeric, errors='coerce').fillna(0)

In [93]:
#calculate mean squared error
criterion = nn.MSELoss() 
optimiser = optim.SGD(model.parameters(), lr=0.01) 

In [None]:
#check datatypes
print(f"inputs dtype: {inputs.dtype}")
print(f"targets dtype: {targets.dtype}")
print(f"model parameter dtype: {next(model.parameters()).dtype}")

# Remove samples where targets are NaN
#mask = ~torch.isnan(targets).squeeze()  # shape: (299,)
#inputs = inputs[mask]
#targets = targets[mask]

#sorting datatypes...
inputs = inputs.float()
targets = targets.float()
model = model.float()

# Make sure data types match
inputs = inputs.float()
targets = targets.float().view(-1, 1)  # Ensure shape matches model output

# Sanity check
print(inputs.dtype, targets.dtype)  # should both be float32
print(inputs.shape, targets.shape)  # e.g., [299, 13] and [299, 1]

for epoch in range(1000):                 # Training for 5 epochs 
  optimiser.zero_grad()                  # Clear previous gradients 
  outputs = model(inputs)                # Forward pass 
  loss = criterion(outputs, targets)     # Calculate loss 
  loss.backward()                        # Backward pass to compute gradients 
  optimiser.step()                       # Update weights 
  print(f'Epoch [{epoch + 1}/1000], Loss: {loss.item():.4f}')
  if torch.isnan(loss):
    print("Loss is NaN!")
    break 
outputs

print(inputs[:5])
print(targets[:5])
print(model(inputs[:5]))
print(inputs.min(), inputs.max())


inputs dtype: torch.float64
targets dtype: torch.float64
model parameter dtype: torch.float32
torch.float32 torch.float32
torch.Size([0, 13]) torch.Size([0, 1])
Epoch [1/1000], Loss: nan
Loss is NaN!
tensor([], size=(0, 13))
tensor([], size=(0, 1))
tensor([], size=(0, 1), grad_fn=<AddmmBackward0>)


RuntimeError: min(): Expected reduction dim to be specified for input.numel() == 0. Specify the reduction dim with the 'dim' argument.