# ai with pytorch for detecting heart disease in humans

## import dependencies

In [72]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

## dataset stuff

### import dataset csv

In [73]:
df = pd.read_csv('dataset.csv')
df = df.sample(frac=1)

#show table
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,diagnosis
251,58,1,4,146,218,0,0,105,0,2.0,2,1.0,7.0,1
14,52,1,3,172,199,1,0,162,0,0.5,1,0.0,7.0,0
219,59,1,4,138,271,0,2,182,0,0.0,1,0.0,3.0,0
55,54,1,4,124,266,0,2,109,1,2.2,2,1.0,7.0,1
163,58,0,4,100,248,0,2,122,0,1.0,2,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,63,1,4,130,330,1,2,132,1,1.8,1,3.0,7.0,3
79,58,1,4,150,270,0,2,111,1,0.8,1,0.0,7.0,3
114,62,0,3,130,263,0,0,97,0,1.2,2,1.0,7.0,2
81,53,0,4,130,264,0,2,143,0,0.4,2,0.0,3.0,0


### train test split

In [74]:
ratio = 0.99
total_rows = df.shape[0]
train_size = int(total_rows*ratio)

# Split data into a test dataset and train dataset
train = df[0:train_size]
test = df[train_size:]

#convert to numpy arrays (so its autodiff compatible)
train_data = train.to_numpy()
test_data = test.to_numpy()

#check
print(df)

print(train)
print(test)

print(train_data)
print(test_data)

     age   sex   cp   trestbps   chol   fbs   restecg   thalach   exang  \
251   58     1    4        146    218     0         0       105       0   
14    52     1    3        172    199     1         0       162       0   
219   59     1    4        138    271     0         2       182       0   
55    54     1    4        124    266     0         2       109       1   
163   58     0    4        100    248     0         2       122       0   
..   ...   ...  ...        ...    ...   ...       ...       ...     ...   
118   63     1    4        130    330     1         2       132       1   
79    58     1    4        150    270     0         2       111       1   
114   62     0    3        130    263     0         0        97       0   
81    53     0    4        130    264     0         2       143       0   
43    59     1    3        150    212     1         0       157       0   

      oldpeak   slope   ca  thal   diagnosis  
251       2.0       2  1.0   7.0           1  
14   

## acc. ai stuff

### training

- using pytorch, numpy, and matplotlib (probably:) )
- using a high train test split ratio to get the best results

In [75]:
class SimpleNN(nn.Module): 
  def __init__(self): 
    super(SimpleNN, self).__init__() 
    self.fc1 = nn.Linear(13, 2)  # match 13 input features from your input
    self.relu = nn.ReLU()      # Activation function 
    self.fc2 = nn.Linear(2, 1)

  def forward(self, x): 
    x = self.fc1(x) 
    x = self.relu(x) 
    x = self.fc2(x) 
    return x 


In [76]:
model = SimpleNN() 

# outputs the struct. of the model
print(model) 

SimpleNN(
  (fc1): Linear(in_features=13, out_features=2, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=2, out_features=1, bias=True)
)


In [77]:
# sample data for training
train.columns = train.columns.str.strip()               # clean column name
inputs = train.drop("diagnosis", axis=1)                # Drop target column
inputs = inputs.apply(pd.to_numeric, errors='coerce')   # Convert all values to numeric, set errors='coerce' to handle '?'
inputs = inputs.fillna(inputs.mean())  # Fill NaNs with 0, safer than fillna(0)
inputs = torch.tensor(inputs.values)

# Process targets (assuming 'M'/'B' or similar)
targets = train["diagnosis"].map({"M": 1, "B": 0})
targets = torch.tensor(targets.values)

In [78]:
train = train.apply(pd.to_numeric, errors='coerce').fillna(0)

In [79]:
#calculate mean squared error
criterion = nn.MSELoss() 
optimiser = optim.SGD(model.parameters(), lr=0.01) 

In [83]:
#check datatypes
print(f"inputs dtype: {inputs.dtype}")
print(f"targets dtype: {targets.dtype}")
print(f"model parameter dtype: {next(model.parameters()).dtype}")

#sorting datatypes...
inputs = inputs.float()
targets = targets.float()
model = model.float()

# Make sure data types match
inputs = inputs.float()
targets = targets.float().view(-1, 1)  # Ensure shape matches model output

# Sanity check
print(inputs.dtype, targets.dtype)  # should both be float32
print(inputs.shape, targets.shape)  # e.g., [299, 13] and [299, 1]

for epoch in range(1000):                 # Training for 5 epochs 
  optimiser.zero_grad()                  # Clear previous gradients 
  outputs = model(inputs)                # Forward pass 
  loss = criterion(outputs, targets)     # Calculate loss 
  loss.backward()                        # Backward pass to compute gradients 
  optimiser.step()                       # Update weights 
  print(f'Epoch [{epoch + 1}/1000], Loss: {loss.item():.4f}')
  if torch.isnan(loss):
    print("Loss is NaN!")
    break 
outputs

inputs dtype: torch.float32
targets dtype: torch.float32
model parameter dtype: torch.float32
torch.float32 torch.float32
torch.Size([299, 13]) torch.Size([299, 1])
Epoch [1/1000], Loss: nan
Loss is NaN!


tensor([[nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [n