<a href="https://colab.research.google.com/github/Maruddo/AI-Notebooks/blob/main/heart_disease_model/heart_disease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# HEART DISEASE MODEL - PYTORCH

## Import libraries

In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import TensorDataset, Dataset, DataLoader, random_split
from torchvision.transforms import ToTensor, Lambda
import torchvision.models as models

# Create Dataset from file

In [None]:
def import_dataset(dataset):
    """Load the heart-disease CSV and return it as a scaled ``TensorDataset``.

    Every row is parsed as 13 float32 feature columns followed by one int64
    target column; the first line of the file is skipped as a header.
    The raw AGE and SEX columns are replaced by one-hot groups through
    ``split_columns``, so the resulting feature columns are, in order:
    CP, TRESTBPS, CHOL, FBS, RESTECG, THALACH, EXANG, OLDPEAK, SLOPE, CA,
    THAL, WOMEN, MAN, and the age bins 0-30, 30-50, 50-70, 70-100.

    Args:
        dataset: File name (or any source ``np.genfromtxt`` accepts) of the CSV.

    Returns:
        TensorDataset pairing the min-max scaled feature tensor with the
        target tensor.
    """
    records = np.genfromtxt(dataset,
                            delimiter=',',
                            dtype=[('data', "float32", (13,)), ('target', "int64")],
                            usemask=True,
                            skip_header=1)
    data = records['data']
    target = records['target']

    new_X = split_columns(data)

    # Min-max scale every column to [0, 1]. The divisor is the column range
    # (max - min), not the column max; a constant column (for example an age
    # bin no row falls into) has range 0, which is replaced by 1 so the
    # column becomes all zeros instead of NaN.
    col_min = np.min(new_X, axis=0, keepdims=True)
    col_range = np.max(new_X, axis=0, keepdims=True) - col_min
    col_range[col_range == 0] = 1
    new_X = (new_X - col_min) / col_range

    # Convert to PyTorch tensors and bundle them into a single dataset.
    features_tensor = torch.from_numpy(np.array(new_X))
    labels_tensor = torch.from_numpy(np.array(target))
    return TensorDataset(features_tensor, labels_tensor)

def split_columns(data):
    """Replace the raw age and sex columns by one-hot encoded groups.

    Column 0 of ``data`` is read as the age and column 1 as the sex (used as
    an integer index 0/1). Both columns are removed and, in their place, two
    sex indicator columns ([woman, man]) and four age-range indicator columns
    (0-30, 30-50, 50-70, 70-100) are appended on the right.

    Args:
        data: 2-D array whose first two columns are age and sex.

    Returns:
        Array with the remaining original columns followed by the 2 sex
        columns and the 4 age-range columns.
    """
    bins = [0, 30, 50, 70, 100]  # age range boundaries, up to 100
    n_age_bins = len(bins) - 1

    # np.digitize yields 1-based bin numbers, hence the -1. Ages below 0 or
    # at/above 100 would otherwise produce -1 (silently wrapping to the last
    # bin) or 4 (IndexError), so they are clamped to the first/last bin.
    edad_rangos = np.clip(np.digitize(data[:, 0], bins) - 1, 0, n_age_bins - 1)

    sexo_one_hot = np.eye(2, dtype=np.float32)[data[:, 1].astype(int)]  # columns [woman, man]
    edad_one_hot = np.eye(n_age_bins, dtype=np.float32)[edad_rangos]  # one binary column per age range

    data_without_sex_age = np.delete(data, [0, 1], axis=1)  # drop the raw age and sex columns

    # Append the encoded columns to the remaining features.
    return np.hstack((data_without_sex_age, sexo_one_hot, edad_one_hot))

## Split the dataset

In [None]:
def split_dataset(full_dataset, train_fraction=0.8, generator=None):
    """Randomly split a dataset into a training part and a test part.

    Args:
        full_dataset: Dataset (anything with ``len`` and indexing) to split.
        train_fraction: Share of the samples assigned to the training split;
            the remainder goes to the test split. Defaults to 0.8.
        generator: Optional ``torch.Generator`` used for the permutation.
            Pass a seeded generator to get a reproducible split; when omitted
            the global PyTorch RNG is used.

    Returns:
        Tuple ``(training_data, test_data)`` of ``Subset`` objects.

    Raises:
        ValueError: If ``train_fraction`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(f"train_fraction must be within [0, 1], got {train_fraction}")

    train_size = int(train_fraction * len(full_dataset))
    test_size = len(full_dataset) - train_size

    # Only forward the generator when one was supplied so the default RNG
    # behaviour of random_split is left untouched.
    split_kwargs = {} if generator is None else {"generator": generator}
    training_data, test_data = random_split(full_dataset, [train_size, test_size], **split_kwargs)

    return training_data, test_data

# Create DataLoaders

In [None]:
def create_dataloader(training_data, test_data,batch_size):
    """Wrap the two splits in ``DataLoader`` objects.

    The training loader reshuffles its samples on every pass; the test
    loader keeps the dataset order.

    Args:
        training_data: Dataset used for training.
        test_data: Dataset used for evaluation.
        batch_size: Number of samples per batch for both loaders.

    Returns:
        Tuple ``(train_dataloader, test_dataloader)``.
    """
    loaders = (
        DataLoader(training_data, batch_size=batch_size, shuffle=True),
        DataLoader(test_data, batch_size=batch_size, shuffle=False),
    )
    return loaders

# Select Device

In [None]:
def select_device():
    """Return the name of the preferred compute device.

    Preference order: ``"cuda"`` when CUDA is available, otherwise ``"mps"``
    when the MPS backend is available, otherwise ``"cpu"``.

    Returns:
        One of the strings ``"cuda"``, ``"mps"`` or ``"cpu"``.
    """
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"

# Define the model

In [None]:
class NeuralNetwork(nn.Module):
    """Small fully connected classifier: 17 input features -> 2 logits.

    Layer stack: Linear(17, 100) -> LeakyReLU -> Linear(100, 35) ->
    Dropout(0.3) -> LeakyReLU -> Linear(35, 2).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # The layers are listed in the same order and positions as before so
        # the state_dict keys (linear_relu_stack.0, .2, .5) stay the same.
        layers = [
            nn.Linear(17, 100),
            nn.LeakyReLU(),
            nn.Linear(100, 35),
            nn.Dropout(0.3),
            nn.LeakyReLU(),
            nn.Linear(35, 2),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` per sample and return the raw class logits."""
        return self.linear_relu_stack(self.flatten(x))

def load_model(weights, device):
    """Build a ``NeuralNetwork`` and, when possible, restore saved weights.

    If the file ``weights`` exists, its state dict is loaded into the model.
    Loading is best-effort: on failure the freshly initialised model is
    returned and the reason is printed.

    Args:
        weights: Path of the saved state dict.
        device: Device (name or ``torch.device``) the model is moved to.

    Returns:
        The model, placed on ``device``.
    """
    model = NeuralNetwork()
    if os.path.exists(weights):
        try:
            # Load onto the CPU first so a checkpoint written on another
            # device can still be read. The values are copied into the
            # existing parameters (no assign=True), so the final .to(device)
            # below decides where the model lives.
            state_dict = torch.load(weights, map_location="cpu", weights_only=True)
            model.load_state_dict(state_dict)
        except Exception as exc:  # best-effort: keep going with fresh weights
            print("None weights were loaded")
            print(f"Reason: {exc!r}")
    return model.to(device)

# Train & Test

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer, device):
    """Run one training epoch over ``dataloader``.

    For every batch the loss is computed, back-propagated, and the optimizer
    is stepped. The loss and the number of samples processed so far are
    printed on every 25th batch (starting with the first one).

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing ``.dataset``.
        model: Module being trained.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer holding the model parameters.
        device: Device the batches are moved to.
    """
    size = len(dataloader.dataset)
    # Training mode enables layers such as dropout.
    model.train()
    seen = 0  # samples processed so far; no reliance on a global batch size
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        seen += len(X)
        if batch % 25 == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn, device):
    global best_weights, best_correct
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size

    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    if best_correct < correct:
        best_correct = correct
        best_weights = model.state_dict()

# Personal data form

In [None]:
def enter_data(model, device):
    """Ask for one patient's data on stdin and return the predicted class.

    The 13 answers are collected in the same order as the dataset columns,
    encoded with ``personal_data_split`` and fed to ``model``.

    NOTE(review): ``import_dataset`` rescales the training features per
    column, but no rescaling is applied to the values entered here, so the
    model sees inputs on a different scale than during training - confirm
    and share the scaling statistics if that is unintended.

    Args:
        model: Trained classifier; it is switched to evaluation mode.
        device: Device the input tensor is moved to.

    Returns:
        Index of the class with the highest score, as a Python int.

    Raises:
        ValueError: If an answer cannot be parsed as the expected number.
    """
    # (prompt, parser) pairs, in dataset column order.
    fields = [
        ("Enter your age: ", int),
        ("Enter your sex (0 for female, 1 for male):", int),
        ("Enter the type of chest pain (0-3): ", int),
        ("Enter your resting blood pressure (in mm Hg):", int),
        ("Enter your cholesterol level (in mg/dl): ", int),
        ("Enter your fasting blood sugar level (1 if > 120 mg/dl, 0 if not): ", int),
        ("Enter the resting electrocardiographic results (0-2): ", int),
        ("Enter your maximum heart rate achieved: ", int),
        ("Did you have exercise-induced angina? (1 for yes, 0 for no): ", int),
        ("Enter the ST depression value induced by exercise (oldpeak): ", float),
        ("Enter the slope of the peak exercise ST segment (0-2): ", int),
        ("Enter the number of major vessels colored by fluoroscopy (0-3): ", int),
        ("Enter the type of thalassemia (1 = normal, 2 = fixed defect, 3 = reversible defect): ", int),
    ]
    input_data = [parse(input(prompt)) for prompt, parse in fields]

    input_data = personal_data_split(input_data)
    input_tensor = torch.tensor(input_data, dtype=torch.float32).unsqueeze(0).to(device)

    # Modules start in training mode, which keeps dropout active and makes
    # predictions random; switch to evaluation mode before inferring.
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        # Softmax is monotonic, so the arg-max of the logits is the
        # predicted class.
        predicted_class = output.argmax(1).item()

    return predicted_class

def personal_data_split(data):
    """Encode one patient's record the same way ``split_columns`` encodes rows.

    ``data[0]`` is read as the age and ``data[1]`` as the sex (0 or 1). Both
    are removed and replaced by two sex indicator values and four age-range
    indicator values (0-30, 30-50, 50-70, 70-100) appended at the end.

    Args:
        data: Sequence whose first two items are age and sex, followed by
            the remaining feature values.

    Returns:
        1-D array: the remaining values, then the sex one-hot, then the age
        one-hot.
    """
    rangos = [0, 30, 50, 70, 100]  # age range boundaries
    n_age_bins = len(rangos) - 1

    # np.digitize yields 1-based bin numbers, hence the -1. Clamp so ages
    # below 0 or at/above 100 fall into the first/last range instead of
    # wrapping around or raising IndexError.
    edad_rangos = np.clip(np.digitize([data[0]], rangos) - 1, 0, n_age_bins - 1)
    sexo_one_hot = np.eye(2)[[data[1]]]
    edad_one_hot = np.eye(n_age_bins)[edad_rangos]

    return np.hstack((data[2:], sexo_one_hot[0], edad_one_hot[0]))

# Main Function

In [None]:
def __main__(dataset,learning_rate,batch_size,epochs):
    """Interactive entry point: train the model or predict for one patient.

    The model is built (restoring ``model_weights.pth`` when available). Any
    non-empty answer to the first prompt starts training; an empty answer
    runs a single prediction from data typed on stdin.

    While training, rounds of ``epochs`` epochs are run. After each round
    the best weights tracked in the module-level ``best_weights`` are saved
    and the user may stop, or continue with a new learning rate and/or a new
    number of epochs.

    Args:
        dataset: Path of the CSV file handed to ``import_dataset``.
        learning_rate: Initial learning rate for the Adam optimizer.
        batch_size: Batch size for both data loaders.
        epochs: Number of epochs per training round.
    """
    device = select_device()
    model = load_model('model_weights.pth', device)
    loss_fn = nn.CrossEntropyLoss()
    train_on = input("Do you want to train? Yes(y), No(enter)")
    if not train_on:
        print(f"Predicted class: {enter_data(model, device)}")
        return

    full_dataset = import_dataset(dataset)
    training_data, test_data = split_dataset(full_dataset)
    train_dataloader, test_dataloader = create_dataloader(training_data,test_data,batch_size)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    while True:
        for t in range(epochs):
            print(f"Epoch {t+1}\n-------------------------------")
            train_loop(train_dataloader, model, loss_fn, optimizer, device)
            test_loop(test_dataloader, model, loss_fn, device)
        print("Done! Saving process...")
        # best_weights stays None until an evaluation improves on
        # best_correct; saving None would overwrite any existing weights
        # file with an unusable one.
        if best_weights is not None:
            torch.save(best_weights, 'model_weights.pth')
        print(f"Best Accuracy: {(100*best_correct):>0.1f}% Using Learning Rate: {learning_rate}")

        if input("Should we continue training? No(Any) Yes(Enter) -> "):
            break

        if input("Change the learning rate? Yes(Any) No(Enter) -> "):
            try:
                learning_rate = float(input("New learning rate: "))
            except ValueError:
                print("Ignoring new learning rate")
            else:
                # The optimizer was created with the old rate; push the new
                # value into its parameter groups so it takes effect.
                for group in optimizer.param_groups:
                    group["lr"] = learning_rate

        if input(f"Change the number of epochs ({epochs})? Yes(Any) No(Enter) -> "):
            try:
                epochs = int(input("New number of epochs: "))
            except ValueError:
                print("Ignoring new number of epochs")

# Call main

In [None]:
if __name__ == '__main__':
    # Module-level state shared with test_loop (declared `global` there):
    # the best accuracy seen so far and the weights that produced it.
    best_correct = 0
    best_weights = None
    # Run configuration handed to __main__ below.
    dataset = 'dataset/heart.csv'
    learning_rate = 1e-3
    # The logged run reports 820 training samples, so with this value each
    # epoch is processed as a single batch.
    batch_size = 1025
    epochs = 30
    __main__(dataset,learning_rate,batch_size,epochs)

Do you want to train? Yes(y), No(enter) y


Epoch 1
-------------------------------
loss: 0.353779  [  820/  820]
Test Error: 
 Accuracy: 90.7%, Avg loss: 0.258390 

Epoch 2
-------------------------------
loss: 0.360204  [  820/  820]
Test Error: 
 Accuracy: 89.8%, Avg loss: 0.259477 

Epoch 3
-------------------------------
loss: 0.350064  [  820/  820]
Test Error: 
 Accuracy: 89.8%, Avg loss: 0.260721 

Epoch 4
-------------------------------
loss: 0.350342  [  820/  820]
Test Error: 
 Accuracy: 89.8%, Avg loss: 0.260900 

Epoch 5
-------------------------------
loss: 0.347602  [  820/  820]
Test Error: 
 Accuracy: 90.7%, Avg loss: 0.261014 

Epoch 6
-------------------------------
loss: 0.347604  [  820/  820]
Test Error: 
 Accuracy: 90.7%, Avg loss: 0.261054 

Epoch 7
-------------------------------
loss: 0.341300  [  820/  820]
Test Error: 
 Accuracy: 90.7%, Avg loss: 0.261000 

Epoch 8
-------------------------------
loss: 0.340317  [  820/  820]
Test Error: 
 Accuracy: 90.7%, Avg loss: 0.260801 

Epoch 9
----------------