# Import necessary libraries

# In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Configuration class
# Centralized configuration to easily manage hyperparameters, file paths, and other settings.


# In [3]:
class Config:
    """Single home for data locations, split settings, and training hyperparameters."""

    # --- Data source and saved artifacts ---
    DATA_URL: str = (
        "https://archive.ics.uci.edu/ml/machine-learning-databases/"
        "heart-disease/processed.cleveland.data"
    )
    MODEL_SAVE_PATH: str = "/content/ft_transformer_model.pth"

    # --- Train/test split ---
    TEST_SIZE: float = 0.2
    RANDOM_STATE: int = 42

    # --- Optimisation ---
    NUM_EPOCHS: int = 500
    BATCH_SIZE: int = 64
    LEARNING_RATE: float = 0.001

    # --- Model shape ---
    # INPUT_DIM is only a starting value: main() reassigns it to the number of
    # columns produced by feature engineering before the model is built.
    INPUT_DIM: int = 13
    NUM_CLASSES: int = 2

# Load, preprocess, and engineer features

# In [4]:
def load_data(url):
    """Read the heart-disease CSV at ``url`` into a DataFrame.

    Column names are supplied here rather than taken from the file, and any
    field written as ``?`` is parsed as NaN.
    """
    feature_columns = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                       'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']
    return pd.read_csv(url, names=feature_columns + ['target'], na_values='?')

def preprocess_data(df):
    """Drop incomplete rows and collapse the target to a binary label.

    Args:
        df: DataFrame containing a numeric ``target`` column.

    Returns:
        A new DataFrame without any row that contains NaN, in which
        ``target`` is 1 for every original value greater than 0 and 0
        otherwise. The input frame is not modified.
    """
    # Explicit copy: assigning a column into the direct result of dropna()
    # can trigger pandas' SettingWithCopyWarning.
    cleaned = df.dropna().copy()
    # Vectorized comparison instead of a per-row Python lambda.
    cleaned['target'] = (cleaned['target'] > 0).astype('int64')
    return cleaned

def engineer_features(df):
    """Add derived features and one-hot encode the categorical columns.

    Derived columns:
        * ``age_group``   - ``age`` bucketed at 40 / 60 / 80 / 100.
        * ``age_thalach`` - product of ``age`` and ``thalach``.
        * ``thalach_bin`` - quartile bucket of ``thalach``.

    Args:
        df: DataFrame with at least the columns ``age``, ``thalach``, ``sex``,
            ``cp``, ``fbs``, ``restecg``, ``exang``, ``slope``, ``ca``, ``thal``.

    Returns:
        A new DataFrame in which the two bucket columns and the listed
        categorical columns are replaced by indicator columns. The input
        frame is not modified.
    """
    # Work on a copy so the caller's DataFrame does not silently gain columns.
    features = df.copy()
    features['age_group'] = pd.cut(
        features['age'],
        bins=[0, 40, 60, 80, 100],
        labels=['young', 'middle', 'senior', 'elderly'],
    )
    features['age_thalach'] = features['age'] * features['thalach']
    features['thalach_bin'] = pd.qcut(
        features['thalach'],
        q=4,
        labels=['low', 'medium-low', 'medium-high', 'high'],
    )
    categorical_columns = ['age_group', 'thalach_bin', 'sex', 'cp', 'fbs',
                           'restecg', 'exang', 'slope', 'ca', 'thal']
    return pd.get_dummies(features, columns=categorical_columns)

def split_and_scale_data(df):
    """Split ``df`` into train/test parts and standardise the features.

    The ``target`` column is the label; every other column is a feature.
    Split size and seed come from ``Config``. The scaler is fitted on the
    training features only and then applied to both parts.

    Returns:
        ``(X_train_scaled, X_test_scaled, y_train, y_test)``.
    """
    labels = df['target']
    features = df.drop(columns='target')

    X_train, X_test, y_train, y_test = train_test_split(
        features,
        labels,
        test_size=Config.TEST_SIZE,
        random_state=Config.RANDOM_STATE,
    )

    # Fit on the training rows only, then reuse the same statistics for test.
    scaler = StandardScaler().fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test), y_train, y_test

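# Split and scale data
# NOTE: the cell below re-declares load_data, preprocess_data, engineer_features and
# split_and_scale_data; being later in the file, these definitions are the ones in effect.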

# In [5]:
def load_data(url):
    """Load the heart-disease table from ``url`` as a DataFrame.

    The fourteen column names are passed to the reader explicitly, and the
    literal ``?`` is treated as a missing value.
    """
    columns = (
        'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
        'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target',
    )
    frame = pd.read_csv(url, names=list(columns), na_values='?')
    return frame

def preprocess_data(df):
    """Drop incomplete rows and collapse the target to a binary label.

    Args:
        df: DataFrame containing a numeric ``target`` column.

    Returns:
        A new DataFrame without any row that contains NaN, in which
        ``target`` is 1 for every original value greater than 0 and 0
        otherwise. The input frame is not modified.
    """
    # Explicit copy: assigning a column into the direct result of dropna()
    # can trigger pandas' SettingWithCopyWarning.
    cleaned = df.dropna().copy()
    # Vectorized comparison instead of a per-row Python lambda.
    cleaned['target'] = (cleaned['target'] > 0).astype('int64')
    return cleaned

def engineer_features(df):
    """Add derived features and one-hot encode the categorical columns.

    Derived columns:
        * ``age_group``   - ``age`` bucketed at 40 / 60 / 80 / 100.
        * ``age_thalach`` - product of ``age`` and ``thalach``.
        * ``thalach_bin`` - quartile bucket of ``thalach``.

    Args:
        df: DataFrame with at least the columns ``age``, ``thalach``, ``sex``,
            ``cp``, ``fbs``, ``restecg``, ``exang``, ``slope``, ``ca``, ``thal``.

    Returns:
        A new DataFrame in which the two bucket columns and the listed
        categorical columns are replaced by indicator columns. The input
        frame is not modified.
    """
    # Work on a copy so the caller's DataFrame does not silently gain columns.
    features = df.copy()
    features['age_group'] = pd.cut(
        features['age'],
        bins=[0, 40, 60, 80, 100],
        labels=['young', 'middle', 'senior', 'elderly'],
    )
    features['age_thalach'] = features['age'] * features['thalach']
    features['thalach_bin'] = pd.qcut(
        features['thalach'],
        q=4,
        labels=['low', 'medium-low', 'medium-high', 'high'],
    )
    categorical_columns = ['age_group', 'thalach_bin', 'sex', 'cp', 'fbs',
                           'restecg', 'exang', 'slope', 'ca', 'thal']
    return pd.get_dummies(features, columns=categorical_columns)

def split_and_scale_data(df):
    """Produce standardised train/test features plus their labels.

    Everything except ``target`` is used as a feature. The split uses
    ``Config.TEST_SIZE`` and ``Config.RANDOM_STATE``; scaling statistics are
    learned from the training portion and reused for the test portion.

    Returns:
        ``(X_train_scaled, X_test_scaled, y_train, y_test)``.
    """
    split_options = {
        'test_size': Config.TEST_SIZE,
        'random_state': Config.RANDOM_STATE,
    }
    X_train, X_test, y_train, y_test = train_test_split(
        df.drop(columns=['target']), df['target'], **split_options
    )

    standardiser = StandardScaler()
    train_scaled = standardiser.fit_transform(X_train)
    test_scaled = standardiser.transform(X_test)
    return train_scaled, test_scaled, y_train, y_test

# Define the FTTransformer model and create a model function

# In [6]:
class FTTransformer(nn.Module):
    """Transformer-encoder classifier for fixed-width tabular rows.

    Each input row is projected by a linear layer to one ``d_model``-wide
    token, passed through a stack of Transformer encoder layers, and mapped
    to class logits.

    NOTE(review): the whole row becomes a single token, so self-attention
    runs over a length-1 sequence; there is no per-feature tokenisation.

    Args:
        input_dim: Number of features per row.
        num_classes: Number of output logits.
        d_model: Width of the token embedding.
        nhead: Number of attention heads.
        num_layers: Number of stacked encoder layers.
        dim_feedforward: Hidden width of each encoder layer's feed-forward part.
        dropout: Dropout probability inside the encoder layers.
    """

    def __init__(self, input_dim, num_classes, d_model=64, nhead=4, num_layers=3, dim_feedforward=256, dropout=0.1):
        super().__init__()

        self.embedding = nn.Linear(input_dim, d_model)

        # batch_first=True is required because forward() feeds tensors laid out
        # as (batch, seq, d_model). With the default (seq, batch, d_model)
        # layout the batch axis would be treated as the sequence axis, letting
        # unrelated samples attend to each other.
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model,
                                                   nhead=nhead,
                                                   dim_feedforward=dim_feedforward,
                                                   dropout=dropout,
                                                   batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x`` of shape ``(batch, input_dim)``."""
        x = self.embedding(x).unsqueeze(1)   # (batch, 1, d_model): one token per row
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)                    # pool over the (length-1) sequence axis
        return self.fc(x)

def create_model():
    """Build an FTTransformer sized from the current ``Config`` values."""
    model_kwargs = {
        'input_dim': Config.INPUT_DIM,
        'num_classes': Config.NUM_CLASSES,
    }
    return FTTransformer(**model_kwargs)

# Train the model

# In [7]:
def train_model(X_train, y_train):
    """Train a fresh model on the given data and save its weights.

    Args:
        X_train: 2-D array-like of training features.
        y_train: 1-D array-like of integer class labels (a pandas Series with
            any index is accepted).

    Returns:
        The trained model.

    Side effects:
        Prints the mean batch loss after every epoch and writes the model's
        ``state_dict`` to ``Config.MODEL_SAVE_PATH``.
    """
    model = create_model()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=Config.LEARNING_RATE)

    # Go through NumPy first. Handing a pandas Series straight to a tensor
    # constructor makes torch probe it with series[0], which is a *label*
    # lookup and raises KeyError when row label 0 is not in the training split.
    features = torch.from_numpy(np.asarray(X_train, dtype=np.float32))
    labels = torch.from_numpy(np.asarray(y_train, dtype=np.int64))

    train_dataset = TensorDataset(features, labels)
    train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True)

    # Nothing switches the model to eval mode during training, so set it once.
    model.train()
    for epoch in range(Config.NUM_EPOCHS):
        total_loss = 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{Config.NUM_EPOCHS}, Loss: {avg_loss:.4f}")

    torch.save(model.state_dict(), Config.MODEL_SAVE_PATH)
    return model

# Evaluate the model

# In [8]:
def evaluate_model(model, X_test, y_test):
    """Score ``model`` on the test data, print the metrics, and return them.

    Returns:
        ``(accuracy, report, conf_matrix)`` as produced by scikit-learn's
        ``accuracy_score``, ``classification_report`` and ``confusion_matrix``.
    """
    model.eval()
    with torch.no_grad():
        logits = model(torch.FloatTensor(X_test))
    # Index of the largest logit per row is the predicted class.
    predicted = torch.max(logits, 1).indices.numpy()

    accuracy = accuracy_score(y_test, predicted)
    report = classification_report(y_test, predicted)
    conf_matrix = confusion_matrix(y_test, predicted)

    print(f"Accuracy: {accuracy:.4f}")
    for heading, payload in (
        ("\nClassification Report:", report),
        ("\nConfusion Matrix:", conf_matrix),
    ):
        print(heading)
        print(payload)

    return accuracy, report, conf_matrix

# Prediction functions

# In [9]:
def predict(model, X_new):
    """Return the highest-scoring class index for each row of ``X_new`` as a NumPy array.

    The model is switched to eval mode and run without gradient tracking.
    """
    model.eval()
    inputs = torch.FloatTensor(X_new)
    with torch.no_grad():
        class_ids = torch.max(model(inputs), 1).indices
    return class_ids.numpy()

def predict_proba(model, X_new):
    """Return per-class probabilities for each row of ``X_new`` as a NumPy array.

    The model's outputs are passed through a softmax over the class axis.
    The model is switched to eval mode and run without gradient tracking.
    """
    model.eval()
    inputs = torch.FloatTensor(X_new)
    with torch.no_grad():
        logits = model(inputs)
        class_probs = logits.softmax(dim=1)
    return class_probs.numpy()

# Plot confusion matrix

# In [10]:
def plot_confusion_matrix(conf_matrix):
    """Show ``conf_matrix`` as an annotated blue heatmap with labelled axes."""
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title('Confusion Matrix')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    fig.tight_layout()
    plt.show()

# Main function to orchestrate the workflow

# In [None]:
def main():
    """Run the full workflow: load, prepare, train, evaluate, plot, and demo predictions.

    Side effects: downloads the dataset, reassigns ``Config.INPUT_DIM``,
    prints progress and metrics, saves the trained weights (via
    ``train_model``), and opens a confusion-matrix plot.
    """
    # Seed torch so weight initialisation and batch shuffling are repeatable;
    # Config.RANDOM_STATE otherwise only controls the train/test split.
    torch.manual_seed(Config.RANDOM_STATE)

    # Load and preprocess data
    print("Loading and preprocessing data...")
    df = load_data(Config.DATA_URL)
    df = preprocess_data(df)
    df = engineer_features(df)

    # Split and scale data
    print("Splitting and scaling data...")
    X_train_scaled, X_test_scaled, y_train, y_test = split_and_scale_data(df)

    # One-hot encoding changes the column count, so size the model from the
    # actual feature matrix rather than the default in Config.
    Config.INPUT_DIM = X_train_scaled.shape[1]

    # Train model
    print("Training model...")
    model = train_model(X_train_scaled, y_train)

    # Evaluate model
    print("Evaluating model...")
    accuracy, report, conf_matrix = evaluate_model(model, X_test_scaled, y_test)

    # Plot confusion matrix
    plot_confusion_matrix(conf_matrix)

    # Example prediction
    print("Making example predictions...")
    new_data = X_test_scaled[:5]  # Just using first 5 test samples as an example
    predictions = predict(model, new_data)
    probabilities = predict_proba(model, new_data)

    print("Example predictions:", predictions)
    print("Prediction probabilities:")
    for i, prob in enumerate(probabilities):
        print(f"Sample {i+1}: Class 0: {prob[0]:.4f}, Class 1: {prob[1]:.4f}")

# Run the workflow only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()