# Data Preparation Step

* Import the needed libraries

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense
from tensorflow.keras import callbacks

* Load dataset from a CSV file

In [42]:
def load_data(file_path):
    """Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Path (or any other source ``pd.read_csv`` accepts) of the
            CSV data to read.

    Returns:
        The parsed contents as a ``pandas.DataFrame``.
    """
    frame = pd.read_csv(file_path)
    return frame

* Split the dataset into features (X) and target (y)

In [43]:
def split_features_target(df, target_column):
    """Separate a DataFrame into its feature columns and its target column.

    Args:
        df: DataFrame holding both the features and the target.
        target_column: Name of the column to use as the target.

    Returns:
        A ``(X, y)`` tuple: ``X`` is ``df`` without ``target_column`` and
        ``y`` is the ``target_column`` Series. ``df`` itself is not modified.
    """
    features = df.drop(columns=[target_column])
    return features, df[target_column]

* Split dataset into training, validation, and test sets while maintaining class distribution

In [44]:
def split_train_validation_test(X, y, train_size=0.7, test_size=0.2, val_size=0.1, random_state=42):
    """Split features and labels into stratified train, validation and test sets.

    The data is first divided into a training part and a hold-out part; the
    hold-out part is then divided between validation and test. Both splits
    are stratified on the labels and use the same ``random_state``.

    Args:
        X: Feature data accepted by ``train_test_split``.
        y: Labels aligned with ``X``; used for stratification.
        train_size: Fraction of the data assigned to the training set.
        test_size: Fraction of the data assigned to the test set.
        val_size: Fraction of the data assigned to the validation set.
        random_state: Seed forwarded to both ``train_test_split`` calls.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test)``.

    Raises:
        ValueError: If any fraction is not strictly between 0 and 1, or if
            the three fractions do not add up to 1.
    """
    # Validate up front: inconsistent fractions would otherwise silently yield
    # splits whose proportions differ from what the caller asked for.
    fractions = (
        ("train_size", train_size),
        ("test_size", test_size),
        ("val_size", val_size),
    )
    for label, fraction in fractions:
        if not 0 < fraction < 1:
            raise ValueError(
                f"{label} must be strictly between 0 and 1, got {fraction!r}"
            )
    total = train_size + test_size + val_size
    # Tolerance absorbs float rounding (e.g. 0.7 + 0.2 + 0.1 != 1.0 exactly).
    if abs(total - 1.0) > 1e-9:
        raise ValueError(
            "train_size, test_size and val_size must sum to 1, "
            f"got {total!r}"
        )

    # Stage 1: carve the training set off from the hold-out remainder.
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=(1 - train_size), stratify=y, random_state=random_state
    )
    # Stage 2: share the hold-out between validation and test; the test share
    # is expressed relative to the hold-out, not to the full dataset.
    holdout_test_share = test_size / (test_size + val_size)
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=holdout_test_share, stratify=y_temp, random_state=random_state
    )
    return X_train, X_val, X_test, y_train, y_val, y_test

* Print class distributions for training, validation, and test sets

In [6]:
def print_class_distribution(y_train, y_val, y_test):
    """Print the relative class frequencies of each split's labels.

    Args:
        y_train: Training labels (must provide ``value_counts``).
        y_val: Validation labels.
        y_test: Test labels.

    Returns:
        None. The normalized ``value_counts`` of each split is written to
        standard output under its own heading.
    """
    sections = (
        ("Training Class Distribution:\n", y_train),
        ("Validation Class Distribution:\n", y_val),
        ("Test Class Distribution:\n", y_test),
    )
    for heading, labels in sections:
        print(heading, labels.value_counts(normalize=True))

* Perform one-hot encoding for categorical columns

In [7]:
def encode_categorical_columns(df, categorical_columns):
    """One-hot encode the given columns of a DataFrame.

    Args:
        df: DataFrame containing the columns to encode.
        categorical_columns: Names of the columns to replace with indicator
            columns.

    Returns:
        A new DataFrame in which each listed column is replaced by ``uint8``
        indicator columns produced by ``pd.get_dummies``.
    """
    encoded = pd.get_dummies(df, columns=categorical_columns, dtype='uint8')
    return encoded

* Standardize features using StandardScaler (applied to every column of the DataFrame passed in)

In [8]:
def standardize_features(df):
    """Standardize every column of a DataFrame with ``StandardScaler``.

    A new scaler is fitted on ``df`` on every call, so the returned values
    are scaled using the statistics of ``df`` itself.

    Args:
        df: DataFrame of numeric columns to scale.

    Returns:
        A new DataFrame of the scaled values with the same column labels and
        the same index as ``df``.
    """
    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(df)
    # fit_transform returns a bare array; restore the original index as well
    # as the columns so the rows stay aligned with any label Series that
    # shares df's index (e.g. after a shuffled train/test split).
    return pd.DataFrame(scaled_values, columns=df.columns, index=df.index)

* Main

In [9]:
def main():
    """Run the data-preparation walkthrough on ``heart.csv``, printing each stage."""
    # Read the raw data and preview its first rows
    raw = load_data("heart.csv")
    print(raw.head())

    # Separate the predictors from the 'HeartDisease' label
    features, labels = split_features_target(raw, 'HeartDisease')

    # One-hot encode the categorical predictors
    encoded = encode_categorical_columns(
        features,
        ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope'],
    )
    print("Encoded Columns:", encoded.columns)

    # Stratified train / validation / test partition of the encoded data
    splits = split_train_validation_test(encoded, labels)
    X_train, X_val, X_test, y_train, y_val, y_test = splits

    # Confirm that each split keeps a similar class balance
    print_class_distribution(y_train, y_val, y_test)

    # Optional standardization preview.
    # NOTE(review): the scaler is fit on the full encoded frame, not on the
    # training split, and the result is only printed -- the splits above stay
    # unscaled. Fit on X_train only if the scaled data is to be used for
    # model training.
    print(standardize_features(encoded).head())


if __name__ == "__main__":
    main()

   Age Sex ChestPainType  RestingBP  Cholesterol  FastingBS RestingECG  MaxHR  \
0   40   M           ATA        140          289          0     Normal    172   
1   49   F           NAP        160          180          0     Normal    156   
2   37   M           ATA        130          283          0         ST     98   
3   48   F           ASY        138          214          0     Normal    108   
4   54   M           NAP        150          195          0     Normal    122   

  ExerciseAngina  Oldpeak ST_Slope  HeartDisease  
0              N      0.0       Up             0  
1              N      1.0     Flat             1  
2              N      0.0       Up             0  
3              Y      1.5     Flat             1  
4              N      0.0       Up             0  
Encoded Columns: Index(['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak',
       'Sex_F', 'Sex_M', 'ChestPainType_ASY', 'ChestPainType_ATA',
       'ChestPainType_NAP', 'ChestPainType_TA', 

# Bonus section

## Simple Feedforward Neural Network

In [None]:
class TrainingCallbacks:
    """Holds the settings for the Keras callbacks used while training.

    Args:
        early_stopping_patience: Epochs without ``val_accuracy`` improvement
            before training is stopped.
        reduce_lr_patience: Epochs without ``val_accuracy`` improvement
            before the learning rate is reduced.
        reduce_lr_factor: Multiplier applied to the learning rate on each
            reduction.
        min_lr: Lower bound for the learning rate.
    """

    def __init__(self, early_stopping_patience=20, reduce_lr_patience=3,
                 reduce_lr_factor=0.2, min_lr=1e-5):
        # Early-stopping setting
        self.early_stopping_patience = early_stopping_patience
        # Learning-rate reduction settings
        self.reduce_lr_patience = reduce_lr_patience
        self.reduce_lr_factor = reduce_lr_factor
        self.min_lr = min_lr

    def get_callbacks(self):
        """Create the callback objects from the stored settings.

        Returns:
            A two-element list: ``[EarlyStopping, ReduceLROnPlateau]``, both
            monitoring ``val_accuracy`` in ``max`` mode.
        """
        watched_metric = 'val_accuracy'

        # Stop once validation accuracy stalls and roll back to the best weights
        early_stop = callbacks.EarlyStopping(
            monitor=watched_metric,
            mode='max',
            patience=self.early_stopping_patience,
            restore_best_weights=True,
            verbose=1,
        )

        # Shrink the learning rate when validation accuracy plateaus
        lr_reducer = callbacks.ReduceLROnPlateau(
            monitor=watched_metric,
            mode="max",
            factor=self.reduce_lr_factor,
            patience=self.reduce_lr_patience,
            min_lr=self.min_lr,
            verbose=1,
        )

        return [early_stop, lr_reducer]


In [None]:
class FeedforwardNeuralNetwork:
    """Binary classifier with one hidden ReLU layer and a sigmoid output.

    The Keras model is built and compiled at construction time and is
    available as ``self.model``.

    Args:
        input_size: Number of input features per sample.
        hidden_neurons: Number of units in the hidden layer.
        learning_rate: Learning rate used by the Adam optimizer.
    """

    def __init__(self, input_size, hidden_neurons=16, learning_rate=0.001):
        self.input_size = input_size
        self.hidden_neurons = hidden_neurons
        self.learning_rate = learning_rate
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the network from the stored hyperparameters.

        Returns:
            A compiled ``Sequential`` model using binary cross-entropy loss
            and reporting accuracy.
        """
        model = Sequential()
        # Hidden layer
        model.add(Dense(self.hidden_neurons, activation='relu', input_shape=(self.input_size,)))
        # Output layer: a single sigmoid unit for binary classification
        model.add(Dense(1, activation='sigmoid'))
        # Pass an optimizer instance rather than the string 'adam' so that the
        # configured learning rate is actually applied.
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def train(self, X_train, y_train, X_val, y_val, epochs=50, batch_size=32, callbacks=None):
        """Fit the model, validating on ``(X_val, y_val)`` after each epoch.

        Args:
            X_train: Training features.
            y_train: Training labels.
            X_val: Validation features.
            y_val: Validation labels.
            epochs: Maximum number of epochs to run.
            batch_size: Number of samples per gradient update.
            callbacks: Optional list of Keras callbacks passed to ``fit``.

        Returns:
            The history object returned by ``model.fit``.
        """
        history = self.model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, y_val),
            callbacks=callbacks,
            verbose=1,
        )
        return history

    def evaluate(self, X_test, y_test):
        """Evaluate the model on held-out data and print the results.

        Args:
            X_test: Test features.
            y_test: Test labels.

        Returns:
            A ``(loss, accuracy)`` tuple.
        """
        loss, accuracy = self.model.evaluate(X_test, y_test, verbose=0)
        print("Test loss:", loss)
        print("Test accuracy:", accuracy)
        return loss, accuracy

    def predict(self, X):
        """Return the model's predictions for ``X``."""
        return self.model.predict(X)
