Dataset Extraction and Overview

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Cardiovascular Disease Dataset from its CSV file and preview the first rows
df = pd.read_csv(filepath_or_buffer=r'c:\Users\omarf\Downloads\supp4-3436020.csv')
df.head(n=5)

In [None]:
# Print the column dtypes / non-null counts, then show the missing-value tally per column
df.info()
df.isna().sum()

In [None]:
# Load the Public Health Dataset from its CSV file and preview the first rows
df_2 = pd.read_csv(filepath_or_buffer=r'c:\Users\omarf\Downloads\supp3-3436020.csv')
df_2.head(n=5)

In [None]:
# Print the column dtypes / non-null counts, then show the missing-value tally per column
df_2.info()
df_2.isna().sum()

In [None]:
# Load the Heart Attack Dataset from its CSV file and preview the first rows
df_3 = pd.read_csv(filepath_or_buffer=r'c:\Users\omarf\Downloads\supp2-3436020.csv')
df_3.head(n=5)

In [None]:
# Print the column dtypes / non-null counts, then show the missing-value tally per column
df_3.info()
df_3.isna().sum()

In [None]:
# Load the Heart Failure Prediction Dataset from its CSV file and preview the first rows
df_4 = pd.read_csv(filepath_or_buffer=r'c:\Users\omarf\Downloads\supp1-3436020.csv')
df_4.head(n=5)

In [None]:
# Print the column dtypes / non-null counts, then show the missing-value tally per column
df_4.info()
df_4.isna().sum()

Exploratory Data Analysis

In [None]:
# Cardiovascular Disease Dataset: un-annotated correlation heatmap of the numeric columns
sns.heatmap(data=df.select_dtypes(include='number').corr())

In [None]:
# Public Health Dataset: un-annotated correlation heatmap of the numeric columns
sns.heatmap(data=df_2.select_dtypes(include='number').corr())

In [None]:
# Heart Attack Dataset: un-annotated correlation heatmap of the numeric columns
sns.heatmap(data=df_3.select_dtypes(include='number').corr())

In [None]:
# Heart Failure Prediction Dataset (df_4): un-annotated correlation heatmap of the numeric columns
sns.heatmap(df_4.select_dtypes(include=[np.number]).corr())

In [None]:
# Summary statistics for all four datasets.
# A notebook cell only renders its final bare expression, so the four tables are passed to
# display() explicitly; otherwise only the last one would be shown.
display(
    df.describe(),    # Cardiovascular Disease Dataset
    df_2.describe(),  # Public Health Dataset
    df_3.describe(),  # Heart Attack Dataset
    df_4.describe(),  # Heart Failure Prediction Dataset
)

In [None]:
# Heart-disease patients by gender in the Public Health Dataset.
# The 0/1 sex codes are labelled on a separate Series rather than by overwriting
# df_2['sex']: rewriting the column in place made this cell non-idempotent (on a second
# run the already-labelled strings are not found in the mapping and become NaN) and
# changed df_2 for every cell that runs afterwards.
sex_labels = df_2['sex'].map({1: 'Male', 0: 'Female'})

# Count the positive cases (target == 1) per gender. Sorting the index in descending
# order puts 'Male' before 'Female', so blue is always Male and red is always Female
# regardless of which group is larger.
heart_disease_by_sex = (
    sex_labels[df_2['target'] == 1]
    .value_counts()
    .sort_index(ascending=False)
)

#Plot the bar chart
plt.figure(figsize=(8,6))
heart_disease_by_sex.plot(kind='bar', color=['blue', 'red'])
plt.title('Heart Disease Patients by Gender in the Public Health Dataset')
plt.xlabel('Gender')
plt.ylabel('Number of Patients')
plt.xticks(rotation=0)
plt.show()

In [None]:
# Heart-failure patients by gender in the Heart Failure Prediction Dataset.
# Count the positive cases (target == 1) per sex code.
heart_failure_by_sex = df_4[df_4['target'] == 1]['sex'].value_counts()

# Replace the numeric codes with readable labels (1 -> 'Male', 0 -> 'Female') and sort the
# index in descending order so 'Male' comes first: blue is then always Male and red always
# Female, whichever group is larger.
# The variable name is left unchanged in case other cells reference it; note that its
# index holds text labels, not numeric codes.
heart_failure_by_sex_numeric = (
    heart_failure_by_sex
    .rename(index={1: 'Male', 0: 'Female'})
    .sort_index(ascending=False)
)

# Plot the bar chart
plt.figure(figsize=(8, 6))
heart_failure_by_sex_numeric.plot(kind='bar', color=['blue', 'red'])
plt.title('Heart Failure by Gender of Patients in the Heart Failure Prediction Dataset')
plt.xlabel('Gender')  # the tick labels already read Male / Female
plt.ylabel('Number of Patients')
plt.xticks(rotation=0)
plt.show()

In [None]:
# Annotated correlation matrix over the numeric columns of the Cardiovascular Disease Dataset
numeric_df = df.select_dtypes(include='number')
corr_matrix = numeric_df.corr()

# Draw on an explicitly created 12x8 figure, one cell per column pair, values to 2 decimals
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(data=corr_matrix, cmap='coolwarm', annot=True, fmt='.2f', ax=ax)
ax.set_title('Correlation Matrix for the Cardiovascular Disease Dataset')
plt.show()

In [None]:
# Annotated correlation matrix over the numeric columns of the Public Health Dataset
numeric_df_2 = df_2.select_dtypes(include='number')
corr_matrix_2 = numeric_df_2.corr()

# Draw on an explicitly created 12x8 figure, one cell per column pair, values to 2 decimals
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(data=corr_matrix_2, cmap='coolwarm', annot=True, fmt='.2f', ax=ax)
ax.set_title('Correlation Matrix for the Public Health Dataset')
plt.show()

In [None]:
# Annotated correlation matrix over the numeric columns of the Heart Attack Dataset
numeric_df_3 = df_3.select_dtypes(include='number')
corr_matrix_3 = numeric_df_3.corr()

# Draw on an explicitly created 12x8 figure, one cell per column pair, values to 2 decimals
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(data=corr_matrix_3, cmap='coolwarm', annot=True, fmt='.2f', ax=ax)
ax.set_title('Correlation Matrix for the Heart Attack Dataset')
plt.show()

In [None]:
#Correlation Matrix for the numeric columns in the Heart Failure Prediction Dataset

# Select from df_4 (the Heart Failure Prediction Dataset). The previous version selected
# from df, so the figure titled "Heart Failure Prediction" actually showed the
# Cardiovascular Disease correlations.
numeric_df_4 = df_4.select_dtypes(include=[np.number])
corr_matrix_4 = numeric_df_4.corr()

plt.figure(figsize=(12, 8))
sns.heatmap(corr_matrix_4, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Matrix for the Heart Failure Prediction Dataset')
plt.show()

Implementing ML models in the Datasets without Feature Engineering

In [None]:
#Implementing Machine Learning Models in the Cardiovascular Disease Dataset

# Importing necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler

# Splitting the dataset into features and target
X = df.drop(columns=['HeartDisease'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df['HeartDisease']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Models to evaluate. Estimators with internal randomness get a fixed random_state so that
# re-running the cell reproduces the same results table.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# One list per results-table column, filled in model order
metrics = {
    column: []
    for column in ("Model", "Accuracy", "Precision", "Recall", "F1 Score", "AUC")
}

# Train and score every model on the *scaled* features. Previously the scaled arrays were
# computed but the raw X_train / X_test were used, so scale-sensitive models (SVM, KNN,
# logistic regression) were trained on unstandardized inputs.
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    # Positive-class probability, needed for AUC; None if the model cannot produce one
    y_proba = model.predict_proba(X_test_scaled)[:, 1] if hasattr(model, "predict_proba") else None

    # All scores are stored as percentages
    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results = pd.DataFrame(metrics)
print(results)

In [None]:
#Implementing Machine Learning Models in the Public Health Dataset

# Importing necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler

# Splitting the dataset into features and target
X = df_2.drop(columns=['target'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df_2['target']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Models to evaluate. Estimators with internal randomness get a fixed random_state so that
# re-running the cell reproduces the same results table.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# One list per results-table column, filled in model order
metrics = {
    column: []
    for column in ("Model", "Accuracy", "Precision", "Recall", "F1 Score", "AUC")
}

# Train and score every model on the *scaled* features. Previously the scaled arrays were
# computed but the raw X_train / X_test were used, so scale-sensitive models (SVM, KNN,
# logistic regression) were trained on unstandardized inputs.
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    # Positive-class probability, needed for AUC; None if the model cannot produce one
    y_proba = model.predict_proba(X_test_scaled)[:, 1] if hasattr(model, "predict_proba") else None

    # All scores are stored as percentages
    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results = pd.DataFrame(metrics)
print(results)

In [None]:
#Implement Machine Learning Models in the Heart Failure Prediction Dataset (df_4)
# NOTE: this cell was previously headed "Heart Attack Dataset", but the data it uses is
# df_4, which is loaded as the Heart Failure Prediction Dataset.

# Importing necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler

# Splitting the dataset into features and target
X = df_4.drop(columns=['target'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df_4['target']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Models to evaluate. Estimators with internal randomness get a fixed random_state so that
# re-running the cell reproduces the same results table.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# One list per results-table column, filled in model order
metrics = {
    column: []
    for column in ("Model", "Accuracy", "Precision", "Recall", "F1 Score", "AUC")
}

# Train and score every model on the *scaled* features. Previously the scaled arrays were
# computed but the raw X_train / X_test were used, so scale-sensitive models (SVM, KNN,
# logistic regression) were trained on unstandardized inputs.
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    # Positive-class probability, needed for AUC; None if the model cannot produce one
    y_proba = model.predict_proba(X_test_scaled)[:, 1] if hasattr(model, "predict_proba") else None

    # All scores are stored as percentages
    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results = pd.DataFrame(metrics)
print(results)

In [None]:
#Implement Machine Learning Models in the Heart Attack Dataset (df_3)
# NOTE: this cell was previously headed "Heart Failure Prediction Dataset", but the data it
# uses is df_3, which is loaded as the Heart Attack Dataset.

# Importing necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler

# Splitting the dataset into features and target
X = df_3.drop(columns=['cardio'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df_3['cardio']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Models to evaluate. Estimators with internal randomness get a fixed random_state so that
# re-running the cell reproduces the same results table.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# One list per results-table column, filled in model order
metrics = {
    column: []
    for column in ("Model", "Accuracy", "Precision", "Recall", "F1 Score", "AUC")
}

# Train and score every model on the *scaled* features. Previously the scaled arrays were
# computed but the raw X_train / X_test were used, so scale-sensitive models (SVM, KNN,
# logistic regression) were trained on unstandardized inputs.
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    # Positive-class probability, needed for AUC; None if the model cannot produce one
    y_proba = model.predict_proba(X_test_scaled)[:, 1] if hasattr(model, "predict_proba") else None

    # All scores are stored as percentages
    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results = pd.DataFrame(metrics)
print(results)

Implementing DL algorithms in the datasets without Feature Engineering

In [None]:
#Implementing Neural Networks in the Cardiovascular Disease Dataset

#Importing necessary libraries
import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Seed the Python, NumPy and TensorFlow random number generators in one call so that
# weight initialisation and batch shuffling start from the same state on every run.
tf.keras.utils.set_random_seed(42)

# Prepare the data from df
X_nn = df.drop(columns=['HeartDisease'])
y_nn = df['HeartDisease']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features (scaler fitted on the training split only)
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)


def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the scaled training split and score it on the test split.

    Reads the notebook-level X_train_nn_scaled, y_train_nn, X_test_nn_scaled and y_test_nn.

    Args:
        model_arch: list of hidden Keras layers; a single-unit sigmoid output layer is
            appended to it to form the model.
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple (accuracy, precision, recall, f1, auc), each expressed as a percentage.
    """
    model = Sequential(model_arch + [Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy')
    model.fit(X_train_nn_scaled, y_train_nn, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predicted positive-class probabilities, thresholded at 0.5 for the hard labels
    y_proba = model.predict(X_test_nn_scaled).ravel()
    y_pred = (y_proba > 0.5).astype(int)

    # Metrics (multiplied by 100)
    acc = accuracy_score(y_test_nn, y_pred) * 100
    prec = precision_score(y_test_nn, y_pred) * 100
    rec = recall_score(y_test_nn, y_pred) * 100
    f1 = f1_score(y_test_nn, y_pred) * 100
    auc = roc_auc_score(y_test_nn, y_proba) * 100

    return acc, prec, rec, f1, auc


results = {'Model': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': [], 'AUC': []}

n_features = X_train_nn_scaled.shape[1]

# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_features),
    Dense(32, activation='relu'),
    Dense(16, activation='relu')
]

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [
    Dense(32, activation='relu', input_dim=n_features)
]

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_features),
    Dense(32, activation='relu')
]

# Train each architecture for 100 epochs and record its scores (same order as before)
for label, arch in (('DNN', dnn_arch), ('ANN', ann_arch), ('MLP', mlp_arch)):
    acc, prec, rec, f1, auc = build_and_evaluate(arch, epochs=100)
    results['Model'].append(label)
    results['Accuracy'].append(acc)
    results['Precision'].append(prec)
    results['Recall'].append(rec)
    results['F1 Score'].append(f1)
    results['AUC'].append(auc)

# Create a results DataFrame and print
results_df = pd.DataFrame(results)
print(results_df)

In [None]:
#Implementing Neural Networks in the Public Health Dataset

#Importing necessary libraries
import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Seed the Python, NumPy and TensorFlow random number generators in one call so that
# weight initialisation and batch shuffling start from the same state on every run.
tf.keras.utils.set_random_seed(42)

# Prepare the data from df_2
X_nn = df_2.drop(columns=['target'])
y_nn = df_2['target']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features (scaler fitted on the training split only)
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)


def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the scaled training split and score it on the test split.

    Reads the notebook-level X_train_nn_scaled, y_train_nn, X_test_nn_scaled and y_test_nn.

    Args:
        model_arch: list of hidden Keras layers; a single-unit sigmoid output layer is
            appended to it to form the model.
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple (accuracy, precision, recall, f1, auc), each expressed as a percentage.
    """
    model = Sequential(model_arch + [Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy')
    model.fit(X_train_nn_scaled, y_train_nn, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predicted positive-class probabilities, thresholded at 0.5 for the hard labels
    y_proba = model.predict(X_test_nn_scaled).ravel()
    y_pred = (y_proba > 0.5).astype(int)

    # Metrics (multiplied by 100)
    acc = accuracy_score(y_test_nn, y_pred) * 100
    prec = precision_score(y_test_nn, y_pred) * 100
    rec = recall_score(y_test_nn, y_pred) * 100
    f1 = f1_score(y_test_nn, y_pred) * 100
    auc = roc_auc_score(y_test_nn, y_proba) * 100

    return acc, prec, rec, f1, auc


results = {'Model': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': [], 'AUC': []}

n_features = X_train_nn_scaled.shape[1]

# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_features),
    Dense(32, activation='relu'),
    Dense(16, activation='relu')
]

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [
    Dense(32, activation='relu', input_dim=n_features)
]

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_features),
    Dense(32, activation='relu')
]

# Train each architecture for 100 epochs and record its scores (same order as before)
for label, arch in (('DNN', dnn_arch), ('ANN', ann_arch), ('MLP', mlp_arch)):
    acc, prec, rec, f1, auc = build_and_evaluate(arch, epochs=100)
    results['Model'].append(label)
    results['Accuracy'].append(acc)
    results['Precision'].append(prec)
    results['Recall'].append(rec)
    results['F1 Score'].append(f1)
    results['AUC'].append(auc)

# Create a results DataFrame and print
results_df_2 = pd.DataFrame(results)
print(results_df_2)

In [None]:
#Implementing Neural Networks in the Heart Attack Dataset

#Importing necessary libraries
import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Seed the Python, NumPy and TensorFlow random number generators in one call so that
# weight initialisation and batch shuffling start from the same state on every run.
tf.keras.utils.set_random_seed(42)

# Prepare the data from df_3
X_nn = df_3.drop(columns=['cardio'])
y_nn = df_3['cardio']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features (scaler fitted on the training split only)
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)


def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the scaled training split and score it on the test split.

    Reads the notebook-level X_train_nn_scaled, y_train_nn, X_test_nn_scaled and y_test_nn.

    Args:
        model_arch: list of hidden Keras layers; a single-unit sigmoid output layer is
            appended to it to form the model.
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple (accuracy, precision, recall, f1, auc), each expressed as a percentage.
    """
    model = Sequential(model_arch + [Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy')
    model.fit(X_train_nn_scaled, y_train_nn, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predicted positive-class probabilities, thresholded at 0.5 for the hard labels
    y_proba = model.predict(X_test_nn_scaled).ravel()
    y_pred = (y_proba > 0.5).astype(int)

    # Metrics (multiplied by 100)
    acc = accuracy_score(y_test_nn, y_pred) * 100
    prec = precision_score(y_test_nn, y_pred) * 100
    rec = recall_score(y_test_nn, y_pred) * 100
    f1 = f1_score(y_test_nn, y_pred) * 100
    auc = roc_auc_score(y_test_nn, y_proba) * 100

    return acc, prec, rec, f1, auc


results = {'Model': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': [], 'AUC': []}

n_features = X_train_nn_scaled.shape[1]

# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_features),
    Dense(32, activation='relu'),
    Dense(16, activation='relu')
]

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [
    Dense(32, activation='relu', input_dim=n_features)
]

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_features),
    Dense(32, activation='relu')
]

# Train each architecture for 100 epochs and record its scores (same order as before)
for label, arch in (('DNN', dnn_arch), ('ANN', ann_arch), ('MLP', mlp_arch)):
    acc, prec, rec, f1, auc = build_and_evaluate(arch, epochs=100)
    results['Model'].append(label)
    results['Accuracy'].append(acc)
    results['Precision'].append(prec)
    results['Recall'].append(rec)
    results['F1 Score'].append(f1)
    results['AUC'].append(auc)

# Create a results DataFrame and print
results_df_3 = pd.DataFrame(results)
print(results_df_3)


In [None]:
#Implementing Neural Networks in the Heart Failure Prediction Dataset

#Importing necessary libraries
import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Seed the Python, NumPy and TensorFlow random number generators in one call so that
# weight initialisation and batch shuffling start from the same state on every run.
tf.keras.utils.set_random_seed(42)

# Prepare the data from df_4
X_nn = df_4.drop(columns=['target'])
y_nn = df_4['target']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features (scaler fitted on the training split only)
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)


def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the scaled training split and score it on the test split.

    Reads the notebook-level X_train_nn_scaled, y_train_nn, X_test_nn_scaled and y_test_nn.

    Args:
        model_arch: list of hidden Keras layers; a single-unit sigmoid output layer is
            appended to it to form the model.
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple (accuracy, precision, recall, f1, auc), each expressed as a percentage.
    """
    model = Sequential(model_arch + [Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy')
    model.fit(X_train_nn_scaled, y_train_nn, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predicted positive-class probabilities, thresholded at 0.5 for the hard labels
    y_proba = model.predict(X_test_nn_scaled).ravel()
    y_pred = (y_proba > 0.5).astype(int)

    # Metrics (multiplied by 100)
    acc = accuracy_score(y_test_nn, y_pred) * 100
    prec = precision_score(y_test_nn, y_pred) * 100
    rec = recall_score(y_test_nn, y_pred) * 100
    f1 = f1_score(y_test_nn, y_pred) * 100
    auc = roc_auc_score(y_test_nn, y_proba) * 100

    return acc, prec, rec, f1, auc


results = {'Model': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': [], 'AUC': []}

n_features = X_train_nn_scaled.shape[1]

# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_features),
    Dense(32, activation='relu'),
    Dense(16, activation='relu')
]

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [
    Dense(32, activation='relu', input_dim=n_features)
]

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_features),
    Dense(32, activation='relu')
]

# Train each architecture for 100 epochs and record its scores (same order as before)
for label, arch in (('DNN', dnn_arch), ('ANN', ann_arch), ('MLP', mlp_arch)):
    acc, prec, rec, f1, auc = build_and_evaluate(arch, epochs=100)
    results['Model'].append(label)
    results['Accuracy'].append(acc)
    results['Precision'].append(prec)
    results['Recall'].append(rec)
    results['F1 Score'].append(f1)
    results['AUC'].append(auc)

# Create a results DataFrame and print
results_df_4 = pd.DataFrame(results)
print(results_df_4)


Implementing ML algorithms in the datasets with Feature Engineering (PCA)

In [None]:
#Applying PCA to the Cardiovascular Disease Dataset
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
# The classifiers are imported here as well so the cell does not depend on an earlier
# modelling cell having been run first.
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier

# Splitting the dataset into features and target
X = df.drop(columns=['HeartDisease'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df['HeartDisease']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the data (scaler fitted on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Applying PCA: components are learned from the training split and reused on the test split
pca = PCA(n_components=0.95)  # Retain 95% of variance
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Models to evaluate. Estimators with internal randomness get a fixed random_state so that
# re-running the cell reproduces the same results table.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# One list per results-table column, filled in model order
metrics = {
    column: []
    for column in ("Model", "Accuracy", "Precision", "Recall", "F1 Score", "AUC")
}

# Training and evaluating each model on the PCA-projected features
for name, model in models.items():
    model.fit(X_train_pca, y_train)
    y_pred = model.predict(X_test_pca)
    # Positive-class probability, needed for AUC; None if the model cannot produce one
    y_proba = model.predict_proba(X_test_pca)[:, 1] if hasattr(model, "predict_proba") else None

    # All scores are stored as percentages
    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results_pca = pd.DataFrame(metrics)
print(results_pca)

In [None]:
#Applying PCA in the Public Health Dataset and the ML algorithms
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
# The classifiers are imported here as well so the cell does not depend on an earlier
# modelling cell having been run first.
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier

# Splitting the dataset into features and target
X = df_2.drop(columns=['target'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df_2['target']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the data (scaler fitted on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Applying PCA: components are learned from the training split and reused on the test split
pca = PCA(n_components=0.95)  # Retain 95% of variance
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Models to evaluate. Estimators with internal randomness get a fixed random_state so that
# re-running the cell reproduces the same results table.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# One list per results-table column, filled in model order
metrics = {
    column: []
    for column in ("Model", "Accuracy", "Precision", "Recall", "F1 Score", "AUC")
}

# Training and evaluating each model on the PCA-projected features
for name, model in models.items():
    model.fit(X_train_pca, y_train)
    y_pred = model.predict(X_test_pca)
    # Positive-class probability, needed for AUC; None if the model cannot produce one
    y_proba = model.predict_proba(X_test_pca)[:, 1] if hasattr(model, "predict_proba") else None

    # All scores are stored as percentages
    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results_pca_2 = pd.DataFrame(metrics)
print(results_pca_2)

In [None]:
#Applying PCA in the Heart Attack Dataset and the ML algorithms

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
# The classifiers are imported here as well so the cell does not depend on an earlier
# modelling cell having been run first.
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier

# Splitting the dataset into features and target
X = df_3.drop(columns=['cardio'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df_3['cardio']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the data (scaler fitted on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Applying PCA: components are learned from the training split and reused on the test split
pca = PCA(n_components=0.95)  # Retain 95% of variance
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Models to evaluate. Estimators with internal randomness get a fixed random_state so that
# re-running the cell reproduces the same results table.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# One list per results-table column, filled in model order
metrics = {
    column: []
    for column in ("Model", "Accuracy", "Precision", "Recall", "F1 Score", "AUC")
}

# Training and evaluating each model on the PCA-projected features
for name, model in models.items():
    model.fit(X_train_pca, y_train)
    y_pred = model.predict(X_test_pca)
    # Positive-class probability, needed for AUC; None if the model cannot produce one
    y_proba = model.predict_proba(X_test_pca)[:, 1] if hasattr(model, "predict_proba") else None

    # All scores are stored as percentages
    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results_pca_3 = pd.DataFrame(metrics)
print(results_pca_3)

In [None]:
# Applying PCA to the Heart Failure Prediction Dataset and evaluating the ML algorithms

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Splitting the dataset into features and target
X = df_4.drop(columns=['target'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df_4['target']

# Splitting into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the data (statistics are learned from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Applying PCA: fitted on the training split, then reused to project the test split
pca = PCA(n_components=0.95)  # Retain 95% of variance
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Models to evaluate. Every estimator that accepts a seed gets random_state=42
# (the same seed as the split) so the reported metrics are reproducible.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Metrics dictionary: one list per column of the results table
metrics = {
    "Model": [],
    "Accuracy": [],
    "Precision": [],
    "Recall": [],
    "F1 Score": [],
    "AUC": []
}

# Training and evaluating each model; every score is stored as a percentage
for name, model in models.items():
    model.fit(X_train_pca, y_train)
    y_pred = model.predict(X_test_pca)
    # AUC needs the positive-class probability; it is left as None for models without predict_proba
    y_proba = model.predict_proba(X_test_pca)[:, 1] if hasattr(model, "predict_proba") else None

    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results_pca_4 = pd.DataFrame(metrics)
print(results_pca_4)

Implementing DL algorithms on the datasets with feature engineering (PCA)

In [None]:
# Implementing Neural Networks in the Cardiovascular Disease Dataset with PCA

# Importing necessary libraries
import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Seed Python, NumPy and TensorFlow before any layer is created so that
# weight initialisation and batch shuffling are reproducible.
tf.keras.utils.set_random_seed(42)

# Prepare the data from df
X_nn = df.drop(columns=['HeartDisease'])
y_nn = df['HeartDisease']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)

# Applying PCA: fit on the scaled training split only (no test rows leak into
# the projection), then project the test split with the same components.
pca = PCA(n_components=0.95)  # Retain 95% of variance
X_train_nn_pca = pca.fit_transform(X_train_nn_scaled)
X_test_nn_pca = pca.transform(X_test_nn_scaled)
n_inputs = X_train_nn_pca.shape[1]  # number of principal components fed to the networks


# Define a function to compile, train and evaluate a model
def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the PCA features and score it on the test split.

    Args:
        model_arch: list of hidden Keras layers; a 1-unit sigmoid output layer is appended.
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple (accuracy, precision, recall, f1, auc), each multiplied by 100.
    """
    model = Sequential(model_arch + [Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy')
    model.fit(X_train_nn_pca, y_train_nn, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predictions: probabilities above 0.5 are labelled as the positive class
    y_proba = model.predict(X_test_nn_pca, verbose=0).ravel()
    y_pred = (y_proba > 0.5).astype(int)

    # Metrics (multiplied by 100)
    acc = accuracy_score(y_test_nn, y_pred) * 100
    prec = precision_score(y_test_nn, y_pred) * 100
    rec = recall_score(y_test_nn, y_pred) * 100
    f1 = f1_score(y_test_nn, y_pred) * 100
    auc = roc_auc_score(y_test_nn, y_proba) * 100

    return acc, prec, rec, f1, auc


# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
    Dense(16, activation='relu')
]

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [
    Dense(32, activation='relu', input_dim=n_inputs)
]

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu')
]

# Train every architecture with the same settings and collect one row per model
results = {'Model': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': [], 'AUC': []}
for model_name, arch in [('DNN', dnn_arch), ('ANN', ann_arch), ('MLP', mlp_arch)]:
    acc, prec, rec, f1, auc = build_and_evaluate(arch, epochs=100)
    results['Model'].append(model_name)
    results['Accuracy'].append(acc)
    results['Precision'].append(prec)
    results['Recall'].append(rec)
    results['F1 Score'].append(f1)
    results['AUC'].append(auc)

# Create a results DataFrame and print
results_df_pca = pd.DataFrame(results)
print(results_df_pca)


In [None]:
# Implementing Neural Networks in the Public Health Dataset with PCA

import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Seed Python, NumPy and TensorFlow before any layer is created so that
# weight initialisation and batch shuffling are reproducible.
tf.keras.utils.set_random_seed(42)

# Prepare the data from df_2
X_nn = df_2.drop(columns=['target'])
y_nn = df_2['target']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)

# Applying PCA: fit on the scaled training split only (no test rows leak into
# the projection), then project the test split with the same components.
pca = PCA(n_components=0.95)  # Retain 95% of variance
X_train_nn_pca = pca.fit_transform(X_train_nn_scaled)
X_test_nn_pca = pca.transform(X_test_nn_scaled)
n_inputs = X_train_nn_pca.shape[1]  # number of principal components fed to the networks


# Define a function to compile, train and evaluate a model
def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the PCA features and score it on the test split.

    Args:
        model_arch: list of hidden Keras layers; a 1-unit sigmoid output layer is appended.
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple (accuracy, precision, recall, f1, auc), each multiplied by 100.
    """
    model = Sequential(model_arch + [Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy')
    model.fit(X_train_nn_pca, y_train_nn, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predictions: probabilities above 0.5 are labelled as the positive class
    y_proba = model.predict(X_test_nn_pca, verbose=0).ravel()
    y_pred = (y_proba > 0.5).astype(int)

    # Metrics (multiplied by 100)
    acc = accuracy_score(y_test_nn, y_pred) * 100
    prec = precision_score(y_test_nn, y_pred) * 100
    rec = recall_score(y_test_nn, y_pred) * 100
    f1 = f1_score(y_test_nn, y_pred) * 100
    auc = roc_auc_score(y_test_nn, y_proba) * 100

    return acc, prec, rec, f1, auc


# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
    Dense(16, activation='relu')
]

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [
    Dense(32, activation='relu', input_dim=n_inputs)
]

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu')
]

# Train every architecture with the same settings and collect one row per model
results = {'Model': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': [], 'AUC': []}
for model_name, arch in [('DNN', dnn_arch), ('ANN', ann_arch), ('MLP', mlp_arch)]:
    acc, prec, rec, f1, auc = build_and_evaluate(arch, epochs=100)
    results['Model'].append(model_name)
    results['Accuracy'].append(acc)
    results['Precision'].append(prec)
    results['Recall'].append(rec)
    results['F1 Score'].append(f1)
    results['AUC'].append(auc)

# Create a results DataFrame and print
results_df_pca_2 = pd.DataFrame(results)
print(results_df_pca_2)


In [None]:
# Implementing Neural Networks in the Heart Attack Dataset with PCA

import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Seed Python, NumPy and TensorFlow before any layer is created so that
# weight initialisation and batch shuffling are reproducible.
tf.keras.utils.set_random_seed(42)

# Prepare the data from df_3
X_nn = df_3.drop(columns=['cardio'])
y_nn = df_3['cardio']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)

# Applying PCA: fit on the scaled training split only (no test rows leak into
# the projection), then project the test split with the same components.
pca = PCA(n_components=0.95)  # Retain 95% of variance
X_train_nn_pca = pca.fit_transform(X_train_nn_scaled)
X_test_nn_pca = pca.transform(X_test_nn_scaled)
n_inputs = X_train_nn_pca.shape[1]  # number of principal components fed to the networks


# Define a function to compile, train and evaluate a model
def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the PCA features and score it on the test split.

    Args:
        model_arch: list of hidden Keras layers; a 1-unit sigmoid output layer is appended.
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple (accuracy, precision, recall, f1, auc), each multiplied by 100.
    """
    model = Sequential(model_arch + [Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy')
    model.fit(X_train_nn_pca, y_train_nn, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predictions: probabilities above 0.5 are labelled as the positive class
    y_proba = model.predict(X_test_nn_pca, verbose=0).ravel()
    y_pred = (y_proba > 0.5).astype(int)

    # Metrics (multiplied by 100)
    acc = accuracy_score(y_test_nn, y_pred) * 100
    prec = precision_score(y_test_nn, y_pred) * 100
    rec = recall_score(y_test_nn, y_pred) * 100
    f1 = f1_score(y_test_nn, y_pred) * 100
    auc = roc_auc_score(y_test_nn, y_proba) * 100

    return acc, prec, rec, f1, auc


# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
    Dense(16, activation='relu')
]

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [
    Dense(32, activation='relu', input_dim=n_inputs)
]

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu')
]

# Train every architecture with the same settings and collect one row per model
results = {'Model': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': [], 'AUC': []}
for model_name, arch in [('DNN', dnn_arch), ('ANN', ann_arch), ('MLP', mlp_arch)]:
    acc, prec, rec, f1, auc = build_and_evaluate(arch, epochs=100)
    results['Model'].append(model_name)
    results['Accuracy'].append(acc)
    results['Precision'].append(prec)
    results['Recall'].append(rec)
    results['F1 Score'].append(f1)
    results['AUC'].append(auc)

# Create a results DataFrame and print
results_df_pca_3 = pd.DataFrame(results)
print(results_df_pca_3)


In [None]:
# Implementing Neural Networks in the Heart Failure Prediction Dataset with PCA

import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Seed Python, NumPy and TensorFlow before any layer is created so that
# weight initialisation and batch shuffling are reproducible.
tf.keras.utils.set_random_seed(42)

# Prepare the data from df_4
X_nn = df_4.drop(columns=['target'])
y_nn = df_4['target']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)

# Applying PCA: fit on the scaled training split only (no test rows leak into
# the projection), then project the test split with the same components.
pca = PCA(n_components=0.95)  # Retain 95% of variance
X_train_nn_pca = pca.fit_transform(X_train_nn_scaled)
X_test_nn_pca = pca.transform(X_test_nn_scaled)
n_inputs = X_train_nn_pca.shape[1]  # number of principal components fed to the networks


# Define a function to compile, train and evaluate a model
def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the PCA features and score it on the test split.

    Args:
        model_arch: list of hidden Keras layers; a 1-unit sigmoid output layer is appended.
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple (accuracy, precision, recall, f1, auc), each multiplied by 100.
    """
    model = Sequential(model_arch + [Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy')
    model.fit(X_train_nn_pca, y_train_nn, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predictions: probabilities above 0.5 are labelled as the positive class
    y_proba = model.predict(X_test_nn_pca, verbose=0).ravel()
    y_pred = (y_proba > 0.5).astype(int)

    # Metrics (multiplied by 100)
    acc = accuracy_score(y_test_nn, y_pred) * 100
    prec = precision_score(y_test_nn, y_pred) * 100
    rec = recall_score(y_test_nn, y_pred) * 100
    f1 = f1_score(y_test_nn, y_pred) * 100
    auc = roc_auc_score(y_test_nn, y_proba) * 100

    return acc, prec, rec, f1, auc


# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
    Dense(16, activation='relu')
]

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [
    Dense(32, activation='relu', input_dim=n_inputs)
]

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu')
]

# Train every architecture with the same settings and collect one row per model
results = {'Model': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': [], 'AUC': []}
for model_name, arch in [('DNN', dnn_arch), ('ANN', ann_arch), ('MLP', mlp_arch)]:
    acc, prec, rec, f1, auc = build_and_evaluate(arch, epochs=100)
    results['Model'].append(model_name)
    results['Accuracy'].append(acc)
    results['Precision'].append(prec)
    results['Recall'].append(rec)
    results['F1 Score'].append(f1)
    results['AUC'].append(auc)

# Create a results DataFrame and print
results_df_pca_4 = pd.DataFrame(results)
print(results_df_pca_4)

Implement ML Algorithms in the datasets with Feature Engineering (Lasso)

In [None]:
# Applying Lasso to the Cardiovascular Disease Dataset
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Splitting the dataset into features and target
X = df.drop(columns=['HeartDisease'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df['HeartDisease']

# Splitting into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the data (statistics are learned from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Applying Lasso as a feature selector (fitted on the training split)
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_scaled, y_train)

# Selecting features with non-zero coefficients
selected_features = lasso.coef_ != 0
if not selected_features.any():
    # Without this check the classifiers below would be fitted on a 0-column matrix
    raise ValueError("Lasso(alpha=0.1) removed every feature; lower alpha to keep at least one.")
X_train_lasso = X_train_scaled[:, selected_features]
X_test_lasso = X_test_scaled[:, selected_features]

# Models to evaluate. Every estimator that accepts a seed gets random_state=42
# (the same seed as the split) so the reported metrics are reproducible.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Metrics dictionary: one list per column of the results table
metrics = {
    "Model": [],
    "Accuracy": [],
    "Precision": [],
    "Recall": [],
    "F1 Score": [],
    "AUC": []
}

# Training and evaluating each model; every score is stored as a percentage
for name, model in models.items():
    model.fit(X_train_lasso, y_train)
    y_pred = model.predict(X_test_lasso)
    # AUC needs the positive-class probability; it is left as None for models without predict_proba
    y_proba = model.predict_proba(X_test_lasso)[:, 1] if hasattr(model, "predict_proba") else None

    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results_lasso = pd.DataFrame(metrics)
print(results_lasso)

In [None]:
# Applying Lasso to the Public Health Dataset
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Splitting the dataset into features and target
X = df_2.drop(columns=['target'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df_2['target']

# Splitting into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the data (statistics are learned from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Applying Lasso as a feature selector (fitted on the training split)
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_scaled, y_train)

# Selecting features with non-zero coefficients
selected_features = lasso.coef_ != 0
if not selected_features.any():
    # Without this check the classifiers below would be fitted on a 0-column matrix
    raise ValueError("Lasso(alpha=0.1) removed every feature; lower alpha to keep at least one.")
X_train_lasso = X_train_scaled[:, selected_features]
X_test_lasso = X_test_scaled[:, selected_features]

# Models to evaluate. Every estimator that accepts a seed gets random_state=42
# (the same seed as the split) so the reported metrics are reproducible.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Metrics dictionary: one list per column of the results table
metrics = {
    "Model": [],
    "Accuracy": [],
    "Precision": [],
    "Recall": [],
    "F1 Score": [],
    "AUC": []
}

# Training and evaluating each model; every score is stored as a percentage
for name, model in models.items():
    model.fit(X_train_lasso, y_train)
    y_pred = model.predict(X_test_lasso)
    # AUC needs the positive-class probability; it is left as None for models without predict_proba
    y_proba = model.predict_proba(X_test_lasso)[:, 1] if hasattr(model, "predict_proba") else None

    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results_lasso_2 = pd.DataFrame(metrics)
print(results_lasso_2)

In [None]:
# Applying Lasso to the Heart Attack Dataset
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Splitting the dataset into features and target
X = df_3.drop(columns=['cardio'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df_3['cardio']

# Splitting into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the data (statistics are learned from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Applying Lasso as a feature selector (fitted on the training split)
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_scaled, y_train)

# Selecting features with non-zero coefficients
selected_features = lasso.coef_ != 0
if not selected_features.any():
    # Without this check the classifiers below would be fitted on a 0-column matrix
    raise ValueError("Lasso(alpha=0.1) removed every feature; lower alpha to keep at least one.")
X_train_lasso = X_train_scaled[:, selected_features]
X_test_lasso = X_test_scaled[:, selected_features]

# Models to evaluate. Every estimator that accepts a seed gets random_state=42
# (the same seed as the split) so the reported metrics are reproducible.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Metrics dictionary: one list per column of the results table
metrics = {
    "Model": [],
    "Accuracy": [],
    "Precision": [],
    "Recall": [],
    "F1 Score": [],
    "AUC": []
}

# Training and evaluating each model; every score is stored as a percentage
for name, model in models.items():
    model.fit(X_train_lasso, y_train)
    y_pred = model.predict(X_test_lasso)
    # AUC needs the positive-class probability; it is left as None for models without predict_proba
    y_proba = model.predict_proba(X_test_lasso)[:, 1] if hasattr(model, "predict_proba") else None

    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results_lasso_3 = pd.DataFrame(metrics)
print(results_lasso_3)

In [None]:
# Applying Lasso to the Heart Failure Prediction Dataset
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Splitting the dataset into features and target
X = df_4.drop(columns=['target'])
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding for categorical variables
y = df_4['target']

# Splitting into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the data (statistics are learned from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Applying Lasso as a feature selector (fitted on the training split)
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_scaled, y_train)

# Selecting features with non-zero coefficients
selected_features = lasso.coef_ != 0
if not selected_features.any():
    # Without this check the classifiers below would be fitted on a 0-column matrix
    raise ValueError("Lasso(alpha=0.1) removed every feature; lower alpha to keep at least one.")
X_train_lasso = X_train_scaled[:, selected_features]
X_test_lasso = X_test_scaled[:, selected_features]

# Models to evaluate. Every estimator that accepts a seed gets random_state=42
# (the same seed as the split) so the reported metrics are reproducible.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "SVM": SVC(probability=True, random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Metrics dictionary: one list per column of the results table
metrics = {
    "Model": [],
    "Accuracy": [],
    "Precision": [],
    "Recall": [],
    "F1 Score": [],
    "AUC": []
}

# Training and evaluating each model; every score is stored as a percentage
for name, model in models.items():
    model.fit(X_train_lasso, y_train)
    y_pred = model.predict(X_test_lasso)
    # AUC needs the positive-class probability; it is left as None for models without predict_proba
    y_proba = model.predict_proba(X_test_lasso)[:, 1] if hasattr(model, "predict_proba") else None

    metrics["Model"].append(name)
    metrics["Accuracy"].append(accuracy_score(y_test, y_pred) * 100)
    metrics["Precision"].append(precision_score(y_test, y_pred) * 100)
    metrics["Recall"].append(recall_score(y_test, y_pred) * 100)
    metrics["F1 Score"].append(f1_score(y_test, y_pred) * 100)
    metrics["AUC"].append(roc_auc_score(y_test, y_proba) * 100 if y_proba is not None else None)

# Creating a DataFrame for the results
results_lasso_4 = pd.DataFrame(metrics)
print(results_lasso_4)


Implement DL Algorithms in the datasets with Lasso

In [None]:
# Implement Neural Networks in the Cardiovascular Disease Dataset with Lasso

import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso

# Seed Python, NumPy and TensorFlow before any layer is created so that
# weight initialisation and batch shuffling are reproducible.
tf.keras.utils.set_random_seed(42)

# Prepare the data from df
X_nn = df.drop(columns=['HeartDisease'])
y_nn = df['HeartDisease']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)

# Applying Lasso as a feature selector (fitted on the training split)
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_nn_scaled, y_train_nn)

# Keep only the features with non-zero Lasso coefficients; the networks are
# trained on this reduced matrix, not on the full scaled feature set.
selected_features_nn = lasso.coef_ != 0
if not selected_features_nn.any():
    raise ValueError("Lasso(alpha=0.1) removed every feature; lower alpha to keep at least one.")
X_train_nn_lasso = X_train_nn_scaled[:, selected_features_nn]
X_test_nn_lasso = X_test_nn_scaled[:, selected_features_nn]
n_inputs = X_train_nn_lasso.shape[1]  # number of Lasso-selected features fed to the networks


# Define a function to compile, train and evaluate a model
def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the Lasso-selected features and score it on the test split.

    Args:
        model_arch: list of hidden Keras layers; a 1-unit sigmoid output layer is appended.
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple (accuracy, precision, recall, f1, auc), each multiplied by 100.
    """
    model = Sequential(model_arch + [Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy')
    model.fit(X_train_nn_lasso, y_train_nn, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predictions: probabilities above 0.5 are labelled as the positive class
    y_proba = model.predict(X_test_nn_lasso, verbose=0).ravel()
    y_pred = (y_proba > 0.5).astype(int)

    # Metrics (multiplied by 100)
    acc = accuracy_score(y_test_nn, y_pred) * 100
    prec = precision_score(y_test_nn, y_pred) * 100
    rec = recall_score(y_test_nn, y_pred) * 100
    f1 = f1_score(y_test_nn, y_pred) * 100
    auc = roc_auc_score(y_test_nn, y_proba) * 100

    return acc, prec, rec, f1, auc


# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
    Dense(16, activation='relu')
]

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [
    Dense(32, activation='relu', input_dim=n_inputs)
]

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu')
]

# Train every architecture with the same settings and collect one row per model
results = {'Model': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': [], 'AUC': []}
for model_name, arch in [('DNN', dnn_arch), ('ANN', ann_arch), ('MLP', mlp_arch)]:
    acc, prec, rec, f1, auc = build_and_evaluate(arch, epochs=100)
    results['Model'].append(model_name)
    results['Accuracy'].append(acc)
    results['Precision'].append(prec)
    results['Recall'].append(rec)
    results['F1 Score'].append(f1)
    results['AUC'].append(auc)

# Create a results DataFrame and print
results_lasso_dl = pd.DataFrame(results)
print(results_lasso_dl)


In [None]:
#Implement Neural Networks in the Public Health Dataset with Lasso

import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso

# Prepare the data from df_
X_nn = df_2.drop(columns=['target'])
y_nn = df_2['target']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)

# Applying Lasso
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_nn_scaled, y_train_nn)


# Define a function to compile, train and evaluate a model
def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the notebook's current split and score it.

    A single sigmoid output unit is appended to ``model_arch``; the resulting
    Sequential model is compiled with Adam and binary cross-entropy, fitted on
    the notebook-level ``X_train_nn_scaled`` / ``y_train_nn`` and evaluated on
    ``X_test_nn_scaled`` / ``y_test_nn``.

    Args:
        model_arch: list of hidden layers (the output layer is added here).
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, auc)``, each multiplied by 100.
    """
    output_layer = Dense(1, activation='sigmoid')
    network = Sequential(model_arch + [output_layer])
    network.compile(optimizer='adam', loss='binary_crossentropy')
    network.fit(
        X_train_nn_scaled,
        y_train_nn,
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )

    # Predicted probabilities, thresholded at 0.5 to obtain class labels
    probabilities = network.predict(X_test_nn_scaled).ravel()
    labels = (probabilities > 0.5).astype(int)

    # Label-based metrics first, then AUC (which needs the raw probabilities)
    label_metrics = (accuracy_score, precision_score, recall_score, f1_score)
    scores = [metric(y_test_nn, labels) * 100 for metric in label_metrics]
    scores.append(roc_auc_score(y_test_nn, probabilities) * 100)
    return tuple(scores)

# Scores are collected column-wise; the key order here is the column order of the final table
metric_names = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'AUC']
results = {column: [] for column in ['Model'] + metric_names}
n_inputs = X_train_nn_scaled.shape[1]

# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
]
acc, prec, rec, f1, auc = build_and_evaluate(dnn_arch, epochs=100)
results['Model'].append('DNN')
for column, score in zip(metric_names, (acc, prec, rec, f1, auc)):
    results[column].append(score)

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [Dense(32, activation='relu', input_dim=n_inputs)]
acc, prec, rec, f1, auc = build_and_evaluate(ann_arch, epochs=100)
results['Model'].append('ANN')
for column, score in zip(metric_names, (acc, prec, rec, f1, auc)):
    results[column].append(score)

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
]
acc, prec, rec, f1, auc = build_and_evaluate(mlp_arch, epochs=100)
results['Model'].append('MLP')
for column, score in zip(metric_names, (acc, prec, rec, f1, auc)):
    results[column].append(score)

# Turn the collected per-model scores into a table (one row per model) and print it
results_lasso_dl_2 = pd.DataFrame(data=results)
print(results_lasso_dl_2)

In [None]:
#Implement Neural Networks in the Heart Attack Dataset with Lasso

import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso

# Prepare the data from df_3: the features are every column except the 'cardio' label
X_nn = df_3.drop(columns=['cardio'])
y_nn = df_3['cardio']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features; the scaler is fitted on the training split only and reused on the test split
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)

# Applying Lasso
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_nn_scaled, y_train_nn)

# Use the Lasso fit for feature selection: keep only the columns whose coefficient
# was not shrunk to zero. The fitted model used to be discarded, so the networks
# below were trained on every feature and "with Lasso" had no effect.
lasso_mask = lasso.coef_ != 0
lasso_selected_features = list(X_train_nn.columns[lasso_mask])
if lasso_selected_features:
    X_train_nn_scaled = X_train_nn_scaled[:, lasso_mask]
    X_test_nn_scaled = X_test_nn_scaled[:, lasso_mask]
else:
    # Every coefficient was zeroed at this alpha; a network cannot be trained on
    # zero inputs, so fall back to the full feature set and say so.
    print('Lasso removed every feature; keeping all features.')


# Define a function to compile, train and evaluate a model
def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the notebook's current split and score it.

    A single sigmoid output unit is appended to ``model_arch``; the resulting
    Sequential model is compiled with Adam and binary cross-entropy, fitted on
    the notebook-level ``X_train_nn_scaled`` / ``y_train_nn`` and evaluated on
    ``X_test_nn_scaled`` / ``y_test_nn``.

    Args:
        model_arch: list of hidden layers (the output layer is added here).
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, auc)``, each multiplied by 100.
    """
    output_layer = Dense(1, activation='sigmoid')
    network = Sequential(model_arch + [output_layer])
    network.compile(optimizer='adam', loss='binary_crossentropy')
    network.fit(
        X_train_nn_scaled,
        y_train_nn,
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )

    # Predicted probabilities, thresholded at 0.5 to obtain class labels
    probabilities = network.predict(X_test_nn_scaled).ravel()
    labels = (probabilities > 0.5).astype(int)

    # Label-based metrics first, then AUC (which needs the raw probabilities)
    label_metrics = (accuracy_score, precision_score, recall_score, f1_score)
    scores = [metric(y_test_nn, labels) * 100 for metric in label_metrics]
    scores.append(roc_auc_score(y_test_nn, probabilities) * 100)
    return tuple(scores)

# Scores are collected column-wise; the key order here is the column order of the final table
metric_names = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'AUC']
results = {column: [] for column in ['Model'] + metric_names}
n_inputs = X_train_nn_scaled.shape[1]

# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
]
acc, prec, rec, f1, auc = build_and_evaluate(dnn_arch, epochs=100)
results['Model'].append('DNN')
for column, score in zip(metric_names, (acc, prec, rec, f1, auc)):
    results[column].append(score)

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [Dense(32, activation='relu', input_dim=n_inputs)]
acc, prec, rec, f1, auc = build_and_evaluate(ann_arch, epochs=100)
results['Model'].append('ANN')
for column, score in zip(metric_names, (acc, prec, rec, f1, auc)):
    results[column].append(score)

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
]
acc, prec, rec, f1, auc = build_and_evaluate(mlp_arch, epochs=100)
results['Model'].append('MLP')
for column, score in zip(metric_names, (acc, prec, rec, f1, auc)):
    results[column].append(score)

# Turn the collected per-model scores into a table (one row per model) and print it
results_lasso_dl_3 = pd.DataFrame(data=results)
print(results_lasso_dl_3)



In [None]:
#Implement Neural Networks in the Heart Failure Prediction Dataset with Lasso

import tensorflow as tf
Sequential = tf.keras.models.Sequential
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso

# Prepare the data from df_4: the features are every column except the 'target' label
X_nn = df_4.drop(columns=['target'])
y_nn = df_4['target']
# One-hot encode categorical columns
X_nn = pd.get_dummies(X_nn, drop_first=True)

# Split the dataset into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Scale the features; the scaler is fitted on the training split only and reused on the test split
scaler_nn = StandardScaler()
X_train_nn_scaled = scaler_nn.fit_transform(X_train_nn)
X_test_nn_scaled = scaler_nn.transform(X_test_nn)

# Applying Lasso
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_nn_scaled, y_train_nn)

# Use the Lasso fit for feature selection: keep only the columns whose coefficient
# was not shrunk to zero. The fitted model used to be discarded, so the networks
# below were trained on every feature and "with Lasso" had no effect.
lasso_mask = lasso.coef_ != 0
lasso_selected_features = list(X_train_nn.columns[lasso_mask])
if lasso_selected_features:
    X_train_nn_scaled = X_train_nn_scaled[:, lasso_mask]
    X_test_nn_scaled = X_test_nn_scaled[:, lasso_mask]
else:
    # Every coefficient was zeroed at this alpha; a network cannot be trained on
    # zero inputs, so fall back to the full feature set and say so.
    print('Lasso removed every feature; keeping all features.')


# Define a function to compile, train and evaluate a model
def build_and_evaluate(model_arch, epochs=50, batch_size=16):
    """Train a binary classifier on the notebook's current split and score it.

    A single sigmoid output unit is appended to ``model_arch``; the resulting
    Sequential model is compiled with Adam and binary cross-entropy, fitted on
    the notebook-level ``X_train_nn_scaled`` / ``y_train_nn`` and evaluated on
    ``X_test_nn_scaled`` / ``y_test_nn``.

    Args:
        model_arch: list of hidden layers (the output layer is added here).
        epochs: number of training epochs.
        batch_size: mini-batch size used during training.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, auc)``, each multiplied by 100.
    """
    output_layer = Dense(1, activation='sigmoid')
    network = Sequential(model_arch + [output_layer])
    network.compile(optimizer='adam', loss='binary_crossentropy')
    network.fit(
        X_train_nn_scaled,
        y_train_nn,
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )

    # Predicted probabilities, thresholded at 0.5 to obtain class labels
    probabilities = network.predict(X_test_nn_scaled).ravel()
    labels = (probabilities > 0.5).astype(int)

    # Label-based metrics first, then AUC (which needs the raw probabilities)
    label_metrics = (accuracy_score, precision_score, recall_score, f1_score)
    scores = [metric(y_test_nn, labels) * 100 for metric in label_metrics]
    scores.append(roc_auc_score(y_test_nn, probabilities) * 100)
    return tuple(scores)

# Scores are collected column-wise; the key order here is the column order of the final table
metric_names = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'AUC']
results = {column: [] for column in ['Model'] + metric_names}
n_inputs = X_train_nn_scaled.shape[1]

# 1. DNN: a deeper network with 3 hidden layers
dnn_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
]
acc, prec, rec, f1, auc = build_and_evaluate(dnn_arch, epochs=100)
results['Model'].append('DNN')
for column, score in zip(metric_names, (acc, prec, rec, f1, auc)):
    results[column].append(score)

# 2. ANN: a shallow (single hidden layer) network
ann_arch = [Dense(32, activation='relu', input_dim=n_inputs)]
acc, prec, rec, f1, auc = build_and_evaluate(ann_arch, epochs=100)
results['Model'].append('ANN')
for column, score in zip(metric_names, (acc, prec, rec, f1, auc)):
    results[column].append(score)

# 3. MLP: a two-hidden layer network
mlp_arch = [
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(32, activation='relu'),
]
acc, prec, rec, f1, auc = build_and_evaluate(mlp_arch, epochs=100)
results['Model'].append('MLP')
for column, score in zip(metric_names, (acc, prec, rec, f1, auc)):
    results[column].append(score)

# Turn the collected per-model scores into a table (one row per model) and print it
results_lasso_dl_4 = pd.DataFrame(data=results)
print(results_lasso_dl_4)

