<a href="https://colab.research.google.com/github/Bilal-Hijazi/Colab-Code/blob/main/Heart_Disease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE

# Experiment: how does a Random Forest degrade as the positive class becomes
# under-represented in the training data?

# Read the data
data = pd.read_csv('Heart_Disease_Prediction.csv')

# Drop age and sex columns
data = data.drop(columns=['Age', 'Sex'])

# Encode the target: 'Presence' -> 1, every other label -> 0
data['Heart Disease'] = data['Heart Disease'].str.lower().eq('presence').astype(int)
print(data.head())

# Separate features and target variable
X = data.drop(columns='Heart Disease')
y = data['Heart Disease']

# Split BEFORE oversampling: the test set must contain only real rows, and no
# synthetic training sample may be interpolated from a test row.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Apply SMOTE to the training portion only
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train_raw, y_train_raw)

# Positional indices of each class inside the balanced training set
y_train_values = y_train.to_numpy()
positive_indices = np.flatnonzero(y_train_values == 1)
negative_indices = np.flatnonzero(y_train_values == 0)

# Seeded generator so the positive subsample is reproducible
rng = np.random.default_rng(42)

# Results dictionary, keyed by the fraction r of positives kept
results = {}

# For each r, keep a fraction r of the positive training rows (all negatives
# are kept), train, and record precision, recall, and F1 score.
# Classifier used: Random Forest
for r in np.arange(0.9, 0, -0.1):
    kept_positive = rng.permutation(positive_indices)[:int(r * len(positive_indices))]
    imbalanced_train_indices = np.concatenate((kept_positive, negative_indices))
    X_train_imbalanced = X_train.iloc[imbalanced_train_indices]
    y_train_imbalanced = y_train.iloc[imbalanced_train_indices]

    # Training the Random Forest Classifier
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train_imbalanced, y_train_imbalanced)

    # Predictions and recording precision, recall, and F1 score
    y_pred = clf.predict(X_test)
    results[round(r, 1)] = {
        'Precision': round(precision_score(y_test, y_pred), 3),
        'Recall': round(recall_score(y_test, y_pred), 3),
        'F1 Score': round(f1_score(y_test, y_pred), 3),
    }

# Display the results (bare expression: rendered as the notebook cell output)
pd.DataFrame(results)



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
import xgboost as xgb

# Experiment: how does XGBoost degrade as the positive class becomes
# under-represented in the training data?

# Read the data
data = pd.read_csv('Heart_Disease_Prediction.csv')

# Drop age and sex columns
data = data.drop(columns=['Age', 'Sex'])

# Encode the target: 'Presence' -> 1, every other label -> 0
data['Heart Disease'] = data['Heart Disease'].str.lower().eq('presence').astype(int)

# Separate features and target variable
X = data.drop(columns='Heart Disease')
y = data['Heart Disease']

# Split BEFORE oversampling: the test set must contain only real rows, and no
# synthetic training sample may be interpolated from a test row.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Apply SMOTE to the training portion only
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train_raw, y_train_raw)

# Positional indices of each class inside the balanced training set
y_train_values = y_train.to_numpy()
positive_indices = np.flatnonzero(y_train_values == 1)
negative_indices = np.flatnonzero(y_train_values == 0)

# Seeded generator so the positive subsample is reproducible
rng = np.random.default_rng(42)

# Results dictionary, keyed by the fraction r of positives kept
results = {}

# For each r, keep a fraction r of the positive training rows (all negatives
# are kept), train, and record precision, recall, and F1 score.
# Classifier used: XGBoost
for r in np.arange(0.9, 0, -0.1):
    kept_positive = rng.permutation(positive_indices)[:int(r * len(positive_indices))]
    imbalanced_train_indices = np.concatenate((kept_positive, negative_indices))
    X_train_imbalanced = X_train.iloc[imbalanced_train_indices]
    y_train_imbalanced = y_train.iloc[imbalanced_train_indices]

    # Training the XGBoost Classifier
    clf = xgb.XGBClassifier(random_state=42)
    clf.fit(X_train_imbalanced, y_train_imbalanced)

    # Predictions and recording precision, recall, and F1 score
    y_pred = clf.predict(X_test)
    results[round(r, 1)] = {
        'Precision': round(precision_score(y_test, y_pred), 3),
        'Recall': round(recall_score(y_test, y_pred), 3),
        'F1 Score': round(f1_score(y_test, y_pred), 3),
    }

# Print the results
print(pd.DataFrame(results))



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE

# Experiment: how does an SVM degrade as the positive class becomes
# under-represented in the training data?

# Read the data
data = pd.read_csv('Heart_Disease_Prediction.csv')

# Drop age and sex columns
data = data.drop(columns=['Age', 'Sex'])

# Encode the target: 'Presence' -> 1, every other label -> 0
data['Heart Disease'] = data['Heart Disease'].str.lower().eq('presence').astype(int)

# Separate features and target variable
X = data.drop(columns='Heart Disease')
y = data['Heart Disease']

# Split BEFORE oversampling: the test set must contain only real rows, and no
# synthetic training sample may be interpolated from a test row.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Apply SMOTE to the training portion only
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train_raw, y_train_raw)

# Positional indices of each class inside the balanced training set
y_train_values = y_train.to_numpy()
positive_indices = np.flatnonzero(y_train_values == 1)
negative_indices = np.flatnonzero(y_train_values == 0)

# Seeded generator so the positive subsample is reproducible
rng = np.random.default_rng(42)

# Results dictionary, keyed by the fraction r of positives kept
results = {}

# For each r, keep a fraction r of the positive training rows (all negatives
# are kept), train, and record precision, recall, and F1 score.
# Classifier used: Support Vector Machine (SVM)
for r in np.arange(0.9, 0, -0.1):
    kept_positive = rng.permutation(positive_indices)[:int(r * len(positive_indices))]
    imbalanced_train_indices = np.concatenate((kept_positive, negative_indices))
    X_train_imbalanced = X_train.iloc[imbalanced_train_indices]
    y_train_imbalanced = y_train.iloc[imbalanced_train_indices]

    # Training the SVM Classifier
    clf = SVC(random_state=42)
    clf.fit(X_train_imbalanced, y_train_imbalanced)

    # Predictions and recording precision, recall, and F1 score
    y_pred = clf.predict(X_test)
    results[round(r, 1)] = {
        'Precision': round(precision_score(y_test, y_pred), 3),
        'Recall': round(recall_score(y_test, y_pred), 3),
        'F1 Score': round(f1_score(y_test, y_pred), 3),
    }

# Print the results
print(pd.DataFrame(results))


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE

# Experiment: how does an MLP degrade as the positive class becomes
# under-represented in the training data?

# Read the data
data = pd.read_csv('Heart_Disease_Prediction.csv')

# Drop age and sex columns
data = data.drop(columns=['Age', 'Sex'])

# Encode the target: 'Presence' -> 1, every other label -> 0
data['Heart Disease'] = data['Heart Disease'].str.lower().eq('presence').astype(int)

# Separate features and target variable
X = data.drop(columns='Heart Disease')
y = data['Heart Disease']

# Split BEFORE oversampling: the test set must contain only real rows, and no
# synthetic training sample may be interpolated from a test row.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Apply SMOTE to the training portion only
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train_raw, y_train_raw)

# Positional indices of each class inside the balanced training set
y_train_values = y_train.to_numpy()
positive_indices = np.flatnonzero(y_train_values == 1)
negative_indices = np.flatnonzero(y_train_values == 0)

# Seeded generator so the positive subsample is reproducible
rng = np.random.default_rng(42)

# Results dictionary, keyed by the fraction r of positives kept
results = {}

# For each r, keep a fraction r of the positive training rows (all negatives
# are kept), train, and record precision, recall, and F1 score.
# Classifier used: MLPClassifier (Neural Network)
for r in np.arange(0.9, 0, -0.1):
    kept_positive = rng.permutation(positive_indices)[:int(r * len(positive_indices))]
    imbalanced_train_indices = np.concatenate((kept_positive, negative_indices))
    X_train_imbalanced = X_train.iloc[imbalanced_train_indices]
    y_train_imbalanced = y_train.iloc[imbalanced_train_indices]

    # Training the MLPClassifier
    clf = MLPClassifier(random_state=42)
    clf.fit(X_train_imbalanced, y_train_imbalanced)

    # Predictions and recording precision, recall, and F1 score
    y_pred = clf.predict(X_test)
    results[round(r, 1)] = {
        'Precision': round(precision_score(y_test, y_pred), 3),
        'Recall': round(recall_score(y_test, y_pred), 3),
        'F1 Score': round(f1_score(y_test, y_pred), 3),
    }

# Print the results
print(pd.DataFrame(results))


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE

# Experiment: how does a small dense Keras network degrade as the positive
# class becomes under-represented in the training data?

# Read the data
data = pd.read_csv('Heart_Disease_Prediction.csv')

# Drop age and sex columns
data = data.drop(columns=['Age', 'Sex'])

# Encode the target: 'Presence' -> 1, every other label -> 0
data['Heart Disease'] = data['Heart Disease'].str.lower().eq('presence').astype(int)

# Separate features and target variable
X = data.drop(columns='Heart Disease')
y = data['Heart Disease']

# Split BEFORE oversampling: the test set must contain only real rows, and no
# synthetic training sample may be interpolated from a test row.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Apply SMOTE to the training portion only
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train_raw, y_train_raw)

# Positional indices of each class inside the balanced training set
y_train_values = y_train.to_numpy()
positive_indices = np.flatnonzero(y_train_values == 1)
negative_indices = np.flatnonzero(y_train_values == 0)

# Seed both sources of randomness (positive subsample and TensorFlow's global
# seed) so reruns are comparable, like the random_state=42 used everywhere else.
rng = np.random.default_rng(42)
tf.random.set_seed(42)

# Results dictionary, keyed by the fraction r of positives kept
results = {}

# For each r, keep a fraction r of the positive training rows (all negatives
# are kept), train a fresh network, and record precision, recall, and F1 score.
for r in np.arange(0.9, 0, -0.1):
    kept_positive = rng.permutation(positive_indices)[:int(r * len(positive_indices))]
    imbalanced_train_indices = np.concatenate((kept_positive, negative_indices))
    X_train_imbalanced = X_train.iloc[imbalanced_train_indices]
    y_train_imbalanced = y_train.iloc[imbalanced_train_indices]

    # Two hidden ReLU layers and a sigmoid output for binary classification
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train_imbalanced.shape[1],)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Train the model
    model.fit(X_train_imbalanced, y_train_imbalanced, epochs=10, batch_size=32, verbose=0)

    # Threshold the sigmoid output at 0.5 and flatten the (n, 1) column to
    # the 1-D label vector the sklearn metrics expect.
    y_pred = (model.predict(X_test, verbose=0) > 0.5).astype("int32").ravel()
    results[round(r, 1)] = {
        'Precision': round(precision_score(y_test, y_pred), 3),
        'Recall': round(recall_score(y_test, y_pred), 3),
        'F1 Score': round(f1_score(y_test, y_pred), 3),
    }

# Print the results
print(pd.DataFrame(results))
