In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression, Lasso
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix

# Load the dataset
d1 = pd.read_csv(r'C:\Users\Md. Mohir Uddin\Desktop\Thesis on Heart failure Predictions\dataset\Heart_a.csv');
d2 = pd.read_csv(r'C:\Users\Md. Mohir Uddin\Desktop\Thesis on Heart failure Predictions\dataset\Heart_b.csv');
d3 = pd.read_csv(r'C:\Users\Md. Mohir Uddin\Desktop\Thesis on Heart failure Predictions\dataset\Heart_c.csv');
d4 = pd.read_csv(r'C:\Users\Md. Mohir Uddin\Desktop\Thesis on Heart failure Predictions\dataset\Heart_d.csv');
d5 = pd.read_csv(r'C:\Users\Md. Mohir Uddin\Desktop\Thesis on Heart failure Predictions\dataset\Heart_e.csv');



# Combine all datasets into one
dat = pd.concat([d1, d2, d3, d4, d5])

# Reset index
dat.reset_index(drop=True, inplace=True)

# Separate features and target variable
X = dat.iloc[:, :-1]  # Features
y = dat['DEATH_EVENT']  # Target variable

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.80, random_state=30)

# Define classifiers
classifiers = {
    "Logistic Regression": LogisticRegression(random_state=30),
    "Support Vector Machine(SVM)": SVC(kernel='linear', random_state=30),
    "K-Nearest Neighbors(KNN)": KNeighborsClassifier(n_neighbors=5),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=30),
    "Naive Bayes"  : GaussianNB()
}

# Define methods for dimensionality reduction and feature selection
methods = {
    "PCA" : PCA(n_components=5),
    "LDA" : LinearDiscriminantAnalysis(n_components=1),
    "Lasso" : Lasso(alpha=0.1),
    "RFE" : RFE(estimator=LogisticRegression(), n_features_to_select=5)
}

# Iterate over classifiers
for clf_name, clf in classifiers.items():
    print(f"          Classifier: {clf_name}")
    
    # Iterate over methods
    for method_name, method in methods.items():
        print(f"Method: {method_name}")
        
        # Apply dimensionality reduction or feature selection
        if method_name in ["PCA", "LDA"]:
            X_train_reduced = method.fit_transform(X_train, y_train)
            X_test_reduced = method.transform(X_test)
        elif method_name == "Lasso":
            lasso_model = method
            lasso_model.fit(X_train, y_train)
            selected_features = lasso_model.coef_ != 0
            X_train_reduced = X_train[:, selected_features]
            X_test_reduced = X_test[:, selected_features]
        elif method_name == "RFE":
            selector = method
            selector = selector.fit(X_train, y_train)
            selected_features = selector.support_
            X_train_reduced = X_train[:, selected_features]
            X_test_reduced = X_test[:, selected_features]
        
        # Train and evaluate the classifier
        clf.fit(X_train_reduced, y_train)
        y_pred = clf.predict(X_test_reduced)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {accuracy}")
    print("\n");






from sklearn.model_selection import cross_val_score

# Iterate over classifiers
print("------------------------Check for Cross Validation---------------------------\n")
for clf_name, clf in classifiers.items():
    print(f"               Classifier    : {clf_name}")
    
    # Iterate over methods
    for method_name, method in methods.items():
        print(f"Method: {method_name}")
        
        # Apply dimensionality reduction or feature selection
        if method_name in ["PCA", "LDA"]:
            X_reduced = method.fit_transform(X_scaled, y)
        elif method_name == "Lasso":
            lasso_model = method
            lasso_model.fit(X_scaled, y)
            selected_features = lasso_model.coef_ != 0
            X_reduced = X_scaled[:, selected_features]
        elif method_name == "RFE":
            selector = method
            selector = selector.fit(X_scaled, y)
            selected_features = selector.support_
            X_reduced = X_scaled[:, selected_features]
        else:
            X_reduced = X_scaled
        
        # Perform cross-validation
        scores = cross_val_score(clf, X_reduced, y, cv=5, scoring='accuracy')
        print(f"Cross-Validation Scores: {scores}")
        print(f"Mean Accuracy: {scores.mean()}")
        print("\n");
        
      


          Classifier: Logistic Regression
Method: PCA
Accuracy: 0.822742474916388
Method: LDA
Accuracy: 0.8511705685618729
Method: Lasso
Accuracy: 0.8637123745819398
Method: RFE
Accuracy: 0.8394648829431438


          Classifier: Support Vector Machine(SVM)
Method: PCA
Accuracy: 0.8269230769230769
Method: LDA
Accuracy: 0.8620401337792643
Method: Lasso
Accuracy: 0.867056856187291
Method: RFE
Accuracy: 0.8494983277591973


          Classifier: K-Nearest Neighbors(KNN)
Method: PCA
Accuracy: 0.8127090301003345
Method: LDA
Accuracy: 0.8578595317725752
Method: Lasso
Accuracy: 0.8478260869565217
Method: RFE
Accuracy: 0.862876254180602


          Classifier: Random Forest
Method: PCA
Accuracy: 0.8762541806020067
Method: LDA
Accuracy: 0.8954849498327759
Method: Lasso
Accuracy: 0.9264214046822743
Method: RFE
Accuracy: 0.9431438127090301


          Classifier: Naive Bayes
Method: PCA
Accuracy: 0.810200668896321
Method: LDA
Accuracy: 0.8620401337792643
Method: Lasso
Accuracy: 0.767558528428093