In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder,LabelEncoder,StandardScaler,MinMaxScaler,RobustScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
df=pd.read_csv("F:\\TIHANNIIT\\Dataset_tihan\\heart.csv")
df.drop_duplicates()
df.isna().sum()
x = df.drop("HeartDisease", axis=1)
y = df["HeartDisease"]

# Identify column types
numcol = x.select_dtypes(include=["int64", "float64"]).columns.tolist()
catcol = x.select_dtypes(include=["object"]).columns.tolist()

# Scalers and encoders
scalers = {
    "StandardScaler": StandardScaler(),
    "MinMaxScaler": MinMaxScaler(),
    "RobustScaler": RobustScaler()
}

encoders = ["LabelEncoder", "OneHotEncoder"]

results = []

# Main loop
for scaler_name, scaler in scalers.items():
    for encoder_name in encoders:
        x_copy = x.copy()

        # Encode categorical columns
        if encoder_name == "LabelEncoder":
            le = LabelEncoder()
            for col in catcol:
                x_copy[col] = le.fit_transform(x_copy[col])
        else:
            x_copy = pd.get_dummies(x_copy, columns=catcol, drop_first=True)

        # Scale numeric columns
        x_copy[numcol] = scaler.fit_transform(x_copy[numcol])

        # Train-test split
        x_train, x_test, y_train, y_test = train_test_split(x_copy, y, test_size=0.2, random_state=42)

        # Model
        model = LogisticRegression(max_iter=500)
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)

        # Metrics
        cr = classification_report(y_test, y_pred, output_dict=True)
        cm = confusion_matrix(y_test, y_pred)
        ac = accuracy_score(y_test, y_pred)

        results.append({
            "Scaler": scaler_name,
            "Encoder": encoder_name,
            "Accuracy": ac,
            "ConfusionMatrix": cm.tolist()
        })

# Show results
results_df = pd.DataFrame(results)
print(results_df[["Scaler", "Encoder", "Accuracy"]])
print("\n")
print(f"Confusion Matrix")
print(cm)
print("\n")
print(f"Accuracy Score")
print(ac)

           Scaler        Encoder  Accuracy
0  StandardScaler   LabelEncoder  0.842391
1  StandardScaler  OneHotEncoder  0.853261
2    MinMaxScaler   LabelEncoder  0.853261
3    MinMaxScaler  OneHotEncoder  0.853261
4    RobustScaler   LabelEncoder  0.842391
5    RobustScaler  OneHotEncoder  0.853261


Confusion Matrix
[[67 10]
 [17 90]]


Accuracy Score
0.8532608695652174
