# Part - 1 : Reproduction of published results

Here, we run the individual models along with the paper's proposed 5-fold stacking ensemble model and view their result in different metrics. 

In [56]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Dataset
# NOTE(review): absolute, machine-specific path -- point DATA_PATH at your local
# copy of heart.csv before running on another machine.
DATA_PATH = "D:/Deakin Coursework/T1 semester/SIT - 720 - Machine Learning/Task 11.1HD/11.1HD/heart.csv"
df = pd.read_csv(DATA_PATH)

# Split features and target
X = df.drop(columns="target")
y = df["target"]

# Train/test split -- done BEFORE scaling so that test-set statistics
# (per-feature min/max) cannot leak into the preprocessing step.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Preprocessing: fit MinMaxScaler on the training rows only, then apply the
# same transformation to the held-out rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaling. Kept so the name stays
# defined for any later cell; it is not used for training or evaluation here.
X_scaled = scaler.transform(X)

# Single seed shared by every stochastic estimator in this cell so that
# Restart & Run All reproduces the same table.
RANDOM_STATE = 42


def score_predictions(name, y_true, y_pred, y_prob):
    """Build one row of the results table for a fitted classifier.

    Parameters
    ----------
    name : str
        Label stored under the "Model" key.
    y_true : array-like
        Ground-truth labels of the evaluation set.
    y_pred : array-like
        Hard class predictions for the evaluation set.
    y_prob : array-like
        Predicted probability of the positive class (column 1 of
        ``predict_proba``), used for the AUC.

    Returns
    -------
    dict
        Keys "Model", "Accuracy", "F1 Score", "Recall", "Precision", "AUC".
    """
    return {
        "Model": name,
        "Accuracy": accuracy_score(y_true, y_pred),
        "F1 Score": f1_score(y_true, y_pred),
        "Recall": recall_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred),
        "AUC": roc_auc_score(y_true, y_prob)
    }


# Defining models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=RANDOM_STATE),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
    "XGBoost": XGBClassifier(eval_metric='logloss', random_state=RANDOM_STATE),
    "Naive Bayes": GaussianNB(),
    "KNN": KNeighborsClassifier()
}

# Evaluate and collect results
results = []
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]
    results.append(score_predictions(name, y_test, y_pred, y_prob))

# Stacking Ensemble from the paper's proposed model.
# The base learners are seeded like the stand-alone models above; previously
# the tree-based learners were unseeded, so this row could change between runs.
stack_model = StackingClassifier(
    estimators=[
        ('lr', LogisticRegression(max_iter=1000, random_state=RANDOM_STATE)),
        ('dt', DecisionTreeClassifier(random_state=RANDOM_STATE)),
        ('rf', RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)),
        ('xgb', XGBClassifier(eval_metric='logloss', random_state=RANDOM_STATE))
    ],
    final_estimator=LogisticRegression(random_state=RANDOM_STATE),
    cv=5
)

stack_model.fit(X_train, y_train)
y_pred = stack_model.predict(X_test)
y_prob = stack_model.predict_proba(X_test)[:, 1]
results.append(score_predictions("Stacking Ensemble", y_test, y_pred, y_prob))

# Results (last expression of the cell, so the sorted table is displayed)
results_df = pd.DataFrame(results)
results_df.sort_values(by="Accuracy", ascending=True).reset_index(drop=True)

Unnamed: 0,Model,Accuracy,F1 Score,Recall,Precision,AUC
0,Naive Bayes,0.829268,0.840183,0.87619,0.807018,0.904286
1,Logistic Regression,0.834146,0.849558,0.914286,0.793388,0.928
2,KNN,0.853659,0.855769,0.847619,0.864078,0.958952
3,Decision Tree,0.985366,0.985507,0.971429,1.0,0.985714
4,Random Forest,1.0,1.0,1.0,1.0,1.0
5,XGBoost,1.0,1.0,1.0,1.0,1.0
6,Stacking Ensemble,1.0,1.0,1.0,1.0,1.0


The results are slightly different than the proposed paper's. This could be due to the fact that although paper used train/test split, the exact ratio is not specified. We used 80/20 split as standard. The parameters for logistic regression, decision tree, random forest and KNN is not mentioned. So we can only assume them or use default. In our reproduction of their report's value, we used max_iter = 1000 for Logistic Regression and n_estimators=100 for Random Forest and default for others. 
Their proposed model itself was not perfect 1.00 or 100%, however our version of their model was. We applied 5-fold stacking ensemble like the paper had done. However our results gave better result. This could be due to the train/test split decision and using different parameters. Hence the difference in results.

# Part - 2: Creation of our own model

In [60]:
# Reloading the libraries and dataset to keep my model seprerated overall from the workings of the reproduction of the paper' model.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.feature_selection import RFE
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


# NOTE(review): absolute, machine-specific path -- point DATA_PATH at your local
# copy of heart.csv before running on another machine.
DATA_PATH = "D:/Deakin Coursework/T1 semester/SIT - 720 - Machine Learning/Task 11.1HD/11.1HD/heart.csv"
df = pd.read_csv(DATA_PATH)
X = df.drop(columns="target")
y = df["target"]

# Train/test split -- done FIRST so that every fitted preprocessing step below
# (scaling, feature selection, oversampling) sees the training rows only.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Scaling: learn min/max from the training rows, apply to both splits
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# RFE Feature Selection: ranked using training features and training labels
# only, so the held-out labels do not influence which features are kept.
rfe_selector = RFE(estimator=LogisticRegression(max_iter=1000), n_features_to_select=10)
X_train = rfe_selector.fit_transform(X_train_scaled, y_train)
X_test = rfe_selector.transform(X_test_scaled)

# Full matrices under the train-fitted transforms. Kept so these names stay
# defined for any later cell; they are not used for training or evaluation here.
X_scaled = scaler.transform(X)
X_rfe = rfe_selector.transform(X_scaled)

# Applying SMOTE to balance training set (the test set is left untouched)
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# Defining base models
lr = LogisticRegression(max_iter=1000, random_state=42)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
xgb = XGBClassifier(eval_metric='logloss', random_state=42)

# Voting ensemble: soft voting averages the base models' predicted probabilities
voting = VotingClassifier(estimators=[('lr', lr), ('rf', rf), ('xgb', xgb)], voting='soft')
voting.fit(X_train_res, y_train_res)

# Predict and evaluate
y_pred = voting.predict(X_test)
y_prob = voting.predict_proba(X_test)[:, 1]  # probability of the positive class, for AUC

# Metrics
# NOTE(review): if these still come out at exactly 1.0, check the dataset for
# duplicate rows shared between the train and test splits
# (e.g. df.duplicated().sum()) -- TODO confirm.
metrics = {
    "Accuracy": accuracy_score(y_test, y_pred),
    "Precision": precision_score(y_test, y_pred),
    "Recall": recall_score(y_test, y_pred),
    "F1 Score": f1_score(y_test, y_pred),
    "AUC": roc_auc_score(y_test, y_prob)
}

# Results
print("\nOur proposed model:\n")
for metric, value in metrics.items():
    print(f"{metric}: {value:.4f}")



Our proposed model:

Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000
AUC: 1.0000
