In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the heart-disease CSV (path is relative to the working directory) into a DataFrame.
data = pd.read_csv(
    'src/heart_statlog_cleveland_hungary_final.csv',
)

In [None]:
# Show how many rows carry each distinct value of the `target` column.
data.loc[:, "target"].value_counts()

In [None]:
# Separate the label column from the predictor columns.
y = data['target']
# Despite its name, x_norm holds the features exactly as loaded (no scaling applied yet).
x_norm = data.drop(columns=['target'])
x_norm

In [None]:
# Scale data: standardise every feature column of x_norm with StandardScaler.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(x_norm)            # learn the per-column statistics from all rows of x_norm
x = scaler.transform(x_norm)  # apply them to the same rows
x

In [None]:
# Split the raw (unscaled) features first, then learn the scaling statistics
# from the training rows only. Fitting the scaler on every row before the
# split lets the held-out rows influence the mean/std used for training,
# which can bias the test scores.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 80/20 split; the fixed seed keeps the partition identical between runs.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x_norm, y, test_size=0.2, random_state=42
)

# Rebind `scaler` so it holds the same statistics the models are trained with.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train_raw)  # fit on the training rows only
x_test = scaler.transform(x_test_raw)        # reuse the training mean/std

In [None]:
# Gaussian Naive Bayes
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()  # constructed with the library defaults
nb


In [None]:
# Train the Gaussian Naive Bayes model on the training split.
nb.fit(X=x_train, y=y_train)

In [None]:
# Predict labels for the test split, then print the per-class metrics report.
y_pred_nb = nb.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_pred_nb))

In [None]:
# Test-set accuracy of Gaussian Naive Bayes, expressed as a percentage.
# accuracy_score and confusion_matrix are imported here and reused by later cells.
from sklearn.metrics import accuracy_score, confusion_matrix

acc_gnb = 100 * accuracy_score(y_true=y_test, y_pred=y_pred_nb)
print("Accuracy=", acc_gnb)

In [None]:
# Confusion matrix for Gaussian Naive Bayes (rows: actual labels, columns: predicted labels).
nb_cm = confusion_matrix(y_test, y_pred_nb)
sns.heatmap(nb_cm, annot=True, fmt='d', cmap='Blues')
# Name the model in the title, as the other confusion-matrix plots in this notebook do.
plt.title('Confusion Matrix for Naive Bayes')
plt.xlabel('Predicted')
plt.ylabel('Actual')
# Render the figure explicitly so the cell does not echo the Text object returned by ylabel.
plt.show()

In [None]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()  # constructed with the library defaults

In [None]:
# Train logistic regression, predict the test split, and print the per-class report.
lr.fit(X=x_train, y=y_train)
y_pred_lr = lr.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_pred_lr))

In [None]:
# Test-set accuracy of logistic regression, expressed as a percentage.
acc_lr = 100 * accuracy_score(y_true=y_test, y_pred=y_pred_lr)
print("Accuracy=", acc_lr)


In [None]:
# Confusion matrix for logistic regression, drawn as an annotated heatmap.
lr_cm = confusion_matrix(y_true=y_test, y_pred=y_pred_lr)
sns.heatmap(data=lr_cm, annot=True, fmt='d', cmap='Blues')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix for Logistic Regression')
plt.show()

In [None]:
# Decision Tree classifier
from sklearn.tree import DecisionTreeClassifier

# Seed the tree so repeated runs build the same tree and report the same accuracy;
# 42 matches the seed already used for the train/test split and the SVM.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(x_train, y_train)


In [None]:
# Predict the test split with the decision tree.
y_pred_dt = dt.predict(x_test)
# Print the per-class metrics report.
print(classification_report(y_true=y_test, y_pred=y_pred_dt))

In [None]:
# Test-set accuracy of the decision tree, expressed as a percentage.
acc_dt = 100 * accuracy_score(y_true=y_test, y_pred=y_pred_dt)
print("Accuracy=", acc_dt)

In [None]:
# Confusion matrix for the decision tree, drawn as an annotated heatmap.
dt_cm = confusion_matrix(y_true=y_test, y_pred=y_pred_dt)
sns.heatmap(data=dt_cm, annot=True, fmt='d', cmap='Blues')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix for Decision Tree')
plt.show()

In [None]:
# K-Nearest Neighbors classifier
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors
knn.fit(X=x_train, y=y_train)


In [None]:
# Predict the test split with the fitted KNN model.
y_pred_knn = knn.predict(X=x_test)

In [None]:
# Print the per-class metrics report for the KNN predictions.
print(classification_report(y_true=y_test, y_pred=y_pred_knn))

In [None]:
# Test-set accuracy of KNN, expressed as a percentage.
acc_knn = 100 * accuracy_score(y_true=y_test, y_pred=y_pred_knn)
print("Accuracy=", acc_knn)
# Confusion matrix, plotted in the following cell.
knn_cm = confusion_matrix(y_true=y_test, y_pred=y_pred_knn)

In [None]:
# Draw the KNN confusion matrix as an annotated heatmap.
sns.heatmap(data=knn_cm, annot=True, fmt='d', cmap='Reds')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix for KNN')
plt.show()

In [None]:
# SVM classifier
from sklearn.svm import SVC

# You can change the kernel (e.g., 'rbf', 'poly')
svm = SVC(random_state=42, kernel='linear')
svm.fit(X=x_train, y=y_train)


In [None]:
# Predict the test split with the fitted SVM.
y_pred_svm = svm.predict(x_test)
# Print the per-class metrics report.
print(classification_report(y_true=y_test, y_pred=y_pred_svm))


In [None]:
# Test-set accuracy of the SVM, expressed as a percentage.
acc_svm = 100 * accuracy_score(y_true=y_test, y_pred=y_pred_svm)
print("Accuracy=", acc_svm)

In [None]:
# Confusion matrix for the SVM, drawn as an annotated heatmap.
svm_cm = confusion_matrix(y_true=y_test, y_pred=y_pred_svm)
sns.heatmap(data=svm_cm, annot=True, fmt='d', cmap='Blues')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix for SVM')
plt.show()

In [None]:
# Map each model's display name to its test accuracy (in percent).
model_accuracies = dict(zip(
    ['Naive Bayes', 'Logistic Regression', 'Decision Tree', 'KNN', 'SVM'],
    [acc_gnb, acc_lr, acc_dt, acc_knn, acc_svm],
))

# Pick the entry with the highest accuracy; on a tie the earliest entry wins.
best_model, best_accuracy = max(
    model_accuracies.items(),
    key=lambda name_and_score: name_and_score[1],
)

In [None]:
# Report the winning model with its accuracy rounded to two decimals.
print(
    f"The best performing model is {best_model} "
    f"with an accuracy of {best_accuracy:.2f}%"
)

In [None]:
# Column-oriented data for the comparison table: model names and their accuracies (percent).
model_data = dict(
    Model=['Naive Bayes', 'Logistic Regression', 'Decision Tree', 'KNN', 'SVM'],
    Accuracy=[acc_gnb, acc_lr, acc_dt, acc_knn, acc_svm],
)

In [None]:
# Tabulate the per-model accuracies (one row per model) and display the table.
model_comparison = pd.DataFrame.from_dict(model_data)
model_comparison


In [None]:
# Bar chart comparing the test accuracy of every model, one colour per bar.
colors = ['skyblue', 'lightcoral', 'lightgreen', 'gold', 'plum']
plt.figure(figsize=(10, 6))
plt.bar(model_comparison['Model'], model_comparison['Accuracy'], color=colors)
plt.xlabel("Model")
plt.ylabel("Accuracy (%)")
plt.title("Model Comparison")
plt.ylim(0, 100) # Set y-axis limit to 100%
# Render the figure explicitly; otherwise the cell echoes the limits tuple returned by plt.ylim.
plt.show()