In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score

# Load the dataset
heart_data = pd.read_csv('heart.csv')

# Splitting the data
X = heart_data.drop(columns='target', axis=1)
Y = heart_data['target']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

# Initialize models
logistic_model = LogisticRegression()
# linear_model = LinearRegression()
knn_model = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors
svm_model = SVC()
decision_tree_model = DecisionTreeClassifier()
gaussian_nb_model = GaussianNB()
kmeans_model = KMeans(n_clusters=2)  # Number of clusters, you can adjust this

# Train the models
models = [logistic_model, svm_model, decision_tree_model, gaussian_nb_model, kmeans_model]
model_names = ["Logistic Regression", "SVM", "Decision Tree", "Gaussian Naive Bayes", "K-Means Clustering"]

for model, name in zip(models, model_names):
    if name != "K-Means Clustering":
        model.fit(X_train, Y_train)
        X_train_prediction = model.predict(X_train)
        training_data_accuracy = accuracy_score(X_train_prediction, Y_train)
        print(f"Accuracy on {name} Training Data: {training_data_accuracy:.2%}")

        X_test_prediction = model.predict(X_test)
        test_data_accuracy = accuracy_score(X_test_prediction, Y_test)
        print(f"Accuracy on {name} Test Data: {test_data_accuracy:.2%}")
    else:
        model.fit(X_train)
        cluster_labels = model.predict(X_train)
        print(f"Cluster Labels on Training Data: {cluster_labels}")

# Building the Predictive System
input_data = (54, 1, 2, 120, 258, 0, 0, 147, 0, 0.4, 1, 0, 3)

# Change the input data into an np array
input_data_nparray = np.asarray(input_data)

# Reshape the np array
input_data_reshaped = input_data_nparray.reshape(1, -1)

for model, name in zip(models, model_names):
    if name != "K-Means Clustering":
        prediction = model.predict(input_data_reshaped)
        if prediction[0] == 0:
            print(f"Using {name}: The person does not have any Heart Disease")
        else:
            print(f"Using {name}: The person has Heart Disease")
    else:
        cluster_label = model.predict(input_data_reshaped)
        print(f"Using {name}: Belongs to Cluster {cluster_label[0]}")


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  super()._check_params_vs_input(X, default_n_init=10)


Accuracy on Logistic Regression Training Data: 85.24%
Accuracy on Logistic Regression Test Data: 80.49%
Accuracy on SVM Training Data: 72.07%
Accuracy on SVM Test Data: 70.73%
Accuracy on Decision Tree Training Data: 100.00%
Accuracy on Decision Tree Test Data: 100.00%
Accuracy on Gaussian Naive Bayes Training Data: 83.90%
Accuracy on Gaussian Naive Bayes Test Data: 78.05%
Cluster Labels on Training Data: [0 1 1 0 0 1 0 1 0 1 1 1 1 0 1 0 1 1 1 0 0 1 1 0 0 1 0 1 0 1 0 0 0 1 0 1 1
 0 1 1 0 0 0 0 1 1 1 1 0 1 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 0 1 0 0 1 1 0 1 1
 0 0 1 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 1 1 1 1 0 1 0 0 1 0 1 0 1 0
 0 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 1
 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 1 0 1 0 1 1 0 0 1 1 0 1 0 1 1 1
 0 0 1 1 1 0 0 0 0 1 0 1 0 0 1 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 1 1 0 0 1 0 0 1 0 1 1 0 1 0 1 0
 0 0 0 1 0 1 0 1 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 1 1 0 1 0 0 1 0 0

