In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
heart_data = pd.read_csv('heart.csv')

# Feature Normalization
scaler = StandardScaler()
features_to_scale = ['age', 'trtbps', 'chol', 'thalachh', 'oldpeak']
heart_data[features_to_scale] = scaler.fit_transform(heart_data[features_to_scale])

# Data Splitting
X = heart_data.drop('output', axis=1)  # All columns except the target
y = heart_data['output']               # Target column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Initialize the Logistic Regression model
model = LogisticRegression()

# Train the model
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", report)

Accuracy: 0.8524590163934426
Confusion Matrix:
 [[25  4]
 [ 5 27]]
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.86      0.85        29
           1       0.87      0.84      0.86        32

    accuracy                           0.85        61
   macro avg       0.85      0.85      0.85        61
weighted avg       0.85      0.85      0.85        61


In [3]:
from sklearn.model_selection import GridSearchCV

# Define a set of parameters for tuning
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'solver': ['liblinear', 'lbfgs']  # Different algorithms for optimization
}

# Initialize GridSearchCV
grid_search = GridSearchCV(LogisticRegression(), param_grid, cv=5, scoring='accuracy')

# Fit GridSearchCV
grid_search.fit(X_train, y_train)

# Best parameters and best score
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))


Best parameters: {'C': 1, 'solver': 'liblinear'}
Best cross-validation score: 0.83


In [4]:
from sklearn.tree import DecisionTreeClassifier

# Initialize the Decision Tree model
tree_model = DecisionTreeClassifier(random_state=42)

# Train the model
tree_model.fit(X_train, y_train)

# Predict on the test set
y_pred_tree = tree_model.predict(X_test)

# Evaluate the model
accuracy_tree = accuracy_score(y_test, y_pred_tree)
print("Decision Tree Accuracy:", accuracy_tree)

Decision Tree Accuracy: 0.7540983606557377


In [5]:
from sklearn.ensemble import RandomForestClassifier

# Initialize the Random Forest model
forest_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
forest_model.fit(X_train, y_train)

# Predict on the test set
y_pred_forest = forest_model.predict(X_test)

# Evaluate the model
accuracy_forest = accuracy_score(y_test, y_pred_forest)
print("Random Forest Accuracy:", accuracy_forest)

Random Forest Accuracy: 0.8360655737704918


In [6]:
from sklearn.ensemble import GradientBoostingClassifier

# Initialize the GBM model
gbm_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)

# Train the model
gbm_model.fit(X_train, y_train)

# Predict on the test set
y_pred_gbm = gbm_model.predict(X_test)

# Evaluate the model
accuracy_gbm = accuracy_score(y_test, y_pred_gbm)
print("GBM Accuracy:", accuracy_gbm)

GBM Accuracy: 0.7868852459016393


In [7]:
from sklearn.svm import SVC

# Initialize the SVM model
svm_model = SVC(kernel='linear')

# Train the model
svm_model.fit(X_train, y_train)

# Predict on the test set
y_pred_svm = svm_model.predict(X_test)

# Evaluate the model
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print("SVM Accuracy:", accuracy_svm)

SVM Accuracy: 0.8688524590163934


In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return accuracy, precision, recall, f1

# Calculate metrics for each model
metrics_tree = calculate_metrics(y_test, y_pred_tree)
metrics_forest = calculate_metrics(y_test, y_pred_forest)
metrics_gbm = calculate_metrics(y_test, y_pred_gbm)
metrics_svm = calculate_metrics(y_test, y_pred_svm)

In [9]:
# Prepare the data for a DataFrame
data = {
    'Model': ['Decision Tree', 'Random Forest', 'Gradient Boosting', 'Support Vector Machine'],
    'Accuracy': [metrics_tree[0], metrics_forest[0], metrics_gbm[0], metrics_svm[0]],
    'Precision': [metrics_tree[1], metrics_forest[1], metrics_gbm[1], metrics_svm[1]],
    'Recall': [metrics_tree[2], metrics_forest[2], metrics_gbm[2], metrics_svm[2]],
    'F1 Score': [metrics_tree[3], metrics_forest[3], metrics_gbm[3], metrics_svm[3]]
}

# Convert to DataFrame
results_df = pd.DataFrame(data)

In [ ]:
# Set up the matplotlib figure
plt.figure(figsize=(14, 8))

# Plot each metric in a subplot
for i, metric in enumerate(['Accuracy', 'Precision', 'Recall', 'F1 Score'], 1):
    plt.subplot(2, 2, i)
    plt.bar(results_df['Model'], results_df[metric], color=['blue', 'green', 'red', 'purple'])
    plt.title(metric)
    plt.xticks(rotation=45)
    plt.ylabel(metric)

plt.tight_layout()
plt.show()