In [17]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.linear_model import LinearRegression,LogisticRegression
from sklearn.metrics import classification_report, mean_squared_error, mean_absolute_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.svm import SVC

In [18]:
# Read the heart-disease table from the working directory and preview its first ten rows.
data = pd.read_csv('heart.csv')
data.head(10)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
5,58,0,0,100,248,0,0,122,0,1.0,1,0,2,1
6,58,1,0,114,318,0,2,140,0,4.4,0,3,1,0
7,55,1,0,160,289,0,0,145,1,0.8,1,1,3,0
8,46,1,0,120,249,0,0,144,0,0.8,2,0,3,0
9,54,1,0,122,286,0,0,116,1,3.2,1,2,2,0


#### age
#### sex
#### chest pain type (4 values)
#### resting blood pressure
#### serum cholesterol in mg/dl
#### fasting blood sugar > 120 mg/dl
#### resting electrocardiographic results (values 0,1,2)
#### maximum heart rate achieved
#### exercise induced angina
#### oldpeak = ST depression induced by exercise relative to rest
#### the slope of the peak exercise ST segment
#### number of major vessels (0-3) colored by fluoroscopy
#### thal: 0 = normal; 1 = fixed defect; 2 = reversible defect

## Logistic Regression

In [19]:
# Logistic-regression baseline: predicts `target` from the 13 clinical columns of `data`.
# Steps: split 80/20 -> 5-fold cross-validation on the training part -> fit on the full
# training part -> report accuracy / precision / recall / F1 / ROC AUC on the held-out part.

# Define features and target variable
feature_columns = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal',
]
X = data[feature_columns]  # Features
y = data['target']  # Target

# Split into training and test set (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Cross-validation to assess generalization.
# The scaler is placed inside a pipeline so it is re-fit on the training portion of each
# fold. Scaling the whole training set up front would let every validation fold contribute
# to the mean/std used to transform it, which leaks information into the CV estimate.
cv_pipeline = make_pipeline(StandardScaler(), LogisticRegression())
cv_scores = cross_val_score(cv_pipeline, X_train, y_train, cv=5, scoring='accuracy')
print(f"Cross-Validation Accuracy scores: {cv_scores}")
print(f"Average Cross-Validation Accuracy: {np.mean(cv_scores):.4f}")

# Feature scaling for the final model: statistics come from the training rows only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the Logistic Regression model and train it on the scaled training data
log_reg = LogisticRegression()
log_reg.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = log_reg.predict(X_test_scaled)
y_pred_proba = log_reg.predict_proba(X_test_scaled)[:, 1]  # Positive-class probabilities for ROC/AUC

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba)  # AUC is computed from probabilities, not hard labels
conf_matrix = confusion_matrix(y_test, y_pred)

# Display evaluation results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC AUC Score: {roc_auc:.4f}")
print(f"Confusion Matrix:\n{conf_matrix}")

# Print a comparison of actual vs predicted values for better understanding
# (y_test keeps its original row index, so the rows can be traced back to `data`)
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print("\nComparison of Actual and Predicted Values:")
print(comparison.head())

Cross-Validation Accuracy scores: [0.83536585 0.85365854 0.8902439  0.86585366 0.79878049]
Average Cross-Validation Accuracy: 0.8488
Accuracy: 0.7951
Precision: 0.7563
Recall: 0.8738
F1 Score: 0.8108
ROC AUC Score: 0.8787
Confusion Matrix:
[[73 29]
 [13 90]]

Comparison of Actual and Predicted Values:
     Actual  Predicted
527       1          1
359       1          1
447       0          0
31        1          1
621       0          0


## SVM

In [20]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Learn the scaling statistics from the training rows only, then apply them to both splits.
# From here on X_train / X_test hold the standardized values.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a support vector classifier with a linear kernel
# (other kernels such as 'rbf' or 'poly' can be swapped in here).
model = SVC(kernel='linear', random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = model.predict(X_test)

# Report overall accuracy and the per-class precision / recall / F1 table
svm_accuracy = accuracy_score(y_test, y_pred)
svm_report = classification_report(y_test, y_pred)
print("Accuracy:", svm_accuracy)
print("\nClassification Report:\n", svm_report)

Accuracy: 0.8146341463414634

Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.71      0.79       102
           1       0.76      0.92      0.83       103

    accuracy                           0.81       205
   macro avg       0.83      0.81      0.81       205
weighted avg       0.83      0.81      0.81       205

