In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import pickle

In [5]:
# Read heart.csv (path is relative to the current working directory) into a
# DataFrame and print its first five rows as a quick check of the parsed columns.
heart = pd.read_csv(filepath_or_buffer="heart.csv")
print(heart.head(n=5))

   age  gender  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
0   63       1   3       145   233    1        0      150      0      2.3   
1   37       1   2       130   250    0        1      187      0      3.5   
2   41       0   1       130   204    0        0      172      0      1.4   
3   56       1   1       120   236    0        1      178      0      0.8   
4   57       0   0       120   354    0        1      163      1      0.6   

   slope  ca  thal  target  
0      0   0     1       1  
1      0   0     2       1  
2      2   0     2       1  
3      2   0     2       1  
4      2   0     2       1  


In [6]:
# Tally how many rows carry each distinct value of the 'target' column,
# to see how balanced the two classes are before modelling.
print(heart.loc[:, 'target'].value_counts())

target
1    165
0    138
Name: count, dtype: int64


In [7]:
# Pull the label column out as y; every remaining column becomes a predictor in X.
# `drop` returns a new frame, so `heart` itself is left unmodified.
y = heart['target']
X = heart.drop(columns=['target'])

In [8]:
# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [9]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then reused unchanged on the test split, so no
# information from the held-out rows influences the transformation.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:

# Build a 20-tree random forest with a fixed seed (for repeatable results)
# and fit it on the standardized training features.
model = RandomForestClassifier(
    random_state=42,
    n_estimators=20,
)
model.fit(X=X_train_scaled, y=y_train)

In [11]:
# Predict a class label for every row of the standardized test split
y_pred = model.predict(X=X_test_scaled)

In [12]:
# Fraction of test rows whose predicted label matches the true label,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 82.89%


In [13]:
# Per-class precision/recall/F1 summary for the test predictions
print(
    'Classification Report\n',
    classification_report(y_true=y_test, y_pred=y_pred),
)
# Counts of true-vs-predicted labels for the test predictions
print(
    'Confusion Matrix\n',
    confusion_matrix(y_true=y_test, y_pred=y_pred),
)

Classification Report
               precision    recall  f1-score   support

           0       0.82      0.80      0.81        35
           1       0.83      0.85      0.84        41

    accuracy                           0.83        76
   macro avg       0.83      0.83      0.83        76
weighted avg       0.83      0.83      0.83        76

Confusion Matrix
 [[28  7]
 [ 6 35]]


In [14]:
# Positions (0-based, in test-split order) where the model predicted class 1,
# kept as a plain list of ints.
positive_indices = np.flatnonzero(y_pred == 1).tolist()
# Select the matching rows from the *unscaled* test features by position,
# so the values shown are in their original units.
positive_inputs = X_test.iloc[positive_indices]

In [15]:
# Print a heading followed by the feature rows the model labelled as class 1;
# the newline separator puts the frame on its own line below the heading.
print("Inputs predicted to be positive:", positive_inputs, sep="\n")

Inputs predicted to be positive:
     age  gender  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
228   59       1   3       170   288    0        0      159      0      0.2   
111   57       1   2       150   126    1        1      173      0      0.2   
60    71       0   2       110   265    1        0      130      0      0.0   
9     57       1   2       150   168    0        1      174      0      1.6   
119   46       0   0       138   243    0        0      152      1      0.0   
5     57       1   0       140   192    0        1      148      0      0.4   
45    52       1   1       120   325    0        1      172      0      0.2   
118   46       0   1       105   204    0        1      172      0      0.0   
46    44       1   2       140   235    0        0      180      0      0.0   
125   34       0   1       118   210    0        1      192      0      0.7   
152   64       1   3       170   227    0        0      155      0      0.6   
272   67       1   

In [16]:
# Serialize the trained classifier to 'heart.pkl' in the working directory.
# The file is opened in a `with` block so the handle is flushed and closed
# deterministically, even if pickling raises part-way through.
# NOTE(review): `model` was fit on StandardScaler-transformed features, but the
# fitted `scaler` is not written here. Whatever loads this file must apply the
# same scaling before calling predict — confirm the consumer does.
filename = 'heart.pkl'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)