In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer


In [2]:
data = pd.read_csv('heart_disease.csv')

In [3]:
X = data.drop("target", axis=1)
y = data["target"]


In [4]:
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns

In [5]:
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

In [6]:
preprocessor = ColumnTransformer([
    ('num', numerical_pipeline, numerical_features)
])

In [7]:
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42, n_estimators=100, class_weight='balanced'))
])


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:
pipeline.fit(X_train, y_train)


In [10]:
y_pred = pipeline.predict(X_test)

In [11]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.9853658536585366
Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205

Confusion Matrix:
 [[102   0]
 [  3 100]]


In [12]:
X_new = pd.DataFrame({
    'age': [45],
    'sex': [1],
    'cp': [3],
    'trestbps': [120],
    'chol': [240],
    'fbs': [0],
    'restecg': [1],
    'thalach': [150],
    'exang': [0],
    'oldpeak': [1.2],
    'slope': [2],
    'ca': [0],
    'thal': [2]
})


In [13]:
prediction = pipeline.predict(X_new)
print(f"Predicted Disease: {prediction}")

Predicted Disease: [1]
