In [1]:

import pandas as pd
import numpy as np

np.random.seed(42)
n = 600

df = pd.DataFrame({
    'age': np.random.randint(29, 77, n),
    'sex': np.random.randint(0, 2, n),
    'cp': np.random.randint(0, 4, n),
    'trestbps': np.random.randint(94, 200, n),
    'chol': np.random.randint(126, 564, n),
    'fbs': np.random.randint(0, 2, n),
    'restecg': np.random.randint(0, 3, n),
    'thalach': np.random.randint(71, 202, n),
    'exang': np.random.randint(0, 2, n),
    'oldpeak': np.round(np.random.uniform(0, 6.2, n), 1),
    'slope': np.random.randint(0, 3, n),
    'ca': np.random.randint(0, 4, n),
    'thal': np.random.randint(0, 3, n),
})

# Target variable (synthetic rule-based)
df['target'] = ((df['age'] > 50).astype(int) +
                (df['chol'] > 240).astype(int) +
                (df['thalach'] < 120).astype(int) +
                (df['exang'] == 1).astype(int))

df['target'] = (df['target'] > 1).astype(int)

df.head()


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,67,0,3,102,451,0,0,168,0,1.3,2,1,2,1
1,57,0,2,141,270,1,0,179,1,4.4,0,3,1,1
2,43,0,2,165,504,0,0,121,0,2.4,0,2,0,0
3,71,0,0,152,470,1,0,155,0,4.1,2,0,1,1
4,36,1,3,180,153,0,1,197,1,2.2,1,3,2,0


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = df.drop('target', axis=1)
y = df['target']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42)

X_train[:5]

array([[ 0.08246611,  1.03736424, -1.30986645,  0.63992635, -1.04411714,
         0.96720415,  1.2306973 , -0.37942503, -0.96076892, -0.21244442,
        -0.04328664,  0.45267873,  0.01452823],
       [-1.02528765,  1.03736424, -1.30986645, -0.46694982,  1.6788892 ,
         0.96720415,  0.02215658,  1.68407543, -0.96076892, -0.26904595,
         1.19347454,  0.45267873,  0.01452823],
       [-1.68993991,  1.03736424,  0.50309749, -0.10884282,  0.80102447,
         0.96720415, -1.18638414,  0.58532843,  1.040833  ,  0.29696938,
         1.19347454,  1.35803619,  0.01452823],
       [ 1.41177063,  1.03736424, -1.30986645, -0.53206018, -0.81652258,
         0.96720415, -1.18638414,  0.18334782,  1.040833  ,  0.91958624,
        -1.28004782, -0.45267873,  0.01452823],
       [-1.24683841, -0.96398156,  0.50309749, -0.27161873,  0.91482175,
         0.96720415,  1.2306973 , -1.15658754,  1.040833  ,  0.97618777,
         1.19347454, -1.35803619,  0.01452823]])

In [5]:

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

model = RandomForestClassifier(n_estimators=300, random_state=42)
model.fit(X_train, y_train)

pred = model.predict(X_test)
accuracy = accuracy_score(y_test, pred)
accuracy


0.9666666666666667

In [7]:

print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       1.00      0.86      0.92        28
           1       0.96      1.00      0.98        92

    accuracy                           0.97       120
   macro avg       0.98      0.93      0.95       120
weighted avg       0.97      0.97      0.97       120

