In [1]:
import os

import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

# Folder that holds the mitbih CSV files. It defaults to the original local
# directory; set the MITBIH_DATA_DIR environment variable to point the notebook
# at another location without editing this cell.
DATA_DIR = os.environ.get(
    "MITBIH_DATA_DIR",
    r"D:\All\USTH\B3\Mechine Learning in Medicine\data_lab1",
)
TRAIN_CSV = os.path.join(DATA_DIR, "mitbih_train.csv")
TEST_CSV  = os.path.join(DATA_DIR, "mitbih_test.csv")

# The files are read without a header row, so columns are addressed by position.
train_df = pd.read_csv(TRAIN_CSV, header=None)
test_df  = pd.read_csv(TEST_CSV, header=None)

# Every column except the last is used as a float32 feature; the last column is
# cast to int and used as the class label.
X_train = train_df.iloc[:, :-1].astype(np.float32).values
y_train = train_df.iloc[:, -1].astype(int).values

X_test  = test_df.iloc[:, :-1].astype(np.float32).values
y_test  = test_df.iloc[:, -1].astype(int).values

# Fail early if the two files do not have the same number of feature columns.
assert X_train.shape[1] == X_test.shape[1], "train/test feature counts differ"

print("Train:", X_train.shape, "Test:", X_test.shape)
# .to_dict() yields plain Python ints, so the counts print as {0: 72471, ...}
# rather than as np.int64(...) wrappers.
print("Class counts (train):", pd.Series(y_train).value_counts().sort_index().to_dict())

Train: (87554, 187) Test: (21892, 187)
Class counts (train): {0: np.int64(72471), 1: np.int64(2223), 2: np.int64(5788), 3: np.int64(641), 4: np.int64(6431)}


In [None]:
# Random-forest settings to compare. Each entry supplies the three values that
# vary between runs; max_depth=None and class_weight=None are passed through
# to the classifier unchanged.
configs = [
    dict(n_estimators=400, max_depth=None, class_weight="balanced_subsample"),
    dict(n_estimators=100, max_depth=None, class_weight=None),
    dict(n_estimators=200, max_depth=None, class_weight=None),
    dict(n_estimators=200, max_depth=20, class_weight=None),
    dict(n_estimators=200, max_depth=20, class_weight="balanced"),
]

In [3]:
# Fit one random forest per entry in `configs` on the training split, score it
# on the test split, and collect one summary row per configuration.
# NOTE(review): every configuration is scored on (X_test, y_test). If these
# numbers are used to pick a winner, the choice is tuned on the test data;
# consider a validation split or cross-validation for model selection.
results = []

for cfg in configs:
    print("\nTraining with config:", cfg)

    # The three settings that vary between runs; reused for the results row.
    hyperparams = {
        name: cfg[name]
        for name in ("n_estimators", "max_depth", "class_weight")
    }

    rf = RandomForestClassifier(
        max_features="sqrt",
        n_jobs=-1,
        random_state=42,  # fixed seed so repeated runs are comparable
        **hyperparams,
    )

    # fit() returns the fitted estimator, so prediction can be chained.
    y_pred = rf.fit(X_train, y_train).predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    macro_f1 = f1_score(y_test, y_pred, average="macro")

    print("Accuracy:", acc)
    print("Macro F1:", macro_f1)

    results.append({**hyperparams, "accuracy": acc, "macro_f1": macro_f1})



Training with config: {'n_estimators': 400, 'max_depth': None, 'class_weight': 'balanced_subsample'}
Accuracy: 0.9734149460990316
Macro F1: 0.8629338898767023

Training with config: {'n_estimators': 100, 'max_depth': None, 'class_weight': None}
Accuracy: 0.9746939521286314
Macro F1: 0.8748714871040442

Training with config: {'n_estimators': 200, 'max_depth': None, 'class_weight': None}
Accuracy: 0.9747853097021744
Macro F1: 0.8756267550906228

Training with config: {'n_estimators': 200, 'max_depth': 20, 'class_weight': None}
Accuracy: 0.9713594006943176
Macro F1: 0.8492244080429128

Training with config: {'n_estimators': 200, 'max_depth': 20, 'class_weight': 'balanced'}
Accuracy: 0.9723186552165175
Macro F1: 0.8532630178824443
