In [3]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Binary task: read the pre-split arrays from disk. Features and labels are
# loaded pairwise (train, test) so each pair sits on one statement.
X_train, X_test = (
    np.load("dataset/X_train_binary.npy"),
    np.load("dataset/X_test_binary.npy"),
)
y_train, y_test = (
    np.load("dataset/y_train_binary.npy"),
    np.load("dataset/y_test_binary.npy"),
)

# Build and train the forest in one expression; `fit` returns the estimator
# itself, so `rf_binary` ends up bound to the fitted model.
rf_binary = RandomForestClassifier(
    n_estimators=30,
    max_depth=20,
    class_weight='balanced',  # the recorded report shows ~4x more class-0 than class-1 rows
    random_state=42,          # fixed seed so repeated runs build the same forest
    n_jobs=-1,
).fit(X_train, y_train)

# Score the held-out split: overall accuracy first, then the per-class report.
y_pred_binary = rf_binary.predict(X_test)
binary_accuracy = accuracy_score(y_test, y_pred_binary)

print("Binary Accuracy:", binary_accuracy)
print(classification_report(y_test, y_pred_binary))

Binary Accuracy: 0.9989155585521764
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    681396
           1       1.00      1.00      1.00    166967

    accuracy                           1.00    848363
   macro avg       1.00      1.00      1.00    848363
weighted avg       1.00      1.00      1.00    848363



In [5]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import time

# -----------------------------
# Load Multiclass Split
# -----------------------------
# NOTE: these assignments rebind X_train / X_test / y_train / y_test, the same
# names the binary cell uses. After this cell runs they refer to the
# multiclass split, so anything binary-specific must not read them afterwards.
X_train = np.load("dataset/X_train_multi.npy")
X_test = np.load("dataset/X_test_multi.npy")
y_train = np.load("dataset/y_train_multi.npy")
y_test = np.load("dataset/y_test_multi.npy")

print("Training shape:", X_train.shape)
print("Test shape:", X_test.shape)

# -----------------------------
# Train Random Forest
# -----------------------------
# perf_counter() is a monotonic clock meant for measuring elapsed intervals;
# unlike time.time() it is not affected by system clock adjustments.
start_time = time.perf_counter()

rf_multi = RandomForestClassifier(
    n_estimators=50,
    max_depth=None,           # no depth cap on the individual trees
    random_state=42,          # fixed seed so repeated runs build the same forest
    class_weight='balanced',
    n_jobs=-1
)

rf_multi.fit(X_train, y_train)

end_time = time.perf_counter()

print("\nTraining Time:", round(end_time - start_time, 2), "seconds")

# -----------------------------
# Predictions
# -----------------------------
# Run the forest over the test set only once. The forest's predicted label is
# the class with the highest mean probability across trees, so the hard
# predictions can be derived from the probability matrix instead of paying
# for a second full pass with predict().
# Assumes a single-output target (1-D y_train), as the per-class report
# suggests - TODO confirm if the label array layout ever changes.
y_prob_multi = rf_multi.predict_proba(X_test)
y_pred_multi = rf_multi.classes_[np.argmax(y_prob_multi, axis=1)]

print("\nMulticlass Accuracy:", accuracy_score(y_test, y_pred_multi))
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred_multi))

# -----------------------------
# Save Predictions
# -----------------------------
# Hard labels and the per-class probability matrix are stored side by side;
# the probability columns follow the order of rf_multi.classes_.
np.save("dataset/y_pred_rf_multi.npy", y_pred_multi)
np.save("dataset/y_prob_rf_multi.npy", y_prob_multi)

print("\nMulticlass RF predictions saved.")


Training shape: (1979513, 78)
Test shape: (848363, 78)

Training Time: 165.99 seconds

Multiclass Accuracy: 0.9985383615268464

Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00    681396
           1       0.89      0.73      0.80       587
           2       1.00      1.00      1.00     38408
           3       1.00      0.99      0.99      3088
           4       1.00      1.00      1.00     69037
           5       0.99      0.99      0.99      1650
           6       1.00      1.00      1.00      1739
           7       1.00      1.00      1.00      2380
           8       0.47      0.36      0.41       216
           9       0.99      1.00      0.99     47641
          10       1.00      1.00      1.00      1769
          11       0.75      0.78      0.76       452

    accuracy                           1.00    848363
   macro avg       0.92      0.90      0.91    848363
weighted avg       1.00      1.00   

In [7]:
# Persist the binary model's test-set predictions for later evaluation.
np.save("dataset/y_pred_rf_binary.npy", y_pred_binary)

# Do NOT write `y_test` to dataset/y_test_binary.npy here. The multiclass cell
# rebinds `y_test` to the labels loaded from y_test_multi.npy, so by the time
# this cell runs `y_test` holds multiclass labels. Both test splits have the
# same number of rows, so saving it would silently replace the binary ground
# truth with multiclass labels and no shape error would ever surface.
# The binary labels already live at this path (the binary cell loads them from
# it), so re-read them and check that they still line up with the predictions.
y_test_binary = np.load("dataset/y_test_binary.npy")

if y_test_binary.shape[0] != y_pred_binary.shape[0]:
    raise ValueError(
        "dataset/y_test_binary.npy has "
        f"{y_test_binary.shape[0]} labels but there are "
        f"{y_pred_binary.shape[0]} binary predictions"
    )

# Catches a label file that an earlier run already overwrote with labels the
# binary model was never trained on.
if not np.isin(y_test_binary, rf_binary.classes_).all():
    raise ValueError(
        "dataset/y_test_binary.npy contains labels outside the classes "
        "rf_binary was trained on; regenerate the binary split"
    )