In [None]:
# Library Imports

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Setup and Load Data

from pathlib import Path

# The project root is taken to be the parent of the current working
# directory, so every path below depends on where the kernel was started.
cwd = Path.cwd()
BASE_DIR = cwd.parent

MODELS_DIR = BASE_DIR / "models"
DATA_PROCESSED_DIR = BASE_DIR / "data" / "processed"
REPORTS_DIR = BASE_DIR / "reports"
FIGURES_DIR = REPORTS_DIR / "figures"

# A bare tuple expression in the middle of a cell is evaluated and thrown
# away (only a cell's last expression is displayed), so print the resolved
# directories explicitly to make a wrong working directory visible early.
print("Processed Data Dir:", DATA_PROCESSED_DIR)
print("Reports Dir       :", REPORTS_DIR)
print("Figures Dir       :", FIGURES_DIR)

# Read the training split from the processed-data directory.
train_path = DATA_PROCESSED_DIR / "panel_country_split_train.csv"
train_df = pd.read_csv(train_path)

# Read the test split from the same directory.
test_path = DATA_PROCESSED_DIR / "panel_country_split_test.csv"
test_df = pd.read_csv(test_path)

# Report (rows, columns) of each split.
print(f"Train Shape: {train_df.shape}")
print(f"Test Shape : {test_df.shape}")

# Last expression of the cell: preview the first rows of the training split.
train_df.head()

In [None]:
# X & Y Split

target_col = "fertility"

# Columns removed from the feature matrices: the target itself and the
# "Country" column.
non_feature_cols = [target_col, "Country"]

X_train, y_train = train_df.drop(columns=non_feature_cols), train_df[target_col]
X_test, y_test = test_df.drop(columns=non_feature_cols), test_df[target_col]

In [None]:
# Random Forest Model

# Hyper-parameters gathered in one mapping so they can be read at a glance.
rf_params = {
    "n_estimators": 300,
    "max_depth": None,
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "max_features": "sqrt",
    "random_state": 42,
    "n_jobs": -1,
}

rf = RandomForestRegressor(**rf_params)

# Last expression of the cell: show the configured (not yet fitted) estimator.
rf

In [None]:
# Train the Model

# Fit on the training features and target. As the cell's last expression,
# the value returned by fit() is what the notebook displays.
rf.fit(X=X_train, y=y_train)

In [None]:
# Get Predictions

y_pred = rf.predict(X_test)

# Show the leading predictions next to the matching true target values.
preview_rows = (
    ("First 10 Predictions:", y_pred[:10]),
    ("First 10 True Values:", y_test.values[:10]),
)
for label, values in preview_rows:
    print(label, values)

In [None]:
# Performance Metrics

# Score the test-set predictions: MAE, RMSE (square root of the MSE) and R².
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"Random Forest - MAE : {mae:.4f}")
print(f"Random Forest - RMSE: {rmse:.4f}")
# Four decimals, matching the two lines above; the previous ".44f" spec
# printed 44 fractional digits, and the label's "²" was mis-encoded.
print(f"Random Forest - R²  : {r2:.4f}")

In [None]:
# Results DataFrame

# One record per model; the dict keys become the column names.
rf_scores = {
    "model": "RandomForestRegressor",
    "MAE": mae,
    "RMSE": rmse,
    "R2": r2,
}
results_rf = pd.DataFrame([rf_scores])

# Last expression of the cell: render the single-row summary table.
results_rf

In [None]:
# Feature Importance Graphic

# Order features from most to least important according to the fitted forest.
importances = rf.feature_importances_
indices = np.argsort(importances)[::-1]
feature_names = X_train.columns

plt.figure(figsize=(10, 6))
plt.title("Feature Importances - Random Forest")
plt.bar(range(len(importances)), importances[indices])
plt.xticks(range(len(importances)), feature_names[indices], rotation=90)
plt.ylabel("Importance")
plt.tight_layout()

# Save Figure
# savefig does not create missing directories, so make sure the target
# folder exists first (same approach used for MODELS_DIR when saving the model).
FIGURES_DIR.mkdir(parents=True, exist_ok=True)
save_path = FIGURES_DIR / "feature_importances_random_forest.png"
plt.savefig(save_path, dpi=300, bbox_inches="tight")

plt.show()

print(f"Saved to: {save_path}")

In [None]:
# Save Model Results

import joblib

# Destination of the serialized forest; its parent folder (MODELS_DIR) is
# created on demand, including any missing ancestors.
model_path = MODELS_DIR / "random_forest_fertility.pkl"
model_path.parent.mkdir(parents=True, exist_ok=True)

# Persist the fitted estimator to disk.
joblib.dump(rf, model_path)

print(f"Model Saved to: {model_path}")