In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [4]:
path = "C:\\Users\\Republic Of Gamers\\OneDrive\\Documents\\GitHub\\TSDN-BoyWithLuv\\Source\\Data\\sdm_emergency_monthly.csv"
emergency_df = pd.read_csv(path)
emergency_df.head()

Unnamed: 0,Emergency,"('Emergency', 'A')","('Emergency', 'AB')","('Emergency', 'B')","('Emergency', 'O')","('Emergency', 'Female')","('Emergency', 'Male')",Emergency_Lag_1,Emergency_Lag_2,Emergency_Lag_3
0,305,81,77,72,75,157,148,308.0,275.0,224.0
1,300,73,71,83,73,140,160,305.0,308.0,275.0
2,306,79,77,78,72,174,132,300.0,305.0,308.0
3,284,73,60,75,76,144,140,306.0,300.0,305.0
4,308,79,72,87,70,168,140,284.0,306.0,300.0


In [5]:
emergency_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 58 entries, 0 to 57
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Emergency                58 non-null     int64  
 1   ('Emergency', 'A')       58 non-null     int64  
 2   ('Emergency', 'AB')      58 non-null     int64  
 3   ('Emergency', 'B')       58 non-null     int64  
 4   ('Emergency', 'O')       58 non-null     int64  
 5   ('Emergency', 'Female')  58 non-null     int64  
 6   ('Emergency', 'Male')    58 non-null     int64  
 7   Emergency_Lag_1          58 non-null     float64
 8   Emergency_Lag_2          58 non-null     float64
 9   Emergency_Lag_3          58 non-null     float64
dtypes: float64(3), int64(7)
memory usage: 4.7 KB


In [6]:
trainEmergencySize = int(len(emergency_df) * 0.7)
trainEmergency, testEmergency= emergency_df[:trainEmergencySize], emergency_df[trainEmergencySize:]

In [8]:
X_train = trainEmergency.drop(columns=['Emergency'])
y_train = trainEmergency['Emergency']
X_test = testEmergency.drop(columns=['Emergency'])
y_test = testEmergency['Emergency']
X_train.columns = X_train.columns.astype(str)
X_test.columns = X_test.columns.astype(str)

In [12]:
# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Initialize the model
rf = RandomForestRegressor()

# Set up GridSearchCV
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, verbose=2)

# Fit to the data
grid_search.fit(X_train, y_train)

# Best parameters
print("Best parameters found: ", grid_search.best_params_)


Fitting 3 folds for each of 36 candidates, totalling 108 fits
Best parameters found:  {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 100}


In [13]:
# Extract the best parameters from the grid search
best_params = grid_search.best_params_

# Initialize the RandomForestRegressor with the best parameters
best_rf = RandomForestRegressor(**best_params)

# Fit the model to the full training data
best_rf.fit(X_train, y_train)

# Make predictions on the test set or future data
y_pred = best_rf.predict(X_test)

# Display predictions
print(y_pred)

[295.44 312.88 300.61 313.94 307.31 309.76 309.27 296.78 307.5  306.19
 311.9  299.49 311.57 291.32 304.22 295.47 321.16 275.13]


In [14]:
# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Calculate MAE
mae = mean_absolute_error(y_test, y_pred)

# Calculate R-squared
r2 = r2_score(y_test, y_pred)

# Display the metrics
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R²):", r2)

Root Mean Squared Error (RMSE): 50.136731932674756
Mean Absolute Error (MAE): 16.78666666666666
R-squared (R²): 0.21307777661831717


In [17]:
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}
xgb_model = XGBRegressor()

# Set up GridSearchCV
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, verbose=2)

# Fit to the data
grid_search.fit(X_train, y_train)

# Best parameters
print("Best parameters found: ", grid_search.best_params_)

Fitting 3 folds for each of 36 candidates, totalling 108 fits
Best parameters found:  {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}


Parameters: { "min_samples_split" } are not used.



In [18]:
# Extract the best parameters from the grid search
best_params = grid_search.best_params_

# Initialize the XGBRegressor with the best parameters
best_xgb = XGBRegressor(**best_params)

# Fit the model to the full training data
best_xgb.fit(X_train, y_train)

# Make predictions on the test set or future data
y_pred = best_xgb.predict(X_test)

# Display predictions
print("Predictions:", y_pred)

Predictions: [292.31232 314.08145 290.99435 308.72064 314.971   325.9288  310.72794
 291.17084 309.0245  308.02124 313.0258  299.49484 309.73633 290.03476
 306.3696  306.29196 332.99887 275.39697]


Parameters: { "min_samples_split" } are not used.



In [19]:
# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Calculate MAE
mae = mean_absolute_error(y_test, y_pred)

# Calculate R-squared
r2 = r2_score(y_test, y_pred)

# Display the metrics
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R²):", r2)

Root Mean Squared Error (RMSE): 50.45518118792676
Mean Absolute Error (MAE): 17.723007202148438
R-squared (R²): 0.20304960012435913
