In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

In [4]:
path = "C:\\Users\\Republic Of Gamers\\OneDrive\\Documents\\GitHub\\TSDN-BoyWithLuv\\Source\\Data\\sdm_ts_emergency_daily.csv"
df = pd.read_csv(path)
df.head()

Unnamed: 0,Date of Admission,Emergency,"('Emergency', 'Female')","('Emergency', 'Male')",Emergency_Lag_1,Emergency_Lag_2,Emergency_Lag_3,month,day,year,quarter,dayofweek,dayofyear
0,2019-05-11,5,3,2,1,9,14,5,11,2019,2,5,131
1,2019-05-12,10,7,3,5,1,9,5,12,2019,2,6,132
2,2019-05-13,9,5,4,10,5,1,5,13,2019,2,0,133
3,2019-05-14,13,6,7,9,10,5,5,14,2019,2,1,134
4,2019-05-15,15,8,7,13,9,10,5,15,2019,2,2,135


In [5]:
df.set_index('Date of Admission', inplace=True)

In [6]:
trainEmergencySize = int(len(df) * 0.7)
trainEmergency, testEmergency= df[:trainEmergencySize], df[trainEmergencySize:]

In [7]:
X_train = trainEmergency.drop(columns=['Emergency'])
y_train = trainEmergency['Emergency']
X_test = testEmergency.drop(columns=['Emergency'])
y_test = testEmergency['Emergency']
X_train.columns = X_train.columns.astype(str)
X_test.columns = X_test.columns.astype(str)

In [8]:
# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Initialize the model
rf = RandomForestRegressor()

# Set up GridSearchCV
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, verbose=2)

# Fit to the data
grid_search.fit(X_train, y_train)

# Best parameters
print("Best parameters found: ", grid_search.best_params_)

Fitting 3 folds for each of 36 candidates, totalling 108 fits
Best parameters found:  {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 100}


In [9]:
# Extract the best parameters from the grid search
best_params = grid_search.best_params_

# Initialize the RandomForestRegressor with the best parameters
best_rf = RandomForestRegressor(**best_params)

# Fit the model to the full training data
best_rf.fit(X_train, y_train)

# Make predictions on the test set or future data
y_pred = best_rf.predict(X_test)

# Display predictions
print(y_pred)

[ 8.   13.12  2.98  6.9  14.06 15.95  8.    6.   17.1  13.11  3.08 10.
  9.    9.   12.   13.    6.18  3.99 13.    8.   11.    7.   11.98  6.
 16.02  9.02  5.89 10.99 11.   10.    9.06  6.   11.   12.01  8.01 14.
  9.    9.   10.29  9.    8.    6.    2.93 15.67  8.    2.94  9.   17.48
 10.02 10.    3.13  7.   10.   18.78  7.    8.   10.   16.11 10.02 11.01
 12.01  6.    3.44 11.   15.76  6.   11.01 13.04 11.   10.46  9.    5.
 14.18 11.03 11.   10.    6.    6.   10.   11.    7.   13.06 10.   10.01
 16.88 12.   11.   13.   14.28  8.   13.1   8.   12.   11.04  6.98 13.
  7.   16.05 11.   15.04 10.   15.04 10.02  9.    8.   12.   17.14  3.1
 10.08  6.97  6.99 12.01 11.    6.   11.   13.02 10.    8.   13.1   6.
  7.18  6.    2.95 11.   10.   11.   13.06 15.11  9.   11.99  8.   14.
 10.    7.    9.   10.    8.   13.    7.   17.36 15.87  9.   16.06 16.5
  8.    4.83 12.    9.01 10.   10.   15.03 12.97 19.59 18.38  5.25 15.2
  6.    9.   15.74  8.   13.    6.   11.    5.41  6.99  6.99  6.    

In [10]:
# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Calculate MAE
mae = mean_absolute_error(y_test, y_pred)

# Calculate R-squared
r2 = r2_score(y_test, y_pred)

# Display the metrics
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R²):", r2)

Root Mean Squared Error (RMSE): 0.27728973753124364
Mean Absolute Error (MAE): 0.08147810218978095
R-squared (R²): 0.9932774845666252


In [11]:
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}
xgb_model = XGBRegressor()

# Set up GridSearchCV
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, verbose=2)

# Fit to the data
grid_search.fit(X_train, y_train)

# Best parameters
print("Best parameters found: ", grid_search.best_params_)

Fitting 3 folds for each of 36 candidates, totalling 108 fits
Best parameters found:  {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}


Parameters: { "min_samples_split" } are not used.



In [12]:
# Extract the best parameters from the grid search
best_params = grid_search.best_params_

# Initialize the XGBRegressor with the best parameters
best_xgb = XGBRegressor(**best_params)

# Fit the model to the full training data
best_xgb.fit(X_train, y_train)

# Make predictions on the test set or future data
y_pred = best_xgb.predict(X_test)

# Display predictions
print("Predictions:", y_pred)

Parameters: { "min_samples_split" } are not used.



Predictions: [ 8.009224  12.975933   3.0564647  6.9508557 13.994575  15.918755
  8.010642   6.0288305 17.167965  12.966652   3.0383747 10.026951
  9.032791   8.980673  11.990371  13.034095   5.956457   4.0017233
 13.010713   7.990728  10.957498   6.9530573 11.976558   6.002575
 15.979272   9.038086   6.035554  11.044624  10.96995    9.987145
  8.995973   6.004911  11.025068  12.006584   7.9899807 13.97628
  9.027761   9.078722  10.6945095  9.051125   8.001822   5.999131
  3.0290437 15.44402    7.992163   3.0477788  9.011635  16.936642
  9.914825  10.022533   3.033086   6.965732   9.998768  19.069191
  6.9969935  7.991158   9.971505  16.380232  10.024043  11.046279
 11.997208   6.047348   3.0219884 11.013279  15.813795   6.0098505
 11.02738   12.926716  11.012308   9.96611    8.990223   4.992188
 14.031143  11.045948  11.006215   9.996945   6.0234494  6.002029
 10.035821  10.926953   6.959527  12.948402  10.016672   9.904174
 17.23644   11.982842  11.042905  13.00598   14.188163   8.037

In [13]:
# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Calculate MAE
mae = mean_absolute_error(y_test, y_pred)

# Calculate R-squared
r2 = r2_score(y_test, y_pred)

# Display the metrics
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Absolute Error (MAE):", mae)
print("R-squared (R²):", r2)

Root Mean Squared Error (RMSE): 0.16359646145062273
Mean Absolute Error (MAE): 0.06297126173102943
R-squared (R²): 0.9976600408554077


In [14]:
joblib.dump(best_xgb, 'xgb_emergency.pkl')

['xgb_emergency.pkl']