In [8]:
import pickle
import pandas as pd
import requests
import io
import zipfile
import os

from sklearn.metrics import mean_squared_error,mean_absolute_error, r2_score

In [2]:
url = 'https://github.com/LM-UCSD/Group5_Capstone_WI25/raw/main/models/lr_model.pkl'

response = requests.get(url)
lr_model = pickle.load(io.BytesIO(response.content))

In [3]:
url = 'https://github.com/LM-UCSD/Group5_Capstone_WI25/raw/main/models/rf_model.pkl'

response = requests.get(url)
rf_model = pickle.load(io.BytesIO(response.content))

In [4]:
url = 'https://github.com/LM-UCSD/Group5_Capstone_WI25/raw/main/models/xgb_model.pkl'

response = requests.get(url)
xgb_model = pickle.load(io.BytesIO(response.content))

In [5]:
print("Linear Regression Model:", lr_model)
print("Random Forest Model:", rf_model)
print("XG Boosting Model:", xgb_model)

Linear Regression Model: LinearRegression()
Random Forest Model: MultiOutputRegressor(estimator=RandomForestRegressor(n_estimators=5, n_jobs=-1,
                                                     random_state=20))
XG Boosting Model: MultiOutputRegressor(estimator=XGBRegressor(base_score=None, booster=None,
                                            callbacks=None,
                                            colsample_bylevel=None,
                                            colsample_bynode=None,
                                            colsample_bytree=None,
                                            device='cuda',
                                            early_stopping_rounds=None,
                                            enable_categorical=False,
                                            eval_metric=None,
                                            feature_types=None, gamma=None,
                                            grow_policy=None,
                           

In [13]:
## Load Aggregate Sampled Data from Notebook 3

url = 'https://github.com/LM-UCSD/Group5_Capstone_WI25/raw/main/data/Train_Test.zip'

response = requests.get(url)

with open('Train_Test.zip', 'wb') as f:
    f.write(response.content)

unzip_dir = os.getcwd()

with zipfile.ZipFile('Train_Test.zip', 'r') as zip_ref:
    zip_ref.extractall(unzip_dir)

X_test = pd.read_csv(os.path.join(unzip_dir, 'Train_Test/X_test.csv'), index_col=0)

y_test = pd.read_csv(os.path.join(unzip_dir, 'Train_Test/y_test.csv'),index_col=0)

In [None]:
lr_preds = lr_model.predict(X_test)
rf_preds = rf_model.predict(X_test)
xgb_preds = xgb_model.predict(X_test)

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




In [None]:
## RMSE, MAE, R^2?

lr_rmse = mean_squared_error(y_test, lr_preds, squared=False)
print(f'Linear Regression RMSE: {lr_rmse.mean()}')

rf_rmse = mean_squared_error(y_test, rf_preds, squared=False, multioutput='raw_values')
print(f'Random Forest RMSE per flag: {rf_rmse.mean()}')

xgb_rmse = mean_squared_error(y_test, xgb_preds, squared=False, multioutput='raw_values')
print(f'Gradient Boosting RMSE per flag: {xgb_rmse.mean()}')

lr_mae = mean_absolute_error(y_test, lr_preds)
print(f'Linear Regression MAE: {lr_mae}')

rf_mae = mean_absolute_error(y_test, rf_preds)
print(f'Random Forest MAE per flag: {rf_mae}')

xgb_mae = mean_absolute_error(y_test, xgb_preds)
print(f'Gradient Boosting MAE per flag: {xgb_mae}')

overall_r2_lr = r2_score(y_test, lr_preds)
overall_r2_rf = r2_score(y_test, rf_preds)
overall_r2_xgb = r2_score(y_test, xgb_preds)

print("Overall R^2 (Linear Regression):", overall_r2_lr)
print("Overall R^2 (Random Forest):", overall_r2_rf)
print("Overall R^2 (XG Gradient Boosting):", overall_r2_xgb)