In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
from xgboost import XGBClassifier

# Load the training table. "increase_stock" is the target column; every other
# column is used as a feature.
data = pd.read_csv("modified_training.csv")

X = data.drop(columns=["increase_stock"])
y = data["increase_stock"]

# Encode the two class labels as integers (high_bike_demand -> 1).
# Series.map turns every value that is not a key of the dict into NaN, so an
# unexpected or missing label would otherwise slip through silently and only
# surface later as an opaque NaN error. Fail fast and name the offending values.
label_codes = {"low_bike_demand": 0, "high_bike_demand": 1}
raw_labels = y
y = raw_labels.map(label_codes)

unmapped = y.isna()
if unmapped.any():
    bad_labels = raw_labels[unmapped].unique().tolist()
    raise ValueError(
        f"'increase_stock' contains {int(unmapped.sum())} value(s) outside "
        f"{list(label_codes)}: {bad_labels}"
    )

# 80/20 hold-out split; stratify=y keeps the class ratio the same in both parts,
# and the fixed random_state makes the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

def report_metrics(y_true, y_pred):
    """Print the evaluation summary for one set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels, aligned with ``y_true``.

    Prints, in order: the confusion matrix, the per-class classification
    report, the accuracy, and the F1 score. Returns nothing.
    """
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("Classification Report:\n", classification_report(y_true, y_pred))
    print("Accuracy Score:", accuracy_score(y_true, y_pred))
    print("F1 Score:", f1_score(y_true, y_pred))


# --- Baseline: XGBoost with default hyperparameters -------------------------
xgb = XGBClassifier(eval_metric="logloss", random_state=42)

xgb.fit(X_train, y_train)

y_pred = xgb.predict(X_val)

report_metrics(y_val, y_pred)

# --- Randomized hyperparameter search ---------------------------------------
# The grid below spans 4*4*5*3*4*4*4 = 15,360 combinations; the search samples
# n_iter=100 of them and scores each with 3-fold CV (300 fits in total).
param_grid = {
    "n_estimators": [50, 100, 200, 300],
    "learning_rate": [0.01, 0.05, 0.1, 0.2],
    "max_depth": [3, 5, 7, 9, 11],
    "min_child_weight": [1, 2, 3],
    "subsample": [0.5, 0.7, 0.8, 1.0],
    "colsample_bytree": [0.5, 0.7, 0.8, 1.0],
    "gamma": [0, 0.1, 0.5, 1]
}

# Candidates are ranked by F1 on the positive class (label 1). The search works
# on clones of `xgb`, so the baseline model fitted above is left untouched.
random_search = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=param_grid,
    n_iter=100,
    scoring='f1',
    cv=3,
    verbose=2,
    n_jobs=-1,
    random_state=42
)

# Only the training split is used for the search; X_val stays held out.
random_search.fit(X_train, y_train)

print("Best Parameters from Random Search:", random_search.best_params_)

# With the default refit=True, best_estimator_ has already been refitted on all
# of X_train using the best parameters found.
best_xgb = random_search.best_estimator_

y_pred = best_xgb.predict(X_val)

report_metrics(y_val, y_pred)

Confusion Matrix:
 [[254   8]
 [ 18  40]]
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.97      0.95       262
           1       0.83      0.69      0.75        58

    accuracy                           0.92       320
   macro avg       0.88      0.83      0.85       320
weighted avg       0.92      0.92      0.92       320

Accuracy Score: 0.91875
F1 Score: 0.7547169811320755
Fitting 3 folds for each of 100 candidates, totalling 300 fits
Best Parameters from Random Search: {'subsample': 1.0, 'n_estimators': 50, 'min_child_weight': 2, 'max_depth': 5, 'learning_rate': 0.1, 'gamma': 0.5, 'colsample_bytree': 0.8}
Confusion Matrix:
 [[257   5]
 [ 13  45]]
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.98      0.97       262
           1       0.90      0.78      0.83        58

    accuracy                           0.94       320
   macro avg       0.93      0.8