In [62]:
import pandas as pd

df = pd.read_csv("archive/Balaji Fast Food Sales.csv")

df.head()

Unnamed: 0,order_id,date,item_name,item_type,item_price,quantity,transaction_amount,transaction_type,received_by,time_of_sale
0,1,07-03-2022,Aalopuri,Fastfood,20,13,260,,Mr.,Night
1,2,8/23/2022,Vadapav,Fastfood,20,15,300,Cash,Mr.,Afternoon
2,3,11/20/2022,Vadapav,Fastfood,20,1,20,Cash,Mr.,Afternoon
3,4,02-03-2023,Sugarcane juice,Beverages,25,6,150,Online,Mr.,Night
4,5,10-02-2022,Sugarcane juice,Beverages,25,8,200,Online,Mr.,Evening


In [63]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   order_id            1000 non-null   int64 
 1   date                1000 non-null   object
 2   item_name           1000 non-null   object
 3   item_type           1000 non-null   object
 4   item_price          1000 non-null   int64 
 5   quantity            1000 non-null   int64 
 6   transaction_amount  1000 non-null   int64 
 7   transaction_type    893 non-null    object
 8   received_by         1000 non-null   object
 9   time_of_sale        1000 non-null   object
dtypes: int64(4), object(6)
memory usage: 78.3+ KB


In [64]:
df.isnull().sum()

order_id                0
date                    0
item_name               0
item_type               0
item_price              0
quantity                0
transaction_amount      0
transaction_type      107
received_by             0
time_of_sale            0
dtype: int64

In [65]:
df['transaction_type'] = df['transaction_type'].fillna("Credit Card")
df.isnull().sum()

order_id              0
date                  0
item_name             0
item_type             0
item_price            0
quantity              0
transaction_amount    0
transaction_type      0
received_by           0
time_of_sale          0
dtype: int64

In [66]:
df.duplicated().sum()

0

In [67]:
df['received_by'] = df['received_by'].replace(['Mr.','Mrs.'],['Male','Female'])

In [68]:
df['date'] = df['date'].str.replace('/','-')
df['date']

0      07-03-2022
1       8-23-2022
2      11-20-2022
3      02-03-2023
4      10-02-2022
          ...    
995     3-19-2023
996     9-20-2022
997     1-26-2023
998     8-27-2022
999     5-29-2022
Name: date, Length: 1000, dtype: object

In [69]:
df['date'] = pd.to_datetime(df['date'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   order_id            1000 non-null   int64         
 1   date                1000 non-null   datetime64[ns]
 2   item_name           1000 non-null   object        
 3   item_type           1000 non-null   object        
 4   item_price          1000 non-null   int64         
 5   quantity            1000 non-null   int64         
 6   transaction_amount  1000 non-null   int64         
 7   transaction_type    1000 non-null   object        
 8   received_by         1000 non-null   object        
 9   time_of_sale        1000 non-null   object        
dtypes: datetime64[ns](1), int64(4), object(5)
memory usage: 78.3+ KB


In [70]:
unique_values = df.select_dtypes(include = 'object').apply(lambda x: x.unique())
for column, values in unique_values.items():
    print(column, values)

item_name ['Aalopuri' 'Vadapav' 'Sugarcane juice' 'Panipuri' 'Frankie' 'Sandwich'
 'Cold coffee']
item_type ['Fastfood' 'Beverages']
transaction_type ['Credit Card' 'Cash' 'Online']
received_by ['Male' 'Female']
time_of_sale ['Night' 'Afternoon' 'Evening' 'Morning' 'Midnight']


In [81]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
import optuna

def create_advanced_features(df):
    # Time-based features
    df['hour_of_day'] = pd.Categorical(df['time_of_sale'], 
                                     categories=['Morning', 'Afternoon', 'Evening', 'Night'],
                                     ordered=True).codes
    
    # Enhanced time-based features
    df['day_of_week'] = df['date'].dt.dayofweek
    df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)
    df['month'] = df['date'].dt.month
    df['quarter'] = df['date'].dt.quarter
    df['day_of_year'] = df['date'].dt.dayofyear
    df['week_of_year'] = df['date'].dt.isocalendar().week
    
    # Sales patterns
    daily_stats = df.groupby(['date', 'time_of_sale'])['transaction_amount'].agg([
        'mean', 'count', 'std', 'min', 'max'
    ]).reset_index()
    df = df.merge(daily_stats, on=['date', 'time_of_sale'], suffixes=('', '_daily'))
    
    # Rolling statistics
    df = df.sort_values('date')
    for window in [7, 14, 30]:
        df[f'rolling_{window}d_mean'] = df.groupby('time_of_sale')['transaction_amount'].transform(
            lambda x: x.rolling(window, min_periods=1).mean())
        df[f'rolling_{window}d_std'] = df.groupby('time_of_sale')['transaction_amount'].transform(
            lambda x: x.rolling(window, min_periods=1).std())
    
    return df

def create_ensemble_model(params):
    rf = RandomForestRegressor(
        n_estimators=params['rf_n_estimators'],
        max_depth=params['rf_max_depth'],
        min_samples_split=params['rf_min_samples_split'],
        min_samples_leaf=params['rf_min_samples_leaf'],
        random_state=42,
        n_jobs=-1
    )
    
    gb = GradientBoostingRegressor(
        n_estimators=params['gb_n_estimators'],
        learning_rate=params['gb_learning_rate'],
        max_depth=params['gb_max_depth'],
        subsample=params['gb_subsample'],
        random_state=42
    )
    
    return VotingRegressor([
        ('rf', rf),
        ('gb', gb)
    ])

def objective(trial):
    params = {
        'rf_n_estimators': trial.suggest_int('rf_n_estimators', 1000, 3000),
        'rf_max_depth': trial.suggest_int('rf_max_depth', 15, 40),
        'rf_min_samples_split': trial.suggest_int('rf_min_samples_split', 2, 15),
        'rf_min_samples_leaf': trial.suggest_int('rf_min_samples_leaf', 1, 8),
        
        'gb_n_estimators': trial.suggest_int('gb_n_estimators', 200, 1000),
        'gb_learning_rate': trial.suggest_float('gb_learning_rate', 0.001, 0.1, log=True),
        'gb_max_depth': trial.suggest_int('gb_max_depth', 3, 12),
        'gb_subsample': trial.suggest_float('gb_subsample', 0.6, 1.0)
    }
    
    ensemble = create_ensemble_model(params)
    cv_scores = cross_val_score(ensemble, X_train, y_train, cv=5, scoring='r2', n_jobs=-1)
    return cv_scores.mean()

def predict_staffing_needs(model, scaler, date, time_of_day, features_list):
    """
    Predict staffing needs based on expected sales
    """
    # Create feature vector for prediction
    prediction_df = pd.DataFrame({
        'date': [date],
        'time_of_sale': [time_of_day]
    })
    
    # Create features for prediction
    prediction_df = create_advanced_features(prediction_df)
    X_pred = prediction_df[features_list].fillna(0)  # Fill NA with 0 for prediction
    
    # Scale features
    X_pred_scaled = scaler.transform(X_pred)
    
    # Predict sales
    predicted_sales = model.predict(X_pred_scaled)[0]
    
    # Determine staffing needs based on predicted sales
    if predicted_sales < 100:
        return 1, predicted_sales
    elif predicted_sales < 250:
        return 2, predicted_sales
    elif predicted_sales < 400:
        return 3, predicted_sales
    elif predicted_sales < 600:
        return 4, predicted_sales
    else:
        return 5, predicted_sales

# Main execution

if __name__ == "__main__":
    # Read and prepare data
    df = pd.read_csv("archive/Balaji Fast Food Sales.csv")
    
    # First standardize the date format by replacing '/' with '-'
    df['date'] = df['date'].str.replace('/', '-')
    
    # Then convert to datetime with 'mixed' format
    df['date'] = pd.to_datetime(df['date'], format='mixed')
    
    # Create advanced features
    df = create_advanced_features(df)
    
    # Select features
    features = [
        'day_of_week', 'month', 'day_of_year', 'hour_of_day', 'is_weekend',
        'quarter', 'week_of_year', 'mean', 'count', 'std', 'min', 'max',
        'rolling_7d_mean', 'rolling_7d_std',
        'rolling_14d_mean', 'rolling_14d_std',
        'rolling_30d_mean', 'rolling_30d_std'
    ]
    
    # Prepare data
    X = df[features].fillna(method='ffill').fillna(method='bfill')
    y = df['transaction_amount']
    
    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_scaled = pd.DataFrame(X_scaled, columns=features)
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=42
    )
    
    # Optimize hyperparameters
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=50)
    
    # Train final model with best parameters
    best_params = study.best_params
    final_ensemble = create_ensemble_model(best_params)
    final_ensemble.fit(X_train, y_train)
    
    # Evaluate final model
    y_pred = final_ensemble.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f'Final Mean Squared Error: {mse}')
    print(f'Final R2 Score: {r2}')
    
    # Feature importance analysis
    feature_importance = pd.DataFrame({
        'feature': features,
        'importance': final_ensemble.named_estimators_['rf'].feature_importances_
    })
    print("\nFeature Importance:")
    print(feature_importance.sort_values('importance', ascending=False))
    
    # Example prediction
    future_date = pd.Timestamp('2024-03-15')
    staff_needed, predicted_sales = predict_staffing_needs(
        final_ensemble, scaler, future_date, 'Evening', features
    )
    print(f'\nPrediction for {future_date.date()} Evening:')
    print(f'Predicted sales: ${predicted_sales:.2f}')
    print(f'Recommended staff: {staff_needed}')

  X = df[features].fillna(method='ffill').fillna(method='bfill')
[I 2024-12-05 22:32:41,233] A new study created in memory with name: no-name-6c71f0ae-1fd0-4616-b542-7479a4b88ec3
[W 2024-12-05 22:32:41,235] Trial 0 failed with parameters: {'rf1_n_estimators': 2178, 'rf1_max_depth': 39, 'rf1_min_samples_split': 4, 'rf1_min_samples_leaf': 2, 'rf2_n_estimators': 988, 'rf2_max_depth': 13, 'gb1_n_estimators': 802, 'gb1_learning_rate': 0.09190133276686592, 'gb1_max_depth': 5, 'gb1_subsample': 0.6847634161947196, 'gb2_n_estimators': 250, 'gb2_learning_rate': 0.027438898125272275, 'gb2_max_depth': 9, 'et_n_estimators': 998, 'et_max_depth': 30} because of the following error: The value None could not be cast to float..
[W 2024-12-05 22:32:41,235] Trial 0 failed with value None.
[W 2024-12-05 22:32:41,236] Trial 1 failed with parameters: {'rf1_n_estimators': 2575, 'rf1_max_depth': 18, 'rf1_min_samples_split': 8, 'rf1_min_samples_leaf': 8, 'rf2_n_estimators': 1056, 'rf2_max_depth': 14, 'gb1_n_est

[W 2024-12-05 22:32:41,237] Trial 1 failed with value None.
[W 2024-12-05 22:32:41,239] Trial 2 failed with parameters: {'rf1_n_estimators': 1440, 'rf1_max_depth': 36, 'rf1_min_samples_split': 12, 'rf1_min_samples_leaf': 6, 'rf2_n_estimators': 1952, 'rf2_max_depth': 30, 'gb1_n_estimators': 467, 'gb1_learning_rate': 0.029987360551676476, 'gb1_max_depth': 5, 'gb1_subsample': 0.6881510778928049, 'gb2_n_estimators': 489, 'gb2_learning_rate': 0.023056463184255513, 'gb2_max_depth': 9, 'et_n_estimators': 988, 'et_max_depth': 32} because of the following error: The value None could not be cast to float..
[W 2024-12-05 22:32:41,240] Trial 2 failed with value None.
[W 2024-12-05 22:32:41,241] Trial 3 failed with parameters: {'rf1_n_estimators': 1635, 'rf1_max_depth': 23, 'rf1_min_samples_split': 2, 'rf1_min_samples_leaf': 3, 'rf2_n_estimators': 2188, 'rf2_max_depth': 12, 'gb1_n_estimators': 627, 'gb1_learning_rate': 0.001799468701140834, 'gb1_max_depth': 6, 'gb1_subsample': 0.9021346419038347, '

ValueError: No trials are completed yet.

In [73]:
def create_advanced_features(df):
    # Time-based features
    df['hour_of_day'] = pd.Categorical(df['time_of_sale'], 
                                     categories=['Morning', 'Afternoon', 'Evening', 'Night'],
                                     ordered=True).codes
    
    # Enhanced time-based features
    df['day_of_week'] = df['date'].dt.dayofweek
    df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)
    df['month'] = df['date'].dt.month
    df['quarter'] = df['date'].dt.quarter
    df['day_of_year'] = df['date'].dt.dayofyear
    df['week_of_year'] = df['date'].dt.isocalendar().week
    
    if 'quantity' in df.columns:
        # Sales patterns - only calculate if we have quantity data
        daily_stats = df.groupby(['date', 'time_of_sale'])['quantity'].agg([
            'mean', 'count', 'std', 'min', 'max'
        ]).reset_index()
        df = df.merge(daily_stats, on=['date', 'time_of_sale'], suffixes=('', '_daily'))
        
        # Rolling statistics
        df = df.sort_values('date')
        for window in [7, 14, 30]:
            df[f'rolling_{window}d_mean'] = df.groupby('time_of_sale')['quantity'].transform(
                lambda x: x.rolling(window, min_periods=1).mean())
            df[f'rolling_{window}d_std'] = df.groupby('time_of_sale')['quantity'].transform(
                lambda x: x.rolling(window, min_periods=1).std())
    else:
        # For prediction data, add placeholder columns
        df['mean'] = 0
        df['count'] = 0
        df['std'] = 0
        df['min'] = 0
        df['max'] = 0
        for window in [7, 14, 30]:
            df[f'rolling_{window}d_mean'] = 0
            df[f'rolling_{window}d_std'] = 0
    
    return df

def create_ensemble_model(params):
    rf = RandomForestRegressor(
        n_estimators=params['rf_n_estimators'],
        max_depth=params['rf_max_depth'],
        min_samples_split=params['rf_min_samples_split'],
        min_samples_leaf=params['rf_min_samples_leaf'],
        random_state=42,
        n_jobs=-1
    )
    
    gb = GradientBoostingRegressor(
        n_estimators=params['gb_n_estimators'],
        learning_rate=params['gb_learning_rate'],
        max_depth=params['gb_max_depth'],
        subsample=params['gb_subsample'],
        random_state=42
    )
    
    return VotingRegressor([
        ('rf', rf),
        ('gb', gb)
    ])

def predict_item_sales(model, scaler, date, time_of_day, features_list):
    """
    Predict number of items that will be sold
    """
    # Create feature vector for prediction
    prediction_df = pd.DataFrame({
        'date': [date],
        'time_of_sale': [time_of_day]
    })
    
    # Create features for prediction
    prediction_df = create_advanced_features(prediction_df)
    X_pred = prediction_df[features_list].fillna(0)
    
    # Scale features
    X_pred_scaled = scaler.transform(X_pred)
    
    # Predict number of items
    predicted_items = model.predict(X_pred_scaled)[0]
    
    return max(1, round(predicted_items))  # Ensure at least 1 item is predicted

  X = df[features].fillna(method='ffill').fillna(method='bfill')


KeyError: "['time_numeric'] not in index"

In [75]:
def objective(trial):
    params = {
        'rf_n_estimators': trial.suggest_int('rf_n_estimators', 1000, 3000),
        'rf_max_depth': trial.suggest_int('rf_max_depth', 15, 40),
        'rf_min_samples_split': trial.suggest_int('rf_min_samples_split', 2, 15),
        'rf_min_samples_leaf': trial.suggest_int('rf_min_samples_leaf', 1, 8),
        
        'gb_n_estimators': trial.suggest_int('gb_n_estimators', 200, 1000),
        'gb_learning_rate': trial.suggest_float('gb_learning_rate', 0.001, 0.1, log=True),
        'gb_max_depth': trial.suggest_int('gb_max_depth', 3, 12),
        'gb_subsample': trial.suggest_float('gb_subsample', 0.6, 1.0)
    }
    
    ensemble = create_ensemble_model(params)
    ensemble.fit(X_train, y_train)
    
    # Calculate R2 score
    y_pred = ensemble.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    
    print(f'Trial {trial.number}: R2 Score = {r2:.4f}')
    return r2

# Main execution
if __name__ == "__main__":
    # ... previous code remains the same until model training ...
    
    print("Starting model optimization...")
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=20)  # Reduced to 20 trials for demonstration
    
    print("\nBest trial:")
    print(f"R2 Score: {study.best_value:.4f}")
    print("Best hyperparameters:", study.best_params)
    
    # Train final model with best parameters
    best_params = study.best_params
    final_ensemble = create_ensemble_model(best_params)
    final_ensemble.fit(X_train, y_train)
    
    # Final evaluation
    y_pred = final_ensemble.predict(X_test)
    final_r2 = r2_score(y_test, y_pred)
    final_mse = mean_squared_error(y_test, y_pred)
    
    print(f'\nFinal Model Performance:')
    print(f'R2 Score: {final_r2:.4f}')
    print(f'Mean Squared Error: {final_mse:.4f}')
    
    # Feature importance analysis
    feature_importance = pd.DataFrame({
        'feature': features,
        'importance': final_ensemble.named_estimators_['rf'].feature_importances_
    })
    print("\nFeature Importance:")
    print(feature_importance.sort_values('importance', ascending=False))
    
    # Example prediction
    future_date = pd.Timestamp('2024-03-15')
    predicted_items = predict_item_sales(
        final_ensemble, scaler, future_date, 'Evening', features
    )
    print(f'\nPrediction for {future_date.date()} Evening:')
    print(f'Predicted number of items to be sold: {predicted_items}')

[I 2024-12-05 22:23:56,124] A new study created in memory with name: no-name-781ccfb6-7713-492a-82cb-520d27b9f985


Starting model optimization...


[I 2024-12-05 22:23:57,662] Trial 0 finished with value: 0.7283759283377945 and parameters: {'rf_n_estimators': 1427, 'rf_max_depth': 32, 'rf_min_samples_split': 4, 'rf_min_samples_leaf': 7, 'gb_n_estimators': 219, 'gb_learning_rate': 0.019554615128023346, 'gb_max_depth': 8, 'gb_subsample': 0.8524355342043999}. Best is trial 0 with value: 0.7283759283377945.


Trial 0: R2 Score = 0.7284


[I 2024-12-05 22:23:59,745] Trial 1 finished with value: 0.7656052132943449 and parameters: {'rf_n_estimators': 2780, 'rf_max_depth': 25, 'rf_min_samples_split': 10, 'rf_min_samples_leaf': 5, 'gb_n_estimators': 425, 'gb_learning_rate': 0.0033106683337264836, 'gb_max_depth': 3, 'gb_subsample': 0.685810316485163}. Best is trial 1 with value: 0.7656052132943449.


Trial 1: R2 Score = 0.7656


[I 2024-12-05 22:24:02,053] Trial 2 finished with value: 0.7357050406740353 and parameters: {'rf_n_estimators': 2007, 'rf_max_depth': 26, 'rf_min_samples_split': 3, 'rf_min_samples_leaf': 2, 'gb_n_estimators': 721, 'gb_learning_rate': 0.0038571576697481783, 'gb_max_depth': 5, 'gb_subsample': 0.6084061917827897}. Best is trial 1 with value: 0.7656052132943449.


Trial 2: R2 Score = 0.7357


[I 2024-12-05 22:24:04,260] Trial 3 finished with value: 0.6868037744388561 and parameters: {'rf_n_estimators': 1644, 'rf_max_depth': 37, 'rf_min_samples_split': 9, 'rf_min_samples_leaf': 1, 'gb_n_estimators': 362, 'gb_learning_rate': 0.0018220010585002367, 'gb_max_depth': 9, 'gb_subsample': 0.8692762268538454}. Best is trial 1 with value: 0.7656052132943449.


Trial 3: R2 Score = 0.6868


[I 2024-12-05 22:24:06,042] Trial 4 finished with value: 0.7050546226783332 and parameters: {'rf_n_estimators': 1797, 'rf_max_depth': 28, 'rf_min_samples_split': 13, 'rf_min_samples_leaf': 4, 'gb_n_estimators': 240, 'gb_learning_rate': 0.0028938056744568417, 'gb_max_depth': 10, 'gb_subsample': 0.756652992326917}. Best is trial 1 with value: 0.7656052132943449.


Trial 4: R2 Score = 0.7051


[I 2024-12-05 22:24:09,985] Trial 5 finished with value: 0.701603585879562 and parameters: {'rf_n_estimators': 1752, 'rf_max_depth': 24, 'rf_min_samples_split': 13, 'rf_min_samples_leaf': 2, 'gb_n_estimators': 963, 'gb_learning_rate': 0.02783074452099757, 'gb_max_depth': 11, 'gb_subsample': 0.6230056902004372}. Best is trial 1 with value: 0.7656052132943449.


Trial 5: R2 Score = 0.7016


[I 2024-12-05 22:24:12,392] Trial 6 finished with value: 0.7278147614358035 and parameters: {'rf_n_estimators': 2782, 'rf_max_depth': 40, 'rf_min_samples_split': 4, 'rf_min_samples_leaf': 3, 'gb_n_estimators': 448, 'gb_learning_rate': 0.017859457609895418, 'gb_max_depth': 4, 'gb_subsample': 0.8666354229365332}. Best is trial 1 with value: 0.7656052132943449.


Trial 6: R2 Score = 0.7278


[I 2024-12-05 22:24:14,920] Trial 7 finished with value: 0.7091035188240562 and parameters: {'rf_n_estimators': 2110, 'rf_max_depth': 34, 'rf_min_samples_split': 8, 'rf_min_samples_leaf': 6, 'gb_n_estimators': 412, 'gb_learning_rate': 0.0016285343655951703, 'gb_max_depth': 9, 'gb_subsample': 0.814749077243582}. Best is trial 1 with value: 0.7656052132943449.


Trial 7: R2 Score = 0.7091


[I 2024-12-05 22:24:19,957] Trial 8 finished with value: 0.6945710596668693 and parameters: {'rf_n_estimators': 2807, 'rf_max_depth': 37, 'rf_min_samples_split': 7, 'rf_min_samples_leaf': 3, 'gb_n_estimators': 884, 'gb_learning_rate': 0.003354863966013552, 'gb_max_depth': 10, 'gb_subsample': 0.9507296966782848}. Best is trial 1 with value: 0.7656052132943449.


Trial 8: R2 Score = 0.6946


[I 2024-12-05 22:24:23,968] Trial 9 finished with value: 0.73401353678581 and parameters: {'rf_n_estimators': 2912, 'rf_max_depth': 28, 'rf_min_samples_split': 12, 'rf_min_samples_leaf': 4, 'gb_n_estimators': 797, 'gb_learning_rate': 0.0023606285212427617, 'gb_max_depth': 7, 'gb_subsample': 0.9295331024170812}. Best is trial 1 with value: 0.7656052132943449.


Trial 9: R2 Score = 0.7340


[I 2024-12-05 22:24:25,979] Trial 10 finished with value: 0.7221376694648756 and parameters: {'rf_n_estimators': 2393, 'rf_max_depth': 16, 'rf_min_samples_split': 15, 'rf_min_samples_leaf': 8, 'gb_n_estimators': 580, 'gb_learning_rate': 0.07370276458449991, 'gb_max_depth': 3, 'gb_subsample': 0.721161591668444}. Best is trial 1 with value: 0.7656052132943449.


Trial 10: R2 Score = 0.7221


[I 2024-12-05 22:24:27,639] Trial 11 finished with value: 0.7434186409847775 and parameters: {'rf_n_estimators': 1071, 'rf_max_depth': 21, 'rf_min_samples_split': 2, 'rf_min_samples_leaf': 6, 'gb_n_estimators': 693, 'gb_learning_rate': 0.006712879399992157, 'gb_max_depth': 5, 'gb_subsample': 0.611440446019291}. Best is trial 1 with value: 0.7656052132943449.


Trial 11: R2 Score = 0.7434


[I 2024-12-05 22:24:29,343] Trial 12 finished with value: 0.7321943255066413 and parameters: {'rf_n_estimators': 1046, 'rf_max_depth': 20, 'rf_min_samples_split': 10, 'rf_min_samples_leaf': 6, 'gb_n_estimators': 583, 'gb_learning_rate': 0.007647172841500927, 'gb_max_depth': 6, 'gb_subsample': 0.6788147372339262}. Best is trial 1 with value: 0.7656052132943449.


Trial 12: R2 Score = 0.7322


[I 2024-12-05 22:24:31,429] Trial 13 finished with value: 0.7629884617046256 and parameters: {'rf_n_estimators': 2423, 'rf_max_depth': 21, 'rf_min_samples_split': 6, 'rf_min_samples_leaf': 6, 'gb_n_estimators': 681, 'gb_learning_rate': 0.0066020476848608, 'gb_max_depth': 3, 'gb_subsample': 0.6728086703395575}. Best is trial 1 with value: 0.7656052132943449.


Trial 13: R2 Score = 0.7630


[I 2024-12-05 22:24:33,328] Trial 14 finished with value: 0.7027789010572504 and parameters: {'rf_n_estimators': 2371, 'rf_max_depth': 15, 'rf_min_samples_split': 6, 'rf_min_samples_leaf': 5, 'gb_n_estimators': 485, 'gb_learning_rate': 0.0010082987533887406, 'gb_max_depth': 3, 'gb_subsample': 0.7086869609567851}. Best is trial 1 with value: 0.7656052132943449.


Trial 14: R2 Score = 0.7028


[I 2024-12-05 22:24:35,398] Trial 15 finished with value: 0.7720814354104102 and parameters: {'rf_n_estimators': 2559, 'rf_max_depth': 21, 'rf_min_samples_split': 6, 'rf_min_samples_leaf': 8, 'gb_n_estimators': 655, 'gb_learning_rate': 0.005852329824658076, 'gb_max_depth': 3, 'gb_subsample': 0.6682096471389335}. Best is trial 15 with value: 0.7720814354104102.


Trial 15: R2 Score = 0.7721


[I 2024-12-05 22:24:37,421] Trial 16 finished with value: 0.7460410185640091 and parameters: {'rf_n_estimators': 2655, 'rf_max_depth': 24, 'rf_min_samples_split': 11, 'rf_min_samples_leaf': 8, 'gb_n_estimators': 316, 'gb_learning_rate': 0.014056462364492182, 'gb_max_depth': 5, 'gb_subsample': 0.746912608046722}. Best is trial 15 with value: 0.7720814354104102.


Trial 16: R2 Score = 0.7460


[I 2024-12-05 22:24:39,534] Trial 17 finished with value: 0.7679957433412572 and parameters: {'rf_n_estimators': 2597, 'rf_max_depth': 18, 'rf_min_samples_split': 9, 'rf_min_samples_leaf': 7, 'gb_n_estimators': 521, 'gb_learning_rate': 0.0046802946929352975, 'gb_max_depth': 4, 'gb_subsample': 0.6605002349563839}. Best is trial 15 with value: 0.7720814354104102.


Trial 17: R2 Score = 0.7680


[I 2024-12-05 22:24:41,938] Trial 18 finished with value: 0.7200719871352461 and parameters: {'rf_n_estimators': 2546, 'rf_max_depth': 18, 'rf_min_samples_split': 6, 'rf_min_samples_leaf': 7, 'gb_n_estimators': 527, 'gb_learning_rate': 0.041815451877901096, 'gb_max_depth': 6, 'gb_subsample': 0.6485436955185222}. Best is trial 15 with value: 0.7720814354104102.


Trial 18: R2 Score = 0.7201


[I 2024-12-05 22:24:44,318] Trial 19 finished with value: 0.7413219000282447 and parameters: {'rf_n_estimators': 2194, 'rf_max_depth': 18, 'rf_min_samples_split': 8, 'rf_min_samples_leaf': 8, 'gb_n_estimators': 794, 'gb_learning_rate': 0.010312264100579985, 'gb_max_depth': 4, 'gb_subsample': 0.777520852520408}. Best is trial 15 with value: 0.7720814354104102.


Trial 19: R2 Score = 0.7413

Best trial:
R2 Score: 0.7721
Best hyperparameters: {'rf_n_estimators': 2559, 'rf_max_depth': 21, 'rf_min_samples_split': 6, 'rf_min_samples_leaf': 8, 'gb_n_estimators': 655, 'gb_learning_rate': 0.005852329824658076, 'gb_max_depth': 3, 'gb_subsample': 0.6682096471389335}

Final Model Performance:
R2 Score: 0.7721
Mean Squared Error: 4.4352

Feature Importance:
             feature  importance
7               mean    0.885529
11               max    0.021302
10               min    0.019395
12   rolling_7d_mean    0.015082
9                std    0.014548
13    rolling_7d_std    0.011664
15   rolling_14d_std    0.008584
17   rolling_30d_std    0.006808
14  rolling_14d_mean    0.006418
16  rolling_30d_mean    0.005144
2        day_of_year    0.001459
0        day_of_week    0.001084
3        hour_of_day    0.000980
8              count    0.000804
6       week_of_year    0.000748
1              month    0.000261
4         is_weekend    0.000097
5            qu

