In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso, BayesianRidge
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score, max_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
import warnings

# Install a blanket "ignore" filter: every warning raised after this point is
# suppressed for the rest of the session.
warnings.filterwarnings(action="ignore")


# Center ids listed in rank order: the first entry is Rank1, the second Rank2,
# and so on. Each one maps to a file named "Center_<id>_Rank<rank>.csv".
_center_ids_by_rank = (13, 43, 10, 52, 137, 67, 11, 174, 104, 27)

csv_filenames = [
    f"Center_{center_id}_Rank{rank}.csv"
    for rank, center_id in enumerate(_center_ids_by_rank, start=1)
]

# Columns used as model inputs. Kept in one place so the train and test frames
# are always sliced with exactly the same columns.
FEATURE_COLS = [
    "checkout_price", "cuisine", "meal_id", "emailer_for_promotion",
    "homepage_featured", "category", "week",
]
# Column the regressors are trained to predict.
TARGET_COL = "num_orders"
# Rows with week <= LAST_TRAIN_WEEK are fitted on; later weeks are held out.
LAST_TRAIN_WEEK = 130
# Columns that get one-hot encoded; the remaining features pass through as-is.
categorical_cols = ["cuisine", "meal_id", "category"]

# NOTE(review): the non-categorical features reach KNN and the linear-kernel
# SVR unscaled (remainder='passthrough' below), and StandardScaler is imported
# but never used. Confirm whether scaling was intended before relying on the
# relative ranking of those two models.

# For every center file: split by week, fit each regressor on the earlier
# weeks, and print RMSE / MAE / R-squared measured on the later weeks.
for csv_name in csv_filenames:
    data = pd.read_csv(csv_name)

    # Split on the week column rather than randomly, so every model is scored
    # on weeks that come after the ones it was fitted on.
    train_data = data[data['week'] <= LAST_TRAIN_WEEK]
    test_data = data[data['week'] > LAST_TRAIN_WEEK]

    print(f"\n\n{csv_name}\n\n")

    # Without rows on both sides of the cutoff, fit/predict would fail with an
    # opaque error and abort the remaining centers; report and move on instead.
    if train_data.empty or test_data.empty:
        print(
            f"Skipped: need rows on both sides of week {LAST_TRAIN_WEEK} "
            f"(train rows = {len(train_data)}, test rows = {len(test_data)})"
        )
        continue

    X_train = train_data[FEATURE_COLS]
    y_train = train_data[TARGET_COL]
    X_test = test_data[FEATURE_COLS]
    y_test = test_data[TARGET_COL]

    # handle_unknown='ignore' lets prediction proceed when the test weeks
    # contain a category value that never appeared in the training weeks.
    categorical_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
    ])

    preprocessor = ColumnTransformer(
        transformers=[
            ('cat', categorical_transformer, categorical_cols)
        ],
        remainder='passthrough',
    )

    # Fresh estimator instances per file, so nothing carries over between centers.
    regressors = [
        ("Linear Regression", LinearRegression()),
        ("Ridge Regression", Ridge(alpha=1.0)),
        ("Lasso Regression", Lasso(alpha=1.0)),
        ("KNN Regressor", KNeighborsRegressor(n_neighbors=5)),
        ("Bayesian Ridge", BayesianRidge()),
        ("Random forest regressor", RandomForestRegressor(n_estimators=100, random_state=42)),
        ("XGBRegressor", XGBRegressor()),
        ("SVM Regressor", SVR(kernel='linear')),
        ("Gradient Boosting Regressor", GradientBoostingRegressor(random_state=42)),
    ]

    for name, model in regressors:
        # The display label doubles as the pipeline step name for the estimator.
        pipeline = Pipeline(steps=[('preprocessor', preprocessor), (name, model)])
        pipeline.fit(X_train, y_train)

        y_pred = pipeline.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_test, y_pred)
        r_squared = r2_score(y_test, y_pred)

        print(f"{name}: RMSE = {round(rmse, 4)}, MAE = {round(mae, 4)}, R-squared = {round(r_squared, 4)}")



Center_13_Rank1.csv


Linear Regression: RMSE = 466.3427, MAE = 194.7813, R-squared = 0.6556
Ridge Regression: RMSE = 466.2894, MAE = 194.6423, R-squared = 0.6557
Lasso Regression: RMSE = 465.7142, MAE = 197.2413, R-squared = 0.6565
KNN Regressor: RMSE = 739.8958, MAE = 402.7359, R-squared = 0.1331
Bayesian Ridge: RMSE = 466.2917, MAE = 194.654, R-squared = 0.6557
Random forest regressor: RMSE = 358.9881, MAE = 148.6287, R-squared = 0.7959
XGBRegressor: RMSE = 354.9545, MAE = 141.2704, R-squared = 0.8005
SVM Regressor: RMSE = 655.1707, MAE = 284.8973, R-squared = 0.3203
Gradient Boosting Regressor: RMSE = 394.9661, MAE = 175.9509, R-squared = 0.753


Center_43_Rank2.csv


Linear Regression: RMSE = 555.5776, MAE = 235.7589, R-squared = 0.5209
Ridge Regression: RMSE = 555.5585, MAE = 235.6145, R-squared = 0.5209
Lasso Regression: RMSE = 555.4744, MAE = 237.6939, R-squared = 0.5211
KNN Regressor: RMSE = 791.5146, MAE = 368.5638, R-squared = 0.0276
Bayesian Ridge: RMSE = 555.5154, MAE = 