In [1]:
# Switch the working directory to the parent folder so the relative paths used
# in later cells resolve from there (presumably the project root — confirm).
# NOTE: the target is relative, so re-running this cell moves up one more level.
%cd ../

/home/hoanghu/projects/Food-Waste-Optimization


In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from xgboost import XGBRegressor
from skl2onnx import to_onnx
import onnxruntime as rt

In [3]:
# Root folder for serialized models; the CO2 models go into its "co2" subfolder.
path_root_trained_model = Path("trained_models")
# CSV holding the CO2 data read below. Built as a Path, like the model root,
# so every filesystem location in the notebook has the same type.
path_fact = Path("rq co2 prediction") / "grouped_co2_df.csv"

# Read data

In [4]:
# The first CSV column is an unnamed, previously saved row index (it would
# otherwise load as a data column called "Unnamed: 0"); use it as the index.
raw = pd.read_csv(path_fact, index_col=0)

raw.head()

Unnamed: 0,Date,Restaurant,num_fish,num_chicken,num_vegetarian,num_meat,pcs_Not Mapped,num_vegan,carbon_emissions_sum
0,2023-01-02,600 Chemicum,85.0,0.0,0.0,171.0,0.0,91.0,273.82
1,2023-01-03,600 Chemicum,163.0,0.0,32.0,78.0,0.0,120.0,275.97
2,2023-01-04,600 Chemicum,70.0,0.0,0.0,218.0,0.0,137.0,246.55
3,2023-01-05,600 Chemicum,232.0,85.0,0.0,2.0,0.0,178.0,298.02
4,2023-01-09,600 Chemicum,107.0,0.0,0.0,283.0,0.0,216.0,402.71


# Data processing

In [5]:
# Map the numerically prefixed restaurant labels to their plain names.
restaurant_labels = {
    '600 Chemicum': 'Chemicum',
    '610 Physicum': 'Physicum',
    '620 Exactum': 'Exactum',
}

# `assign` returns a new frame, so `raw` keeps the labels exactly as loaded.
data = raw.assign(Restaurant=raw['Restaurant'].replace(restaurant_labels))

data.head()

Unnamed: 0,Date,Restaurant,num_fish,num_chicken,num_vegetarian,num_meat,pcs_Not Mapped,num_vegan,carbon_emissions_sum
0,2023-01-02,Chemicum,85.0,0.0,0.0,171.0,0.0,91.0,273.82
1,2023-01-03,Chemicum,163.0,0.0,32.0,78.0,0.0,120.0,275.97
2,2023-01-04,Chemicum,70.0,0.0,0.0,218.0,0.0,137.0,246.55
3,2023-01-05,Chemicum,232.0,85.0,0.0,2.0,0.0,178.0,298.02
4,2023-01-09,Chemicum,107.0,0.0,0.0,283.0,0.0,216.0,402.71


# Create regression model for each restaurant

In [6]:
# Regression target.
col_y = 'carbon_emissions_sum'

# Model inputs: the five `num_*` count columns. The `pcs_Not Mapped` column
# that is also present in the data is not used as a feature.
cols_X = ['num_fish', 'num_chicken', 'num_vegetarian', 'num_meat', 'num_vegan']

# Distinct restaurant names found in the cleaned data.
RESTAURANTS = pd.unique(data['Restaurant'])

In [7]:
# Fit one XGBoost regressor per restaurant and store each one as a JSON file
# under <path_root_trained_model>/co2.
for restaurant in RESTAURANTS:
    # Rows belonging to this restaurant, split into features and target
    is_current = data['Restaurant'] == restaurant
    df = data.loc[is_current]
    X = df[cols_X]
    y = df[col_y]

    # Default hyperparameters; fit() hands back the fitted estimator itself
    regressor = XGBRegressor().fit(X, y)

    # Destination file; the folder is created on demand
    path_model = path_root_trained_model / "co2" / f"Aug21_XGBoost_{restaurant}.json"
    path_model.parent.mkdir(parents=True, exist_ok=True)
    regressor.save_model(path_model)

# Load model

In [9]:
# Reload a saved model from disk and run a smoke-test prediction.
# The path is rebuilt here instead of reusing whatever `path_model` the
# training loop left behind, so this cell also works when the models are
# already on disk and the training cell was not re-run. RESTAURANTS[-1] is
# the restaurant the training loop handles last.
restaurant_to_load = RESTAURANTS[-1]
path_model = path_root_trained_model / "co2" / f"Aug21_XGBoost_{restaurant_to_load}.json"

# A single made-up row with 123 in every category. Keying the row by `cols_X`
# ties each value to its feature name instead of relying on column position.
X = pd.DataFrame([[123, 123, 123, 123, 123]], columns=cols_X, dtype=float)

regressor = XGBRegressor()
regressor.load_model(path_model)

regressor.predict(X)

array([299.29333], dtype=float32)

## Choose best model

In [None]:
# Compare several regressors on one restaurant's data with 5-fold CV.
#
# The candidate estimators are imported in this cell rather than in the top
# import cell so that a missing optional package (lightgbm, catboost) only
# breaks this comparison and not the training cells above.
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.model_selection import cross_val_score

# Candidate estimator classes keyed by display name. An explicit mapping
# replaces eval() on the name strings; every candidate uses its defaults.
candidates = {
    'LinearRegression': LinearRegression,
    'Ridge': Ridge,
    'Lasso': Lasso,
    'RandomForestRegressor': RandomForestRegressor,
    'GradientBoostingRegressor': GradientBoostingRegressor,
    'XGBRegressor': XGBRegressor,
    'LGBMRegressor': LGBMRegressor,
    'CatBoostRegressor': CatBoostRegressor,
}

# Restaurant used for the comparison. `restaurant` stays a plain string (it is
# used as the plot title further down); the filtered rows get their own name.
restaurant = "Exactum"
df_restaurant = data[data['Restaurant'] == restaurant]

X, y = df_restaurant[cols_X], df_restaurant[col_y]

# Mean cross-validated score per candidate. The scorer is the *negated* RMSE,
# so every value is <= 0 and the one closest to zero is the best.
models = {}
for model_name, model_cls in candidates.items():
    scores = cross_val_score(model_cls(), X, y, cv=5, scoring='neg_root_mean_squared_error')
    models[model_name] = scores.mean().item()

# cross_val_score fits clones of the estimator it is given, so no candidate is
# left fitted. Refit the best one on all rows so that `regressor` can be used
# for prediction by the cells that follow.
best_model = max(models, key=models.get)
regressor = candidates[best_model]().fit(X, y)

models

## Prediction visualization

In [None]:
# Overlay the model's predictions and the observed values as two line traces
# on a single-panel figure titled with `restaurant`.
fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'scatter'}]])

# Trace name -> y values, in drawing order
series_by_name = {
    'Prediction': regressor.predict(X),
    'Groundtruth': y,
}
for trace_name, values in series_by_name.items():
    fig.add_trace(
        go.Scatter(y=values, mode='lines', name=trace_name),
        row=1, col=1
    )

# Square canvas with a bold, centred title
layout_options = dict(
    height=800,
    width=800,
    title_text=f"<b>{restaurant}</b>",
    title_font_size=30,
    xaxis_tickangle=-90,
    title_x=0.5,
)
fig.update_layout(**layout_options)
fig.show()