In [122]:
# Import necessary libraries
import pandas as pd
import numpy as np

import xgboost as xgb
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split

import lightgbm as ltb

import seaborn as sns

from sklearn.metrics import mean_squared_error

In [123]:
# Load the training dataset from its CSV file into a DataFrame
train_csv_path = "../Datasets/new_df.csv"
train_df = pd.read_csv(train_csv_path)

In [125]:
# Derive the calendar features in one chained expression:
#   * nweek        - last two characters of 'year_week', cast to a 64-bit integer
#   * date         - parsed to datetime so the .dt accessor can be used
#   * year / month - read from the parsed 'date' column
# Afterwards the 'date' and 'id' columns are removed from the frame.
train_df = (
    train_df
    .assign(
        nweek=lambda frame: frame['year_week'].map(lambda value: str(value)[-2:]).astype(np.int64),
        date=lambda frame: pd.to_datetime(frame['date']),
    )
    .assign(
        year=lambda frame: frame['date'].dt.year,
        month=lambda frame: frame['date'].dt.month,
    )
    .drop(['date', 'id'], axis=1)
)

In [126]:
# Rows whose 'year' is 2019, 2020 or 2021
# (despite its name, 'train_2021' holds all three years)
train_2021 = train_df[train_df['year'].isin([2019, 2020, 2021])]

# Rows whose 'year' is 2022
train_2022 = train_df.loc[train_df['year'] == 2022]

In [127]:
# 2019-2021 rows: 'inventory_units' is the target, every other column is a feature
Y = train_2021['inventory_units']
X = train_2021.drop('inventory_units', axis=1)

# 2022 rows, split into target and features the same way
Y_2022 = train_2022['inventory_units']
X_2022 = train_2022.drop('inventory_units', axis=1)

In [128]:
# Columns removed from both feature frames before modelling
for_removal = ['Unnamed: 0', 'year_week', 'sales_units']

X = X.drop(for_removal, axis=1)
X_2022 = X_2022.drop(for_removal, axis=1)

In [129]:
# Split features/target into train and test parts with a 20% test share.
# shuffle=False keeps the rows in their existing order, so the test part is
# the trailing slice of X / Y rather than a random sample.
split_parts = train_test_split(X, Y, test_size=0.2, shuffle=False)
X_train, X_test, y_train, y_test = split_parts

In [133]:
# Hyperparameters for the LightGBM regressor, gathered in one place
lgbm_params = {
    'boosting_type': 'dart',
    'num_leaves': 30,
    'max_depth': 200,
    'learning_rate': 0.15,
    'n_estimators': 50,
    'importance_type': 'gain',
    'n_jobs': -1,
}

# Build the regressor from those settings and train it on the training split
model = ltb.LGBMRegressor(**lgbm_params)
model.fit(X_train, y_train)

LGBMRegressor(boosting_type='dart', importance_type='gain', learning_rate=0.15,
              max_depth=200, n_estimators=50, num_leaves=30)

In [134]:
# Predict inventory units for the training split and report the root mean squared error.
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, where it raises a TypeError.
# For a single target column both forms give the same value.
y_predicted_train = model.predict(X_train)
rms_train = np.sqrt(mean_squared_error(y_train, y_predicted_train))
print(f"RMS train: {rms_train}")

# Same evaluation on the test split
y_predicted_test = model.predict(X_test)
rms_test = np.sqrt(mean_squared_error(y_test, y_predicted_test))
print(f"RMS test: {rms_test}")

RMS train: 42.459893747029426
RMS test: 162.03091977906104


In [135]:
# Predict inventory units for the 2022 rows with the trained model and report the RMSE.
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, where it raises a TypeError.
# NOTE(review): this cell overwrites `y_predicted_train` and `rms_train` with 2022
# results; the names are kept in case other cells read them, but dedicated names
# (e.g. `y_predicted_2022`, `rms_2022`) would avoid clobbering the training values.
y_predicted_train = model.predict(X_2022)
rms_train = np.sqrt(mean_squared_error(Y_2022, y_predicted_train))
print(f"RMS 2022: {rms_train}")

RMS 2022: 137.37992871120846
