# Load Data

In [6]:
import os
import sys
import pandas as pd

folder_path = os.path.join(os.path.dirname(os.getcwd()), 'Data_Test_Multi_Raw')
print(folder_path)
file_names = ['data_test.csv', 'data_train.csv', 'target_test.csv', 'target_train.csv']

data_frames = []
for file_name in file_names:
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_csv(file_path)
    data_frames.append(df)

data_test = data_frames[0]
data_train = data_frames[1]
target_test = data_frames[2]    
target_train = data_frames[3]

target_test = target_test['product_1_demand']
target_train = target_train['product_1_demand']

c:\Users\lanza\Integrated-vs-Seperated-Master-Thesis\Data_Test_Multi_Raw
0    54
1    59
2    49
3    46
4    48
Name: product_1_demand, dtype: int64


## Preprocess

In [7]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Define preprocessing for numeric columns (scale them)
numeric_features = ['temperature']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Define preprocessing for categorical features (encode them)
categorical_features = ['location']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Combine preprocessing steps
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)],
    remainder='passthrough')

# Preprocessing on train data
X_train = preprocessor.fit_transform(data_train)

# Preprocessing on test data
X_test = preprocessor.transform(data_test)


print(X_test)

[[-1.2614810567465224 0.0 0.0 ... 5 2022 False]
 [0.03038460101498815 0.0 0.0 ... 9 2020 False]
 [-0.2866376462884491 0.0 0.0 ... 5 2020 False]
 ...
 [-1.2614810567465224 0.0 0.0 ... 1 2021 False]
 [-1.0316399274515304 0.0 0.0 ... 1 2022 False]
 [0.03038460101498815 0.0 0.0 ... 4 2020 True]]


In [9]:
# Initialize an empty list to store the final order quantities
final_order_quantities_ANN = []
final_order_quantities_DT = []

# Define costs
c = 2  # cost per unit of product
s = 1  # salvage value per unit of leftover product
p = 3  # price per unit of product sold

# Calculate critical ratio
critical_ratio = (p - c) / ((c - s) + (p - c))

# ANN

In [14]:
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV
from keras.optimizers import Adam
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np

# Define a custom loss function for the newsvendor problem with quantile regression
def newsvendor_quantile_loss(q, y_true, y_pred):
    error = y_true - y_pred
    return tf.keras.backend.mean(tf.maximum(q*error, (q-1)*error), axis=-1)

# Define a function to create a model
def create_model(n_hidden, n_neurons, learning_rate, activation, input_dim=15): #learning_rate_input=3e-3, 
    model = Sequential()
    model.add(Dense(n_neurons, activation=activation, input_dim=input_dim))
    for layer in range(n_hidden):
        model.add(Dense(n_neurons, activation=activation))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss=lambda y,f: newsvendor_quantile_loss(critical_ratio,y,f), optimizer=optimizer)
    return model

# Define a function to build the model
def model_builder(n_hidden=1, n_neurons=30, learning_rate=3e-3 , activation = 'relu'): #
    return KerasRegressor(build_fn=create_model, verbose=0, n_hidden=n_hidden, n_neurons=n_neurons, learning_rate=learning_rate, activation=activation)


# Create a model
model_ANN = model_builder()

# Define the parameter grid
from scipy.stats import reciprocal
param_distribs = {
    "n_hidden": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    "n_neurons": np.arange(1, 100),
    "learning_rate": reciprocal(1e-4, 1e-2),
    "batch_size": [16, 32, 64, 128],
    "epochs": [10, 15, 20, 25, 30],
    "activation": ['relu', 'sigmoid', 'tanh']
}

# Optimize the model using RandomizedSearchCV
rnd_search_cv_ANN = RandomizedSearchCV(model_ANN, param_distribs, n_iter=10, cv=3, scoring='neg_mean_squared_error')

# Fit the model
target_train = target_train.astype('float32')
rnd_search_cv_ANN.fit(X_train, target_train)
print(rnd_search_cv_ANN.best_params_)

# Make predictions
target_pred_ANN = rnd_search_cv_ANN.predict(X_test)

# Calculate the MSE
mse = mean_squared_error(target_test, target_pred_ANN)
print("MSE:", mse)


  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)


{'activation': 'relu', 'batch_size': 32, 'epochs': 15, 'learning_rate': 0.0007024398871329958, 'n_hidden': 1, 'n_neurons': 34}
MSE: 27.871427522041632


# DT 

In [15]:
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV

# Specify the parameter grid for RandomizedSearchCV
param_distribs = {
    'boosting_type': ['gbdt', 'dart', 'goss'],
    'num_leaves': [10, 20, 30, 40, 50],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'feature_fraction': [0.8, 0.9, 1.0],
    'bagging_fraction': [0.8, 0.9, 1.0],
    'bagging_freq': [3, 4, 5, 6, 7],
}

# Create the LightGBM model
model = lgb.LGBMRegressor(objective='quantile', alpha=critical_ratio, metric='quantile', verbose=0)

# Perform RandomizedSearchCV
rnd_search = RandomizedSearchCV(model, param_distributions=param_distribs, n_iter=10, cv=3)

# Fit the model
rnd_search.fit(X_train, target_train)
print(rnd_search.best_params_)

# Get the best model
best_model = rnd_search.best_estimator_

# Predict the demand using the best model
target_pred_DT = best_model.predict(X_test)
print(target_pred_DT)

{'num_leaves': 40, 'learning_rate': 0.2, 'feature_fraction': 0.8, 'boosting_type': 'gbdt', 'bagging_freq': 6, 'bagging_fraction': 0.8}
[53.99680935 59.01488984 48.77656787 46.33322722 48.00206618 45.05426093
 44.91774945 44.0214055  44.56037447 44.00398058 58.96372899 43.99408783
 53.99458223 47.98080582 59.01453041 45.45597267 53.99505288 45.00415001
 49.00329989 55.0002717  48.92307364 49.01024812 59.00078553 53.98809667
 43.92805622 58.96654974 44.73149616 47.99773451 54.9991004  58.99436987
 46.04486599 46.04226898 53.99960887 58.99595631 48.00236851 44.04182071
 46.02449642 48.90707086 49.13405427 45.11892459 53.99035415 54.02828127
 48.00664742 49.36638895 47.99736724 55.00062629 49.00031761 54.00314329
 54.02123362 45.95504229 58.9530419  48.94779995 43.85784758 43.91145111
 43.99360881 54.9978541  55.00193595 45.95939171 48.85287958 54.99487593
 55.00135167 45.60278309 44.9953947  48.46891395 47.99763365 48.44176145
 48.95603206 44.01573062 59.00079847 49.02652363 59.01592703 5

9 fits failed out of a total of 30.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
9 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\lanza\Integrated-vs-Seperated-Master-Thesis\.venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\lanza\Integrated-vs-Seperated-Master-Thesis\.venv\Lib\site-packages\lightgbm\sklearn.py", line 1092, in fit
    super().fit(
  File "c:\Users\lanza\Integrated-vs-Seperated-Master-Thesis\.venv\Lib\site-packages\lightgbm\sklearn.py", line 885, in fit
    self._Booster = train(
                    ^^^^^^
  File "c:\Users\lanza\Integrated-vs-Seperated-Master-Thesis\.ve

# Costs

In [16]:
# Loop over each week in target_test
overall_costs_ANN = 0
overall_costs_DT = 0

for i in range(len(target_test)):

    # Calculate understock and overstock costs
    cost_ANN = 0
    cost_DT = 0
    if target_pred_ANN[i] < target_test.values[i]:
        cost_ANN = (p - c) * (target_test.values[i] - np.round(target_pred_ANN[i]))
    if target_pred_ANN[i] > target_test.values[i]:
        cost_ANN = (c - s) * (np.round(target_pred_ANN[i]) - target_test.values[i])
    if target_pred_DT[i] < target_test.values[i]:
        cost_DT = (p - c) * (target_test.values[i] - np.round(target_pred_DT[i]))
    if target_pred_DT[i] > target_test.values[i]:
        cost_DT = (c - s) * (np.round(target_pred_DT[i]) - target_test.values[i])

    # Calculate the total costs for the week
    overall_costs_ANN += cost_ANN
    overall_costs_DT += cost_DT
   

# Print the overall profit
print('Overall costs for ANN: ', int(overall_costs_ANN))
print('Overall costs for DT: ', int(overall_costs_DT))

Overall costs for ANN:  721
Overall costs for DT:  125
