# Load Data

In [12]:
import os
import sys
import pandas as pd

folder_path = os.path.join(os.path.dirname(os.getcwd()), 'Data_Test_Multi_Raw')
print(folder_path)
file_names = ['data_test.csv', 'data_train.csv', 'target_test.csv', 'target_train.csv']

data_frames = []
for file_name in file_names:
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_csv(file_path)
    data_frames.append(df)

data_test = data_frames[0]
data_train = data_frames[1]
target_test = data_frames[2]    
target_train = data_frames[3]

print(data_test.head())

c:\Users\lanza\Integrated-vs-Seperated-Master-Thesis\Data_Test_Multi_Raw
   day  month  year  is_holiday    location  temperature
0   10      5  2022       False       Rehau         7.74
1   20      9  2020       False  Holzminden         9.37
2   12      5  2020       False    Grafenau         8.97
3    1      4  2020       False     Parchim        10.05
4   23      8  2022       False     Ansbach         7.32


In [13]:
import numpy as np

# Initialize an empty list to store the final order quantities
final_order_quantities_ANN = []
final_order_quantities_DT = []

# Parameters for multi-item newsvendor problem
prices = np.array([0.3, 0.5, 0.6, 0.5, 0.5, 0.5]) #price data
costs = np.array([0.06, 0.06, 0.06, 0.06, 0.06, 0.06]) #cost data
salvages = np.array([0.01, 0.01, 0.01, 0.01, 0.01, 0.01]) #salvage data
underage_data = prices - costs 
overage_data = costs - salvages 


alpha_data = np.array([             #alpha data
    [0.0, 0.1, 0.05, 0.1, 0.05, 0.1],
    [0.15, 0.0, 0.1, 0.05, 0.05, 0.05],
    [0.1, 0.2, 0.0, 0.05, 0.1, 0.05],
    [0.05, 0.05, 0.05, 0.0, 0.15, 0.2],
    [0.1, 0.05, 0.15, 0.2, 0.0, 0.05],
    [0.05, 0.1, 0.05, 0.15, 0.1, 0.0]
])


# Preprocessing

In [14]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Define preprocessing for numeric columns (scale them)
numeric_features = ['temperature']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Define preprocessing for categorical features (encode them)
categorical_features = ['location']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Combine preprocessing steps
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)],
    remainder='passthrough')

# Preprocessing on train data
X_train = preprocessor.fit_transform(data_train)

# Preprocessing on test data
X_test = preprocessor.transform(data_test)


print(X_test)


[[-1.2614810567465224 0.0 0.0 ... 5 2022 False]
 [0.03038460101498815 0.0 0.0 ... 9 2020 False]
 [-0.2866376462884491 0.0 0.0 ... 5 2020 False]
 ...
 [-1.2614810567465224 0.0 0.0 ... 1 2021 False]
 [-1.0316399274515304 0.0 0.0 ... 1 2022 False]
 [0.03038460101498815 0.0 0.0 ... 4 2020 True]]


# ANN

In [15]:
import numpy as np
import pandas as pd
import keras
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
from scikeras.wrappers import KerasRegressor
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import tensorflow as tf
from scipy.stats import reciprocal
from sklearn.model_selection import KFold 

In [None]:
def tune_NN_model(X_train, y_train, X_val, y_val, alpha_input, underage_input, overage_input, patience=10, multi=True, integrated=True, verbose=0, seed=42):

    """ Train a network on the given training data with hyperparameter tuning
    
    Parameters
    --------------
    X_train : np.array
        training feature data
    y_train : np.array
        training targets
    X_train : np.array
        validation feature data
    y_train : np.array
        validation targets
    alpha : np.array
        Substitution rates, shape (N_PRODUCTS, N_PRODUCTS)
    u : np.array
        underage costs, shape (1, N_PRODUCTS)
    o : np.array
        overage costs, shape (1, N_PRODUCTS)
    patience : int
        number of epochs without improvement before stopping training
    verbose : int
        keras' verbose parameter for silent / verbose model training
    seed : int
        random seed (affects mainly model initialization, set for reproducable results)

    Returns
    ----------
    model : keras model
        Final model
    hp : list or tupl
        hyperparameters in the following order: hidden_nodes, lr, max_epochs, patience, batch_size
    val_profit : float
        Mean profit on the validation set
    """
    global alpha, underage, overage
    alpha = alpha_input
    underage = underage_input
    overage = overage_input

    output_shape = y_train.shape[1]
    input_shape = X_train.shape[1]

    # create a neural network model with basic hyperparameters
    early_stopping = EarlyStopping(monitor='val_loss', patience=patience)

    # construct loss function based on the number of products
    if not integrated:
        model_ANN = KerasRegressor(model=create_NN_basic, n_hidden=1,n_neurons=30, activation = 'relu',
                                input_shape=input_shape, learning_rate=0.01, output_shape=output_shape, 
                                seed = seed, verbose=verbose, callbacks=[early_stopping])
    elif not multi and integrated:
        model_ANN = KerasRegressor(model=create_NN_single, n_hidden=1,n_neurons=30, activation = 'relu',
                               input_shape=input_shape, learning_rate=0.01, output_shape=output_shape, 
                               seed = seed, verbose=verbose, callbacks=[early_stopping])

    elif multi and integrated: 
        model_ANN = KerasRegressor(model=create_NN_multi, n_hidden=1,n_neurons=30, activation = 'relu',
                               input_shape=input_shape, learning_rate=0.01, output_shape=output_shape, 
                               seed = seed, verbose=verbose, callbacks=[early_stopping])
    else:
        raise ValueError('Invalid Configuration')
    

    
    # define the hyperparameters space
    param_distribs = {
        "n_hidden": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        "n_neurons": np.arange(1, 30),
        "learning_rate": [0.01,0.001,0.0001,0.00001],
        "batch_size": [16, 32, 64, 128],
        "epochs": [10, 20, 30, 40, 50],
        "activation": ['relu', 'sigmoid', 'tanh']
    }

    # perform GridSearch for hyperparameter tuning
    random_CV = RandomizedSearchCV(model_ANN, param_distribs, n_iter=100, cv=3, scoring='neg_mean_squared_error')
    random_CV_result = random_CV.fit(X_train, y_train, validation_data=(X_val, y_val), verbose=verbose)

    # Get the best parameters and best estimator
    best_params = random_CV_result.best_params_
    best_estimator = random_CV_result.best_estimator_

    # make predictions on validation set and compute profits
    q_val = best_estimator.predict(X_val)
    val_profit = np.mean(nvps_profit(y_val, q_val, alpha, underage, overage))

    hyperparameter = [best_params['n_hidden'], best_params['n_neurons'],best_params['learning_rate'], 
                      best_params['epochs'], patience, best_params['batch_size'], best_params['activation']]

    return best_estimator, hyperparameter, val_profit

In [6]:
def make_nvps_loss(alpha, u, o):

    # transofrm the alpha, u, o to tensors
    u = tf.convert_to_tensor(underage_data, dtype=tf.float32) #underage costs
    o = tf.convert_to_tensor(overage_data, dtype=tf.float32) #overage costs
    alpha = tf.convert_to_tensor(alpha_data, dtype=tf.float32) #substitution matrix

    # define the loss function
    @tf.autograph.experimental.do_not_convert
    def nvps_loss(y_true, y_pred):
        q = tf.maximum(y_pred, 0.)

        # Calculate the demand increase for each product due to substitutions from other products
        demand_increase = tf.matmul( tf.maximum(0.0, y_true - y_pred),alpha)
        # Adjusted demand is the original demand plus the increase due to substitutions
        adjusted_demand = y_true + demand_increase

        profits = tf.matmul(q,tf.transpose(u)) - tf.matmul(tf.maximum(0.0,q - adjusted_demand), tf.transpose(u+o))

        return -tf.math.reduce_mean(profits)
    
    return nvps_loss

In [11]:
def loss_complex(y_true, y_pred):

    # Cast numpy arrays to tensors
    u = tf.convert_to_tensor(underage_data, dtype=tf.float32) #underage costs
    o = tf.convert_to_tensor(overage_data, dtype=tf.float32) #overage costs
    alpha = tf.convert_to_tensor(alpha_data, dtype=tf.float32) #substitution matrix

    # Cast y_true to float32
    y_true = tf.cast(y_true, dtype=tf.float32)

    # Calculate the demand increase for each product due to substitutions from other products
    demand_increase = tf.matmul( tf.maximum(0.0, y_true - y_pred),alpha)

    # Adjusted demand is the original demand plus the increase due to substitutions
    adjusted_demand = y_true + demand_increase

    # Compute the loss with adjusted demand
    loss = -tf.reduce_mean(u * y_pred - (u + o) * tf.maximum(y_pred - adjusted_demand, 0))
    return loss

from keras.utils import get_custom_objects
get_custom_objects().update({'loss_complex': loss_complex})

# Model creation function 
def create_model(n_hidden, n_neurons, learning_rate, activation):
    model = Sequential()
    # Input Layer
    model.add(Dense(n_neurons,input_dim=15, activation=activation))
    # Hidden Layer
    for _ in range(n_hidden):
        model.add(Dense(n_neurons, activation=activation))
    # Output Layer
    model.add(Dense(6))
    model.compile(loss = loss_complex,
                  optimizer=Adam(learning_rate=learning_rate))
    return model

# Model builder function 
def model_builder(n_hidden, n_neurons, learning_rate, activation, batch_size, epochs):
    return KerasRegressor(model=create_model, verbose=0, n_hidden=n_hidden, n_neurons=n_neurons, 
                          learning_rate=learning_rate, activation=activation, batch_size=batch_size, epochs=epochs)

# Create a baseline model
model_ANN = model_builder(11,27,0.00014593446517329207,'relu', 16, 25)


# Define the parameter grid
from scipy.stats import reciprocal
params = {
    "n_hidden": range(0, 15),
    "n_neurons": np.arange(1, 100),
    "learning_rate": reciprocal(1e-4, 1e-2),
    "batch_size": [16, 32, 64, 128],
    "epochs": [10,15, 20, 25, 30],
    "activation": ['relu', 'sigmoid', 'tanh'] #
}

# Optimize the model using RandomizedSearchCV
#rnd_search_cv_ANN = RandomizedSearchCV(model_ANN, param_distributions=params, cv=KFold(10))

# Fit the model
#rnd_search_cv_ANN.fit(X_train, target_train)
#print(rnd_search_cv_ANN.best_params_)

model_ANN.fit(X_train, target_train)
target_pred_ANN = model_ANN.predict(X_test)


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: Could not interpret metric identifier: loss

In [None]:
# Predict with best model
best_model_ANN = rnd_search_cv_ANN.best_estimator_
best_model_ANN.fit(X_train, target_train)
target_pred_ANN = best_model_ANN.predict(X_test)

  X, y = self._initialize(X, y)


KerasRegressor(
	model=None
	build_fn=<function create_model at 0x000001C0FC33F920>
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=None
	metrics=None
	batch_size=16
	validation_batch_size=None
	verbose=0
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=25
	n_hidden=11
	n_neurons=27
	learning_rate=0.00014593446517329207
	activation=relu
)


# DT

In [17]:
from typing import Tuple
import numpy as np
import xgboost as xgb

print("XGB Model Tuning")
print(X_train.shape)
print(target_train.shape)
print(alpha_data.shape)
print(underage_data.shape)
print(overage_data.shape)
print("XGB Model Tuning")

def custom_XGB_model():
    
    def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
        
        y = dtrain.get_label().reshape(predt.shape)
        d = y + np.matmul(np.maximum(0, y - predt), alpha_data)
        u = np.array(underage_data)
        o = np.array(overage_data)
        return (-(u * np.maximum(0,d-predt) - o * np.maximum(0, predt-d))).reshape(y.size)
              
    def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
        return np.ones(predt.shape).reshape(predt.size)
    
    def custom_loss(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]:
        grad = gradient(predt, dtrain)
        hess = hessian(predt, dtrain)
        return grad, hess
    
    def newsvendorRMSE(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
        y = dtrain.get_label().reshape(predt.shape)
        d = y + np.matmul(np.maximum(0, y - predt), alpha_data)
        v = np.sqrt(np.sum(np.power(d - predt, 2)))
        return "newsvendorRMSE", v

    X, y = X_train, target_train  
    Xy = xgb.DMatrix(X, label=y)
    results = {}
    booster = xgb.train(
        {
            "tree_method": "hist",
            "num_target": target_train.shape[1],
            "multi_strategy": "multi_output_tree",
        },
        dtrain=Xy,
        num_boost_round=128,
        obj=custom_loss,
        evals=[(Xy, "Train")],
        evals_result=results,
        custom_metric=newsvendorRMSE
       
    )

    preds = booster.predict(xgb.DMatrix(X_test, label=target_test))
    print("Predictions:", preds)
    return preds

target_pred_DT = custom_XGB_model()


XGB Model Tuning
(800, 15)
(800, 6)
(6, 6)
(6,)
(6,)
XGB Model Tuning
[0]	Train-rmse:46.36803	Train-newsvendorRMSE:4568.74343
[1]	Train-rmse:38.24588	Train-newsvendorRMSE:3737.27768


[2]	Train-rmse:31.73315	Train-newsvendorRMSE:3067.80059
[3]	Train-rmse:26.50656	Train-newsvendorRMSE:2528.40375
[4]	Train-rmse:22.28268	Train-newsvendorRMSE:2090.94756
[5]	Train-rmse:18.88367	Train-newsvendorRMSE:1740.60578
[6]	Train-rmse:16.13330	Train-newsvendorRMSE:1462.56776
[7]	Train-rmse:13.89224	Train-newsvendorRMSE:1237.33341
[8]	Train-rmse:12.04993	Train-newsvendorRMSE:1052.27368
[9]	Train-rmse:10.52957	Train-newsvendorRMSE:900.21597
[10]	Train-rmse:9.26887	Train-newsvendorRMSE:775.09057
[11]	Train-rmse:8.21662	Train-newsvendorRMSE:672.46042
[12]	Train-rmse:7.33108	Train-newsvendorRMSE:588.01726
[13]	Train-rmse:6.58464	Train-newsvendorRMSE:518.13865
[14]	Train-rmse:5.95129	Train-newsvendorRMSE:459.30921
[15]	Train-rmse:5.41336	Train-newsvendorRMSE:409.78031
[16]	Train-rmse:4.95069	Train-newsvendorRMSE:367.59584
[17]	Train-rmse:4.55185	Train-newsvendorRMSE:331.74497
[18]	Train-rmse:4.20692	Train-newsvendorRMSE:301.26788
[19]	Train-rmse:3.90666	Train-newsvendorRMSE:275.23653
[20

# Costs

In [10]:
# Loop over each week in target_test
overall_costs_ANN = 0
overall_costs_DT = 0

for i in range(len(target_test)):
    for j in range(len(target_test.columns)):
        # Calculate understock and overstock costs
        cost_ANN = 0
        cost_DT = 0

        #if target_pred_ANN.iloc[i, j] < target_test.iloc[i, j]:
        #    cost_ANN = (prices[j] - costs[j]) * (target_test.iloc[i, j] - np.round(target_pred_ANN.iloc[i, j]))

        #if target_pred_ANN.iloc[i, j] > target_test.iloc[i, j]:
        #    cost_ANN = (costs[j] - salvages[j]) * (np.round(target_pred_ANN.iloc[i, j]) - target_test.iloc[i, j])
        
        if target_pred_DT[i, j] < target_test.iloc[i, j]:
            cost_DT = (prices[j] - costs[j]) * (target_test.iloc[i, j] - np.round(target_pred_DT[i, j]))

        if target_pred_DT[i, j] > target_test.iloc[i, j]:
            cost_DT = (costs[j] - salvages[j]) * (np.round(target_pred_DT[i, j]) - target_test.iloc[i, j])
        
        # Calculate the total costs for the week
        overall_costs_ANN += cost_ANN
        overall_costs_DT += cost_DT

# Print the overall costs
print('Overall costs for ANN: ', int(overall_costs_ANN))
print('Overall costs for DT: ', int(overall_costs_DT))

Overall costs for ANN:  0
Overall costs for DT:  62
