In [None]:
from custom.GeoSpatialEncoder import GeoSpatialEncoder
from custom.PC_Class import PC
from custom.Pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
import os
import xgboost as xgb
from custom.DataCreator import InstanceFileWriter, Scenario
warnings.filterwarnings("ignore", message="X does not have valid feature names, but KMeans was fitted with feature names")


# Input CSVs are expected under <current working directory>/data/vos_input_data/.
# os.path.join is used instead of hand-written "////" separators so the paths
# are built with the platform's own separator.
direct = os.getcwd()
_input_dir = os.path.join(direct, "data", "vos_input_data")
file_path_cleaned = os.path.join(_input_dir, "MultiHubData3_cleaned.csv")
file_path_training = os.path.join(_input_dir, "MultiHubData3_training.csv")

def load_data(file_path):
    """Read an orders CSV in chunks (with a progress bar) into one DataFrame.

    Parameters
    ----------
    file_path : str
        Path of the CSV file. The first column is used as the index and the
        columns listed in ``datetime_cols`` are parsed as datetimes.

    Returns
    -------
    pandas.DataFrame
        All chunks concatenated in file order.
    """
    datetime_cols = ['CREATIONDATETIME', 'LAAD_DATETIME_VAN', 'LAAD_DATETIME_TOT', 'LOS_DATETIME_VAN', 'LOS_DATETIME_TOT', '15CREATIONDATETIME']
    chunk_size = 10000

    # Count physical lines only to size the progress bar; close the handle
    # again (the original left it open) and do not count the header line.
    with open(file_path, 'r', encoding='utf-8') as handle:
        total_rows = sum(1 for _ in handle) - 1
    # Ceiling division: a partially filled last chunk is still one chunk.
    total_chunks = max(-(-total_rows // chunk_size), 0)

    # Kept from the original: registers tqdm's pandas integration.
    tqdm.pandas(desc="Reading CSV")
    with pd.read_csv(file_path, chunksize=chunk_size, iterator=True, index_col=0, parse_dates=datetime_cols) as chunks:
        df_orders = pd.concat(tqdm(chunks, total=total_chunks))

    print("Lenght of input data:", str(len(df_orders)))
    return df_orders

# Load the cleaned order set first, then the training set (same order as before).
df_orders, df_orders_training = (
    load_data(csv_path) for csv_path in (file_path_cleaned, file_path_training)
)

# One shared PC instance; it is handed to the Predictor created further down.
PC_obj = PC()
print("PC object created")

## Predictor Class


In [51]:
class Predictor:
    """Holds one trained pipeline and one GeoSpatialEncoder (GSE) per company.

    Attributes
    ----------
    pc : object
        PC helper passed to every GeoSpatialEncoder that is created.
    pipelines_list : dict
        Maps company name -> trained pipeline (filled by ``add_pipeline``).
    GSE_list : dict
        Maps company name -> GeoSpatialEncoder (filled by ``add_GSE``).
    df_orders : pandas.DataFrame
        Orders used to fit the encoders.
    """

    def __init__(self, df_orders, pc=None):
        # Bug fix: the original only set self.pc when a pc was passed in, so
        # Predictor(df) without a pc failed later in add_GSE with AttributeError.
        self.pc = PC() if pc is None else pc
        self.pipelines_list = {}
        self.GSE_list = {}
        self.df_orders = df_orders

    def add_pipeline(self, pipeline):
        """Register ``pipeline`` under its company and train it.

        The classifier is always trained; the regressor variant is chosen by
        ``pipeline.mode`` ("parallel", "dirty" or "clean"). Any other mode
        trains no regressor, as before.
        """
        self.pipelines_list[pipeline.company] = pipeline
        pipeline.get_X_and_Y()
        pipeline.train_classifier()
        mode = pipeline.mode
        if mode == "parallel":
            pipeline.train_parallel_regressor()
        elif mode == "dirty":
            pipeline.train_dirty_regressor()
        elif mode == "clean":
            pipeline.train_clean_regressor()
        print(f"Added pipeline for company {pipeline.company}")

    def add_GSE(self, company, n_clusters):
        """Fit a GeoSpatialEncoder with ``n_clusters`` clusters on the company's orders."""
        df_temp = self.df_orders[self.df_orders['OPDRACHTGEVERNAAM'] == company]
        gse = GeoSpatialEncoder(self.pc)
        self.GSE_list[company] = gse
        gse.set_verbose(False)
        gse.set_input_df(df_temp)
        gse.clean_input_df()
        gse.train_kmeans(n_clusters, 'SHIPMENT_COUNT')
        gse.compute_distribution()
        gse.distribution_per_cluster()
        print(f"Added GSE for company {company} with {n_clusters} clusters")

    def return_delivery_region(self, company, CPC):
        """Return the encoder's cluster for ``CPC``, or None if the company has no GSE."""
        if company in self.GSE_list:
            return self.GSE_list[company].return_cluster(CPC)
        return None

    def predict_order_row(self, row):
        """Run the company's pipeline on one condensed order row.

        Returns the pipeline's prediction, or None (after printing a message)
        when no pipeline is registered for the row's company.
        """
        # The pipeline expects a one-row DataFrame rather than a Series.
        row_df = row.to_frame().T
        company = row["OPDRACHTGEVERNAAM"]
        if company not in self.pipelines_list:
            print(f"Company {company} not found in pipelines list")
            return
        pipeline = self.pipelines_list[company]
        return pipeline.predict_demands_with_correction(row_df, softmax=False)

    def predict_mean_order_row(self, row):
        """Spread the row's PALLETPLAATSEN over regions using the GSE's cluster distribution.

        Returns a one-row DataFrame with columns REGION_0..REGION_{k-1}, each
        rounded to one decimal, or None (after printing a message) when no
        pipeline is registered for the row's company.
        """
        company = row["OPDRACHTGEVERNAAM"]
        if company not in self.pipelines_list:
            print(f"Company {company} not found in pipelines list")
            return
        demand = row["PALLETPLAATSEN"]
        distribution = self.GSE_list[company].cluster_distribution
        data = {
            'REGION_' + str(i): round(distribution[i] * demand, 1)
            for i in range(len(distribution))
        }
        return pd.DataFrame([data])

    def _generate_orders(self, known_orders_a, company, predict_row):
        """Shared loop of generate_predicted_orders / generate_mean_orders."""
        c_known_orders_a = condense_a_orders(known_orders_a)
        c_known_orders_a = c_known_orders_a[c_known_orders_a["OPDRACHTGEVERNAAM"] == company]
        # Without a pipeline every row used to be skipped; return early instead.
        if company not in self.pipelines_list:
            return pd.DataFrame()
        # Collect per-row predictions and concatenate once (avoids quadratic concat).
        frames = []
        for _, row in c_known_orders_a.iterrows():
            pred = predict_row(row)
            pred['MATCHING_KEY'] = row['MATCHING_KEY']
            pred["LAAD_CPC"] = row["LAAD_CPC"]
            frames.append(pred)
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames)

    def generate_predicted_orders(self, known_orders_a, company):
        """Pipeline predictions for every condensed 'a' order of ``company``.

        Each prediction row is tagged with the order's MATCHING_KEY and LAAD_CPC.
        Returns an empty DataFrame when there is nothing to predict.
        """
        return self._generate_orders(known_orders_a, company, self.predict_order_row)

    def generate_mean_orders(self, known_orders_a, company):
        """Mean-distribution predictions for every condensed 'a' order of ``company``.

        Same output layout as ``generate_predicted_orders``.
        """
        return self._generate_orders(known_orders_a, company, self.predict_mean_order_row)

    def get_satisfied_demand_for_company(self, df_d, company):
        """Pallets per MATCHING_KEY and region already covered by known 'd' orders.

        Only rows of ``company`` with AFHCODE 'd' and HAS_PICKUP_TRIP False are
        used. The result has a MATCHING_KEY column plus REGION_0..REGION_{n-1}
        (n from the company's GSE), with 0 for regions without orders.
        """
        n_regions = self.GSE_list[company].return_n_clusters()
        region_columns = ['REGION_' + str(i) for i in range(n_regions)]

        # Copy after filtering so the column assignments below do not write
        # into a slice of the caller's frame.
        df_d = df_d[df_d["OPDRACHTGEVERNAAM"] == company]
        df_d = df_d[df_d["AFHCODE"] == 'd']
        df_d = df_d[df_d["HAS_PICKUP_TRIP"] == False].copy()

        if df_d.empty:
            # No matching delivery orders: return the expected layout with no rows.
            empty = {"MATCHING_KEY": pd.Series(dtype="object")}
            empty.update({column: pd.Series(dtype="int64") for column in region_columns})
            return pd.DataFrame(empty)

        df_d["MATCHING_KEY"] = df_d['OPDRACHTGEVERID'].astype(str) + '_' + df_d['LAAD_DATETIME_VAN'].dt.date.astype(str) + '_' + df_d['LAADPC'].astype(str)
        # Every remaining row belongs to `company`, so look the region up directly.
        df_d['REGION'] = [self.return_delivery_region(company, cpc) for cpc in df_d['LOS_CPC']]
        df_d_aggregated = df_d.groupby(['MATCHING_KEY', 'REGION']).agg({
                'AFHCODE': 'count',
                'OPDRACHTGEVERNAAM': 'first',
                'PALLETPLAATSEN': 'sum',
            }).reset_index().rename(columns={'AFHCODE': 'AANTALORDERS'})
        pivot_df = df_d_aggregated.pivot_table(index='MATCHING_KEY', columns='REGION', values='PALLETPLAATSEN', aggfunc='sum', fill_value=0)
        pivot_df.columns = ['REGION_' + str(col) for col in pivot_df.columns]
        for column_name in region_columns:
            if column_name not in pivot_df.columns:
                pivot_df[column_name] = 0  # Add missing region columns
        pivot_df = pivot_df[sorted(pivot_df.columns, key=lambda x: int(x.split('_')[1]))]
        return pivot_df.reset_index()

## Create predictor object

In [None]:
import json
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
# Per-company hyperparameters: n_clusters plus classifier / regressor settings.
with open('final_pipeline_parameters.json', "r") as f:
    parameters = json.load(f)

predictor = Predictor(df_orders_training, pc= PC_obj)

for company, company_params in parameters.items():
    print(company)

    # Geo-spatial encoder for this company
    predictor.add_GSE(company, company_params["n_clusters"])

    # Models for the pipeline: Pipeline(classifier, regressor, mode, df, company)
    class_params = company_params['class_parameters']
    regressor_params = company_params['regressor_parameters']
    classifier = xgb.XGBClassifier(
        random_state=42,
        n_estimators=class_params['n_estimators'],
        max_depth=class_params['max_depth'],
        learning_rate=class_params['learning_rate'],
    )
    regressor = xgb.XGBRegressor(
        random_state=42,
        n_estimators=regressor_params['n_estimators'],
        max_depth=regressor_params['max_depth'],
        learning_rate=regressor_params['learning_rate'],
    )

    # Training frame comes from the encoder's condensed orders.
    df = predictor.GSE_list[company].condense_orders()
    predictor.add_pipeline(Pipeline(classifier, regressor, "parallel", df, company))
    


    

## Misc functions

In [62]:
def SplitDataByTime(df, delivery_day, planning_day, split_time):
    """Split the orders delivered on ``delivery_day`` by when they were created.

    The cut-off is ``planning_day`` with its hour and minute replaced by those
    of ``split_time``. Returns ``(pre, post)``: orders whose 15CREATIONDATETIME
    is before the cut-off, and orders at or after it.
    """
    clock = pd.Timestamp(split_time)
    cutoff = pd.Timestamp(planning_day).replace(hour=clock.hour, minute=clock.minute)

    # Orders unloaded (LOS_DATETIME_VAN) on the delivery day
    on_delivery_day = df['LOS_DATETIME_VAN'].dt.date == pd.Timestamp(delivery_day).date()
    delivered = df[on_delivery_day]

    created_at = delivered['15CREATIONDATETIME']
    return delivered[created_at < cutoff], delivered[created_at >= cutoff]

def SplitDFtoAandDbyTime(df, delivery_day, planning_day, planning_day_split_time):
    """Return the 'a' and 'd' orders already known at the planning cut-off.

    The cut-off is ``planning_day`` with hour and minute taken from
    ``planning_day_split_time``. 'a' rows are selected on LOS_DATETIME_VAN equal
    to the planning day, 'd' rows on LOS_DATETIME_VAN equal to the delivery
    day; both are limited to rows whose 15CREATIONDATETIME is before the
    cut-off. Returns ``(df_a_pre, df_d_pre)``.
    """
    clock = pd.Timestamp(planning_day_split_time)
    planning_ts = pd.Timestamp(planning_day)
    cutoff = planning_ts.replace(hour=clock.hour, minute=clock.minute)

    los_date = df['LOS_DATETIME_VAN'].dt.date
    known_before_cutoff = df['15CREATIONDATETIME'] < cutoff

    is_a = (los_date == planning_ts.date()) & (df['AFHCODE'] == 'a')
    is_d = (los_date == pd.Timestamp(delivery_day).date()) & (df['AFHCODE'] == 'd')
    return df[is_a & known_before_cutoff], df[is_d & known_before_cutoff]

def condense_a_orders(df_a):
    """Aggregate 'a' orders into one row per MATCHING_KEY.

    MATCHING_KEY is ``OPDRACHTGEVERID_<LAAD_DATETIME_VAN date>_LAADPC``. Per key
    the pallets are summed, the orders counted (AANTALORDERS) and the first
    value of the remaining columns kept. Adds ``dayofweekcreation`` and
    ``weeknr`` (``%V`` of CREATIONDATETIME, as a string).

    The input frame is not modified.
    """
    # Copy after filtering: assigning a column to a filtered slice triggers
    # pandas' chained-assignment warning.
    df_a = df_a[df_a["AFHCODE"] == 'a'].copy()
    df_a["MATCHING_KEY"] = df_a['OPDRACHTGEVERID'].astype(str) + '_' + df_a['LAAD_DATETIME_VAN'].dt.date.astype(str) + '_' + df_a['LAADPC'].astype(str)

    df_a_aggregated = df_a.groupby('MATCHING_KEY').agg({
            'CREATIONDATETIME': 'first',
            'AFHCODE': 'count',
            'OPDRACHTGEVERNAAM': 'first',
            'OPDRACHTGEVERID': 'first',
            'PALLETPLAATSEN': 'sum',
            'LAADPC': 'first',
            'LAAD_CPC': 'first',
            'LAAD_DATETIME_VAN': 'first'
        }).reset_index().rename(columns={'AFHCODE': 'AANTALORDERS'})

    # Convert once and reuse, so both calendar features are derived the same
    # way (the original converted for one feature but not for the other).
    creation = pd.to_datetime(df_a_aggregated['CREATIONDATETIME'])
    df_a_aggregated['dayofweekcreation'] = creation.dt.dayofweek
    df_a_aggregated['weeknr'] = creation.dt.strftime("%V")
    return df_a_aggregated

def condense_d_orders(df_d, GSE):
    """Aggregate 'd' orders into one row per MATCHING_KEY.

    MATCHING_KEY is ``OPDRACHTGEVERID_<LAAD_DATETIME_VAN date>_LAADPC``. Per key
    the pallets are summed, the orders counted (AANTALORDERS) and the first
    value of the remaining columns kept.

    The input frame is not modified.
    """
    # Copy after filtering: assigning columns to a filtered slice triggers
    # pandas' chained-assignment warning.
    df_d = df_d[df_d["AFHCODE"] == 'd'].copy()
    df_d["MATCHING_KEY"] = df_d['OPDRACHTGEVERID'].astype(str) + '_' + df_d['LAAD_DATETIME_VAN'].dt.date.astype(str) + '_' + df_d['LAADPC'].astype(str)
    # NOTE(review): REGION is computed from LAADPC but is not part of the
    # aggregation below, so it never reaches the result. The call is kept in
    # case GSE.get_region is relied upon for validation — confirm and remove.
    df_d['REGION'] = df_d['LAADPC'].apply(GSE.get_region)

    # Aggregate the AFHCODE == 'd' rows per matching key
    df_d_aggregated = df_d.groupby('MATCHING_KEY').agg({
            'CREATIONDATETIME': 'first',
            'AFHCODE': 'count',
            'OPDRACHTGEVERNAAM': 'first',
            'OPDRACHTGEVERID': 'first',
            'PALLETPLAATSEN': 'sum',
            'LAADPC': 'first',
            'LAAD_DATETIME_VAN': 'first'
        }).reset_index().rename(columns={'AFHCODE': 'AANTALORDERS'})
    return df_d_aggregated

def compensate_predicted_volumesold(df_predicted, df_satisfied, known_orders_a):
    """Older variant of ``compensate_predicted_volumes``.

    Subtracts the already satisfied demand (``df_satisfied``) from the predicted
    demand (``df_predicted``) per MATCHING_KEY and region column, clips the
    result at zero, and scales a row down when its remaining prediction exceeds
    the given 'a' volume minus the satisfied volume.

    Parameters
    ----------
    df_predicted : pandas.DataFrame
        Predictions with region columns and LAAD_CPC; MATCHING_KEY is either a
        column or already the index.
    df_satisfied : pandas.DataFrame
        Satisfied demand per region; MATCHING_KEY is either a column or the index.
    known_orders_a : pandas.DataFrame
        Raw 'a' orders; condensed here with ``condense_a_orders``.

    Returns
    -------
    pandas.DataFrame
        Remaining predicted volume per region with MATCHING_KEY and LAAD_CPC
        columns.

    NOTE(review): when ``df_predicted`` is already indexed by MATCHING_KEY the
    in-place ``drop`` below modifies the caller's frame.
    """
    # Set 'MATCHING_KEY' as the index for both DataFrames if not already
    if df_predicted.index.name != 'MATCHING_KEY':
        df_predicted = df_predicted.set_index('MATCHING_KEY')
    if df_satisfied.index.name != 'MATCHING_KEY':
        df_satisfied = df_satisfied.set_index('MATCHING_KEY')
    
    # Plain dict views ({matching_key: {column: value}}) used for the row loops
    pred_dict = df_predicted.to_dict(orient='index')
    satisfied_dict = df_satisfied.to_dict(orient='index')
    known_orders_a_c = condense_a_orders(known_orders_a)
    known_orders_a_c.set_index('MATCHING_KEY', inplace=True)
    known_a_dict = known_orders_a_c.to_dict(orient='index')
    for matchingkey in pred_dict:
        # Sum every column of the row except LAAD_CPC
        volume_predicted = sum([pred_dict[matchingkey][region] for region in pred_dict[matchingkey] if region != 'LAAD_CPC'])
        volume_satisfied = 0
        if matchingkey in satisfied_dict:
            volume_satisfied = sum([satisfied_dict[matchingkey][region] for region in satisfied_dict[matchingkey] if region != 'LAAD_CPC'])
        # NOTE(review): volume_given is computed here but not used in this loop
        volume_given = 0
        if matchingkey in known_a_dict:
            volume_given = known_a_dict[matchingkey]['PALLETPLAATSEN']
        # If the satisfied volume is at least the predicted volume, drop the prediction row
        if volume_satisfied >= volume_predicted:
            df_predicted.drop(matchingkey, inplace=True)      

    # Store the 'LAAD_CPC' from df_predicted before reindexing
    laad_cpc = df_predicted['LAAD_CPC']

    # Ensure both DataFrames have only the columns that are in df_predicted for region calculations
    common_columns = df_predicted.columns.intersection(df_satisfied.columns)
    # Reindex both dataframes to match df_predicted's rows and common columns, fill missing data with zeros
    df_predicted = df_predicted.reindex(columns=common_columns, fill_value=0)
    df_satisfied = df_satisfied.reindex(index=df_predicted.index, columns=common_columns, fill_value=0)
    
    # Subtract the satisfied demand from the predicted demand
    result_df = df_predicted - df_satisfied

    # Ensure no negative values
    result_df = result_df.clip(lower=0)
    # If the unsatisfied volume (predictions for which there is no 'd' demand yet)
    # exceeds the given 'a' volume minus the volume already satisfied by 'd' orders,
    # scale that row's predictions in result_df so that the two become equal.
    dict_unsatisfied = result_df.to_dict(orient='index')
    for key in dict_unsatisfied:
        volume_satisfied = 0
        if key in satisfied_dict:
            volume_satisfied = sum([satisfied_dict[key][region] for region in satisfied_dict[key] if region != 'LAAD_CPC'])
        volume_unsatisfied = sum([dict_unsatisfied[key][region] for region in dict_unsatisfied[key] if region != 'LAAD_CPC'])
        volume_given = 0
        if key in known_a_dict:
            volume_given = known_a_dict[key]['PALLETPLAATSEN']
        # print(f"Volume unsatisfied: {volume_unsatisfied}, Volume a given: {volume_given}, Volume d satisfied: {volume_satisfied}")
        if volume_unsatisfied > volume_given - volume_satisfied:
            # NOTE(review): volume_unsatisfied can be 0 here (when satisfied > given),
            # which divides by zero, and the factor is negative when satisfied > given.
            scale_factor = min(1, (volume_given - volume_satisfied) / volume_unsatisfied)
            # List of columns to scale, exclude 'LAAD_CPC'
            columns_to_scale = [col for col in result_df.columns if col != 'LAAD_CPC' and col.startswith('REGION')]
            if key in result_df.index:
                # Apply scaling directly to the specific row for the specified columns
                result_df.loc[key, columns_to_scale] *= scale_factor
                
                # Round the scaled values with np.round (this rounds, it does not floor)
                result_df.loc[key, columns_to_scale] = np.round(result_df.loc[key, columns_to_scale])


    # Add 'LAAD_CPC' back to result_df
    result_df['LAAD_CPC'] = laad_cpc

    # Reset index if you want 'MATCHING_KEY' back as a column
    result_df.reset_index(inplace=True)

    return result_df

def compensate_predicted_volumes(df_predicted, df_satisfied, known_orders_a):
    """Reduce predicted region volumes by the demand that is already satisfied.

    For every MATCHING_KEY present in both frames the satisfied volume is
    subtracted per shared region column and clipped at zero. Each row is then
    multiplied by ``max(total_predicted - total_satisfied, 0) / total_predicted``
    (factor 1 where that ratio is undefined) and rounded with ``np.round``.

    Parameters
    ----------
    df_predicted : pandas.DataFrame
        Predictions with REGION_* columns (plus e.g. LAAD_CPC); MATCHING_KEY is
        either a column or already the index.
    df_satisfied : pandas.DataFrame
        Satisfied demand with REGION_* columns; MATCHING_KEY is either a column
        or already the index.
    known_orders_a : pandas.DataFrame
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        Same layout as ``df_predicted`` with MATCHING_KEY as a column and the
        shared region columns replaced by the compensated, rounded volumes.
        Neither input frame is modified.
    """
    # Kept from the original so the process-wide warning filter stays the same.
    warnings.filterwarnings(
        action='ignore',
        message="Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas."
    )

    # set_index returns a new frame. When the frame is already indexed, copy it
    # explicitly so the helper columns added below never leak into the caller's
    # frame (the original mutated it in that case).
    if df_predicted.index.name != 'MATCHING_KEY':
        df_predicted = df_predicted.set_index('MATCHING_KEY')
    else:
        df_predicted = df_predicted.copy()
    if df_satisfied.index.name != 'MATCHING_KEY':
        df_satisfied = df_satisfied.set_index('MATCHING_KEY')
    else:
        df_satisfied = df_satisfied.copy()

    df_predicted['total_predicted'] = df_predicted[[column for column in df_predicted.columns if "REGION" in column]].sum(axis=1)
    df_satisfied['total_satisfied'] = df_satisfied[[column for column in df_satisfied.columns if "REGION" in column]].sum(axis=1)

    result_df = df_predicted.copy()
    common_columns = df_satisfied.columns.intersection(result_df.columns)
    common_rows = df_satisfied.index.intersection(result_df.index)

    if len(common_columns):
        # Subtract only on the shared columns so non-numeric columns such as
        # LAAD_CPC never take part in the arithmetic.
        difference = df_predicted[common_columns] - df_satisfied[common_columns]
        subtraction_result = difference.loc[common_rows].astype('float64')
        # Cast the target first: writing floats into integer columns is the
        # deprecated "incompatible dtype" assignment.
        result_df[common_columns] = result_df[common_columns].astype('float64')
        result_df.loc[common_rows, common_columns] = subtraction_result

    result_df[common_columns] = result_df[common_columns].clip(lower=0)
    result_df['total_satisfied'] = df_satisfied['total_satisfied']
    result_df["should_predicted"] = (result_df['total_predicted'] - result_df['total_satisfied']).clip(lower=0)
    result_df["scaling_factor"] = result_df["should_predicted"] / result_df["total_predicted"]
    # NaN means "no satisfied demand for this key" or 0/0: leave the row unscaled.
    result_df["scaling_factor"] = result_df["scaling_factor"].fillna(1)
    result_df[common_columns] = result_df[common_columns].multiply(result_df["scaling_factor"], axis=0)
    result_df = result_df.drop(columns=["total_predicted", "total_satisfied", "should_predicted", "scaling_factor"])
    result_df = result_df.reset_index()
    result_df[common_columns] = np.round(result_df[common_columns])
    return result_df



def write_to_csv(df, file_name, path=""):
    """Write ``df`` to ``<path>/<file_name>`` as CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to write (the index is written as well, as before).
    file_name : str
        Name of the CSV file.
    path : str, optional
        Output directory. The notebook shipped with an unfilled placeholder
        here (``path = #filepath heree``), which is a SyntaxError; it is now a
        parameter that defaults to the current working directory.
    """
    target = os.path.join(path, file_name)
    df.to_csv(target)

def export_scenario(file_name, df_scenario, max_pallets=32):
    """Group a scenario per (LAAD_CPC, OPDRACHTGEVERID, LOS_CPC) and write it to CSV.

    Rows with ``max_pallets`` or more pallets are dropped before grouping, and
    groups whose summed pallets reach ``max_pallets`` are dropped afterwards.
    Per group the pallets are summed, the shipment numbers collected in a list,
    the orders counted (ORDER_COUNT) and the last value of every other kept
    column taken.

    Parameters
    ----------
    file_name : str
        Name handed to ``write_to_csv``.
    df_scenario : pandas.DataFrame
        Scenario orders; must contain every column in ``columns_to_keep``.
    max_pallets : int or float, optional
        Exclusive pallet limit; defaults to the previously hard-coded 32.

    Returns
    -------
    str
        ``file_name``, unchanged.
    """
    df_scenario = df_scenario[df_scenario["PALLETPLAATSEN"] < max_pallets]
    columns_to_keep = [
                "SHIPMENTNUMBER", "STATUS", "CREATIONDATETIME", "AFHCODE", "OPDRACHTGEVERID", "OPDRACHTGEVERNAAM",
                "LAADZOEK", "LAADADRES", "LAADPLAATS", "LAADPC", "LAADLAND", "LAAD_DATETIME_VAN", "LAAD_DATETIME_TOT",
                "LAADPLANK", "LOSZOEK", "LOSADRES", "LOSPLAATS", "LOSPC", "LOSLAND", "LOS_DATETIME_VAN", "LOS_DATETIME_TOT",
                "LOSPLANK", "COLLIAANTAL", "COLLICODE", "PALLETPLAATSEN", "LAADRIT", "LOSRIT", "HAS_PICKUP_TRIP", 
                "HAS_DELIVERY_TRIP", "15CREATIONDATETIME", "LAAD_CPC", "LOS_CPC"
            ]
    group_by_columns = ['LAAD_CPC', 'OPDRACHTGEVERID', 'LOS_CPC']
    # Every kept column that is neither a grouping key nor specially aggregated
    # takes the last value of its group.
    special_columns = {'PALLETPLAATSEN', 'SHIPMENTNUMBER'}
    agg_dict = {
        col: 'last'
        for col in columns_to_keep
        if col not in group_by_columns and col not in special_columns
    }
    agg_dict.update({
        'PALLETPLAATSEN': 'sum',
        'SHIPMENTNUMBER': lambda x: list(x),
        'COUNTER': 'count'  # Add a count for the number of orders
    })
    # Work on a copy so the helper COUNTER column never reaches the caller's frame.
    df_scenario = df_scenario.copy()
    df_scenario.loc[:, "COUNTER"] = df_scenario.loc[:, "SHIPMENTNUMBER"].copy()
    df_grouped = df_scenario.groupby(group_by_columns).agg(agg_dict).reset_index()
    df_grouped = df_grouped.rename(columns={'COUNTER': 'ORDER_COUNT'})
    df_grouped = df_grouped[df_grouped["PALLETPLAATSEN"] < max_pallets]
    write_to_csv(df_grouped, file_name)
    return file_name

def add_matching_key(df):
    """Add the MATCHING_KEY column to ``df`` in place and return ``df``.

    The key is ``OPDRACHTGEVERID_<LAAD_DATETIME_VAN date>_LAADPC``. As a side
    effect pandas' chained-assignment check is switched off globally.
    """
    pd.set_option("mode.chained_assignment", None)
    customer_part = df['OPDRACHTGEVERID'].astype(str)
    date_part = df['LAAD_DATETIME_VAN'].dt.date.astype(str)
    postcode_part = df['LAADPC'].astype(str)
    df["MATCHING_KEY"] = customer_part + '_' + date_part + '_' + postcode_part
    return df

## Single day

In [None]:
from datetime import datetime, timedelta


# Both dates were left as unfilled placeholders (`date = #sample date here`),
# which is a SyntaxError. Fill them in before running this cell.
# `date` is parsed in the scenario loop with datetime.strptime(date, '%Y-%m-%d'),
# so it must be a "YYYY-MM-DD" string; `planning_date` is passed to pd.Timestamp.
date = None           # TODO: delivery day to simulate, e.g. "2024-01-31"
planning_date = None  # TODO: day on which the planning cut-off times apply
if date is None or planning_date is None:
    raise ValueError("Set `date` (YYYY-MM-DD) and `planning_date` before running the scenario generation.")

# Cut-off times on the planning day for which scenarios are generated.
# times = [(datetime.strptime("10:00", "%H:%M") + i * timedelta(minutes=15)).strftime("%H:%M") 
#          for i in range((datetime.strptime("18:00", "%H:%M") - datetime.strptime("10:00", "%H:%M")) // timedelta(minutes=15) + 1)]
# times = ["12:00"] 
# times = ["6:00", "7:00", "8:00", "9:00", "10:00", "11:00", "12:00", "13:00", "14:00", "15:00", "16:00", "17:00", "18:00", "19:00", "20:00"] 
times = ["10:00", "11:00", "12:00", "13:00", "14:00", "15:00", "16:00", "17:00", "18:00", "19:00"]
# Number of scenarios per cut-off time; scenario 0 is the "known orders only"
# baseline and is skipped by the loop below.
n_scenarios = 20

# Set to True to print the per-scenario volume breakdown.
print_bool = False

df_orders_d = df_orders[df_orders["AFHCODE"] == 'd']
df_orders_a = df_orders[df_orders["AFHCODE"] == 'a']

# Accumulators referenced only by the commented-out diagnostics after the loop.
time_list = []
simulated_list = []
simulated_volume_list = []
known_list = []
known_volume_list = []
known_a_volume_list = []
simulated_known_list = []
simulated_known_volume_list = []
total_a_satisfied_by_d_orders = []

# One entry per generated scenario (aligned with time_list); used by the plots.
list_total_d_orders = []
list_total_a_orders = []
list_total_unpredictable_a_orders = []
list_total_predictable_a_orders = []
list_total_a_orders_satisfied_by_d_orders = []
list_total_a_orders_not_satisfied_by_d_orders = []
list_total_a_orders_predicted = []
list_total_simulated_orders_after_correction = []
list_total_simulated_orders_total = []

names = []
for time in times:
    # Everything in this first part depends only on the cut-off time, so it is
    # computed once per time instead of once per scenario / company / order.
    known_orders_a, known_orders_d = SplitDFtoAandDbyTime(df_orders, date, planning_date, time)
    date_obj = datetime.strptime(date, '%Y-%m-%d').date()
    los_window_start = datetime.combine(date_obj, datetime.strptime("9:00", "%H:%M").time())
    los_window_end = datetime.combine(date_obj, datetime.strptime("17:00", "%H:%M").time())
    has_model = known_orders_a["OPDRACHTGEVERNAAM"].isin(predictor.GSE_list)
    total_unpredictable_a_orders = known_orders_a[~has_model]["PALLETPLAATSEN"].sum()
    total_predictable_a_orders = known_orders_a[has_model]["PALLETPLAATSEN"].sum()

    number = 0
    for x in range(n_scenarios):
        file_name = date + "_" + time.replace(":", "") + "_DumbScenario_" + str(number) + ".csv"
        if x == 0:
            # Scenario 0 is the baseline with known delivery orders only.
            # It is neither exported nor recorded in the statistics lists.
            df_scenario = known_orders_d
            # export_scenario(file_name ,df_scenario)
            number += 1
            continue

        pallets_satisfied = 0
        pallets_unsatisfied = 0
        # Never updated; kept because the commented-out diagnostics after the
        # loop refer to it.
        total_satisfied_by_d_orders = 0

        if print_bool:
            print(f"Total d orders:                                 {known_orders_d['PALLETPLAATSEN'].sum()}")
            print("                                          ------------------------------------")
            print(f"Total a orders:                                 { known_orders_a['PALLETPLAATSEN'].sum()}")
        total_a_orders_satisfied_by_d_orders = 0
        total_a_orders_not_satisfied_by_d_orders = 0
        total_a_orders_predicted = 0

        shipment_number = 5000000000
        # Simulated orders are collected as plain records and turned into one
        # DataFrame per scenario (avoids quadratic row-by-row concat).
        simulated_records = []
        # for company in ['BOSCH_THERMOTECHNIEK_BV']:
        for company in known_orders_a["OPDRACHTGEVERNAAM"].unique():
            if company not in predictor.GSE_list:
                continue
            gse = predictor.GSE_list[company]

            df_satisfied = predictor.get_satisfied_demand_for_company(known_orders_d, company)
            pallets_satisfied += sum([df_satisfied[region].sum() for region in df_satisfied.columns if region != 'MATCHING_KEY'])
            #df_predicted = predictor.generate_predicted_orders(known_orders_a, company) #for predicted orders
            df_predicted = predictor.generate_mean_orders(known_orders_a, company) # For mean orders

            total_a_orders_predicted += sum([df_predicted[region].sum() for region in df_predicted.columns if region != 'MATCHING_KEY' if region != 'LAAD_CPC'])

            df_orders_company = add_matching_key(pd.concat([known_orders_a[known_orders_a['OPDRACHTGEVERNAAM'] == company], known_orders_d[known_orders_d['OPDRACHTGEVERNAAM'] == company]]))

            condensed_orders = gse.condense_orders(df=df_orders_company)
            total_a_orders_satisfied_by_d_orders += np.minimum(condensed_orders['PALLETPLAATSEN'], condensed_orders['PALLETPLAATSEN_ACTUAL']).sum()
            total_a_orders_not_satisfied_by_d_orders += (condensed_orders['PALLETPLAATSEN'] - condensed_orders['PALLETPLAATSEN_ACTUAL']).clip(lower=0).sum()

            df_unsatisfied = compensate_predicted_volumes(df_predicted, df_satisfied, known_orders_a)
            pallets_unsatisfied += sum([df_unsatisfied[region].sum() for region in df_unsatisfied.columns if region != 'MATCHING_KEY' if region != 'LAAD_CPC'])

            # Turn the remaining volume per (matching key, region) into
            # individual simulated delivery orders.
            for _, row in df_unsatisfied.iterrows():
                remaining_by_column = row.to_dict()
                customer_id = remaining_by_column["MATCHING_KEY"].split("_")[0]
                for key, remaining in remaining_by_column.items():
                    if key == "MATCHING_KEY" or key == "LAAD_CPC":
                        continue
                    if not remaining > 0:
                        continue
                    region_number = int(key.split("_")[1])
                    while remaining > 0:
                        delivery_CPC = gse.return_random_CPC_from_clusternr(region_number)
                        sample_order_size = gse.sample_order_size_of_CPC(delivery_CPC)
                        if not sample_order_size > 0:
                            # A non-positive (or NaN) sample never reduces the
                            # remaining volume; stop here instead of looping forever.
                            warnings.warn(f"Non-positive order size sampled for {company} / {delivery_CPC}; dropping remaining volume {remaining}")
                            break
                        if sample_order_size > remaining:
                            sample_order_size = remaining
                        remaining -= sample_order_size
                        shipment_number += 1
                        simulated_records.append({
                            'LOS_CPC': delivery_CPC,
                            'LAAD_CPC': remaining_by_column["LAAD_CPC"],
                            'OPDRACHTGEVERNAAM': company,
                            'OPDRACHTGEVERID': customer_id,
                            'SHIPMENTNUMBER': shipment_number,
                            'PALLETPLAATSEN': sample_order_size,
                            'LOS_DATETIME_VAN': los_window_start,
                            'LOS_DATETIME_TOT': los_window_end,
                        })

        df_simulated_orders = pd.DataFrame(simulated_records)
        # An empty frame has no PALLETPLAATSEN column (this used to raise a
        # KeyError when no orders were simulated).
        if simulated_records:
            total_simulated_orders_after_correction = df_simulated_orders['PALLETPLAATSEN'].sum()
        else:
            total_simulated_orders_after_correction = 0

        df_scenario = pd.concat([df_simulated_orders, known_orders_d])
        export_scenario(file_name ,df_scenario)
        print(file_name)
        number += 1
        if print_bool:
            print(f"Total unpredictable a orders due to no model:   {total_unpredictable_a_orders}")
            print("                                          ------------------------------------")
            print(f"Total predictable a orders:                     {total_predictable_a_orders}")
            
            print(f"Total a orders satisfied by d orders:           {total_a_orders_satisfied_by_d_orders}")
            print("                                          ------------------------------------")
            print(f"Total a orders not satisfied by d orders:       {total_a_orders_not_satisfied_by_d_orders}")
            print(f"Total a orders predicted:                       {total_a_orders_predicted}")
            print(f"Total predicted orders after correction:        {total_simulated_orders_after_correction}")
            print("                                          ------------------------------------")
            print(f"Total simulated orders:                         {df_scenario['PALLETPLAATSEN'].sum()}")

        list_total_d_orders.append(known_orders_d['PALLETPLAATSEN'].sum())
        # NOTE(review): this records the *predictable* a-order volume, although
        # the plots label the series 'Known Pickup Orders' — confirm intent.
        list_total_a_orders.append(total_predictable_a_orders)
        list_total_unpredictable_a_orders.append(total_unpredictable_a_orders)
        list_total_predictable_a_orders.append(total_predictable_a_orders)
        list_total_a_orders_satisfied_by_d_orders.append(total_a_orders_satisfied_by_d_orders)
        list_total_a_orders_not_satisfied_by_d_orders.append(total_a_orders_not_satisfied_by_d_orders)
        list_total_a_orders_predicted.append(total_a_orders_predicted)
        list_total_simulated_orders_after_correction.append(total_simulated_orders_after_correction)
        list_total_simulated_orders_total.append(total_simulated_orders_after_correction + known_orders_d['PALLETPLAATSEN'].sum())

        time_list.append(time)
    # # simulated_list.append(len(df_simulated_orders))
    # # simulated_volume_list.append(df_simulated_orders['PALLETPLAATSEN'].sum())
    # # known_list.append(len(known_orders_d))
    # # known_volume_list.append(known_orders_d['PALLETPLAATSEN'].sum())
    # # simulated_known_list.append(len(df_scenario))
    # # simulated_known_volume_list.append(df_scenario['PALLETPLAATSEN'].sum())
    # # known_a_volume_list.append(known_orders_a['PALLETPLAATSEN'].sum())
    # # total_a_satisfied_by_d_orders.append(total_satisfied_by_d_orders)

    # # print(f"Time: {time}")
    # # print(f"Volume of simulated orders:         {df_simulated_orders['PALLETPLAATSEN'].sum()}")
    # # print(f"Volume of known delivery orders:    {known_orders_d['PALLETPLAATSEN'].sum()}")
    # # print(f"Volume of known a orders:           {known_orders_a['PALLETPLAATSEN'].sum()}")
    # # print(f"Volume of simulated + kown orders:  {df_scenario['PALLETPLAATSEN'].sum()}")
    # # print(f"Total a orders satisfied by d orders:        {total_satisfied_by_d_orders}")

In [None]:
# Plot results on a graph
import matplotlib.pyplot as plt

# (values, legend label) pairs, drawn in this order on one shared axis.
volume_series = [
    (list_total_simulated_orders_after_correction, 'Simulated Delivery Orders'),
    (list_total_d_orders, 'Known Delivery Orders'),
    (list_total_a_orders_satisfied_by_d_orders, 'Pickup Orders Satisfied by Delivery Orders'),
    (list_total_a_orders, 'Known Pickup Orders'),
    (list_total_simulated_orders_total, 'Simulated Delivery + Known Delivery Orders'),
]

fig, ax = plt.subplots(figsize=(10, 6))
for volumes, series_label in volume_series:
    ax.plot(time_list, volumes, label=series_label)

ax.set_xlabel('Time', fontsize=14)
ax.tick_params(axis='x', labelrotation=45, labelsize=12)
# Leave 100 pallets of headroom above the largest total.
ax.set_ylim(0, max(list_total_simulated_orders_total) + 100)
ax.set_ylabel('Volume of Orders (Pallets)', fontsize=14)
ax.set_title('Volume of Orders over Time', fontsize=16)
ax.legend(fontsize=12)
ax.grid(True)
fig.tight_layout()
plt.show()


# list_total_d_orders.append(known_orders_d['PALLETPLAATSEN'].sum())
#     list_total_a_orders.append(total_predictable_a_orders) 
#     list_total_unpredictable_a_orders.append(total_unpredictable_a_orders)
#     list_total_predictable_a_orders.append(total_predictable_a_orders)
#     list_total_a_orders_satisfied_by_d_orders.append(total_a_orders_satisfied_by_d_orders)
#     list_total_a_orders_not_satisfied_by_d_orders.append(total_a_orders_not_satisfied_by_d_orders)
#     list_total_a_orders_predicted.append(total_a_orders_predicted)
#     list_total_simulated_orders_after_correction.append(total_simulated_orders_after_correction)
#     list_total_simulated_orders_total

In [None]:
# Same chart as the previous cell, but with the y-axis tick labels hidden.
volume_series = [
    (list_total_simulated_orders_after_correction, 'Simulated Delivery Orders'),
    (list_total_d_orders, 'Known Delivery Orders'),
    (list_total_a_orders_satisfied_by_d_orders, 'Pickup Orders Satisfied by Delivery Orders'),
    (list_total_a_orders, 'Known Pickup Orders'),
    (list_total_simulated_orders_total, 'Simulated Delivery + Known Delivery Orders'),
]

fig, ax = plt.subplots(figsize=(10, 6))
for volumes, series_label in volume_series:
    ax.plot(time_list, volumes, label=series_label)

ax.set_xlabel('Time', fontsize=14)
ax.tick_params(axis='x', labelrotation=45, labelsize=12)
ax.set_ylim(0, max(list_total_simulated_orders_total) + 100)
ax.set_ylabel('Volume of Orders (Pallets)', fontsize=14)
ax.set_title('Volume of Orders over Time', fontsize=16)
ax.legend(fontsize=12)
ax.grid(True)

# Remove y-axis labels
ax.set_yticklabels([])

fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Reference value for the percentage axis: the largest known delivery volume.
max_value = max(list_total_d_orders)

# y-ticks from 0% to 110% in steps of 10%
yticks = np.arange(0, 111, 10)
ytick_labels = [f'{y}%' for y in yticks]

# (values, legend label) pairs, drawn in this order on one shared axis.
volume_series = [
    (list_total_simulated_orders_after_correction, 'Simulated Delivery Orders'),
    (list_total_d_orders, 'Known Delivery Orders'),
    (list_total_a_orders_satisfied_by_d_orders, 'Pickup Orders Satisfied by Delivery Orders'),
    (list_total_a_orders, 'Known Pickup Orders'),
    (list_total_simulated_orders_total, 'Simulated Delivery + Known Delivery Orders'),
]

fig, ax = plt.subplots(figsize=(10, 6))
for volumes, series_label in volume_series:
    ax.plot(time_list, volumes, label=series_label)

ax.set_xlabel('Time', fontsize=14)
ax.tick_params(axis='x', labelrotation=45, labelsize=12)
ax.set_ylabel('Percentage of Maximum Orders', fontsize=14)
ax.set_title('Volume of Orders over Time', fontsize=16)
ax.legend(fontsize=12)
ax.grid(True)

# Place the ticks at 0%..110% of max_value and label them as percentages.
ax.set_yticks(yticks / 100 * max_value)
ax.set_yticklabels(ytick_labels, fontsize=12)

fig.tight_layout()
plt.show()

In [None]:
# Display the per-scenario totals (simulated + known delivery pallets) collected by the scenario loop.
list_total_simulated_orders_total

In [None]:
# Plot results on a graph
import matplotlib.pyplot as plt
# (values, legend label) pairs, drawn in this order on one shared axis.
detail_series = [
    (list_total_d_orders, 'Known d orders'),
    (list_total_a_orders, 'Known a orders'),
    (list_total_a_orders_satisfied_by_d_orders, 'a orders satisfied by d orders'),
    (list_total_a_orders_not_satisfied_by_d_orders, 'a orders not satisfied by d orders'),
    (list_total_simulated_orders_after_correction, 'Simulated orders'),
    (list_total_simulated_orders_total, 'Simulated + Known orders'),
]

fig, ax = plt.subplots(figsize=(10, 6))
for volumes, series_label in detail_series:
    ax.plot(time_list, volumes, label=series_label)

ax.set_xlabel('Time')
ax.tick_params(axis='x', labelrotation=45)
# Leave 100 pallets of headroom above the largest total.
ax.set_ylim(0, max(list_total_simulated_orders_total) + 100)
ax.set_ylabel('Volume of orders (pallets)')
ax.set_title('Volume of orders over time')
ax.legend()
plt.show()


# list_total_d_orders.append(known_orders_d['PALLETPLAATSEN'].sum())
#     list_total_a_orders.append(total_predictable_a_orders) 
#     list_total_unpredictable_a_orders.append(total_unpredictable_a_orders)
#     list_total_predictable_a_orders.append(total_predictable_a_orders)
#     list_total_a_orders_satisfied_by_d_orders.append(total_a_orders_satisfied_by_d_orders)
#     list_total_a_orders_not_satisfied_by_d_orders.append(total_a_orders_not_satisfied_by_d_orders)
#     list_total_a_orders_predicted.append(total_a_orders_predicted)
#     .append(total_simulated_orders_after_correction)
    

In [None]:
# Write df_scenario to an Excel file. df_scenario is last assigned inside the
# scenario loop, so this exports the final scenario that loop produced.
df_scenario.to_excel("scenario.xlsx")