In [None]:
from custom.GeoSpatialEncoder import GeoSpatialEncoder
from custom.PC_Class import PC
from custom.Pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
import os
import xgboost as xgb
from custom.DataCreator import InstanceFileWriter, Scenario
warnings.filterwarnings("ignore", message="X does not have valid feature names, but KMeans was fitted with feature names")

def load_data(file_path, chunk_size=10000):
    """Read an orders CSV in chunks with a progress bar and return one DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file. Its first column becomes the index and the
        columns listed in ``datetime_cols`` are parsed as datetimes.
    chunk_size : int, default 10000
        Number of rows read per chunk.

    Returns
    -------
    pandas.DataFrame
        All chunks concatenated in file order.
    """
    datetime_cols = ['CREATIONDATETIME', 'LAAD_DATETIME_VAN', 'LAAD_DATETIME_TOT', 'LOS_DATETIME_VAN', 'LOS_DATETIME_TOT', '15CREATIONDATETIME']

    # The line count only sizes the progress bar. The context manager makes
    # sure the handle is closed again instead of being leaked.
    with open(file_path, 'r', encoding='utf-8') as handle:
        total_lines = sum(1 for _ in handle)
    # One physical line is the header; quoted multi-line fields would make this
    # an over-estimate, which only affects the progress display.
    data_rows = max(total_lines - 1, 0)
    # Ceiling division: a trailing partial chunk is still one iteration.
    total_chunks = (data_rows + chunk_size - 1) // chunk_size

    # Registers tqdm's pandas integration (progress_apply); kept because other
    # cells may rely on it.
    tqdm.pandas(desc="Reading CSV")
    chunks = pd.read_csv(file_path, chunksize=chunk_size, iterator=True, index_col=0, parse_dates=datetime_cols)

    df_orders = pd.concat(tqdm(chunks, total=total_chunks))
    print("Lenght of input data:", str(len(df_orders)))
    return df_orders

# Shared helper objects reused by the cells below.
# PC comes from custom.PC_Class; later cells call its return_CPC and
# return_CPC_coordinates methods.
PC_obj = PC()
# The encoder is constructed around the PC object; later cells use it to plot
# scenario coordinates.
GSE = GeoSpatialEncoder(PC_obj)

In [None]:
# TODO: set this to the directory that holds the scenario CSV files.
path = "PATH_TO_SCENARIOS_HERE"
# Instance names are the CSV file names without their extension. Entries that
# are not CSV files are skipped because every instance name is later turned
# back into "<name>.csv"; sorting makes the processing order reproducible.
files = sorted(
    os.path.splitext(file_name)[0]
    for file_name in os.listdir(path)
    if file_name.endswith(".csv")
)
# Function to check if any number in the list is >= 5000000000

def check_simulated(shipment_list, threshold=5000000000):
    """Return 1 if any shipment number is at or above ``threshold``, else 0.

    Parameters
    ----------
    shipment_list : str or iterable
        Either the textual form of a list, e.g. ``"[123, 5000000001]"``, or an
        iterable of values that ``int()`` accepts.
    threshold : int, default 5000000000
        Numbers greater than or equal to this value set the flag (the result is
        stored in a column named SIMULATED by the caller).

    Returns
    -------
    int
        1 when at least one number is >= ``threshold``; 0 otherwise, including
        for an empty list.
    """
    if isinstance(shipment_list, str):
        stripped = shipment_list.replace("[", "").replace("]", "").replace(" ", "")
        # Drop empty tokens so "[]" or a trailing comma does not reach int('').
        numbers = [token for token in stripped.split(",") if token]
    else:
        numbers = shipment_list
    return 1 if any(int(num) >= threshold for num in numbers) else 0

# Apply the function to the SHIPMENTNUMBER column


def plot_instance(instance_name, title):
    """Load one scenario instance, flag its simulated orders and plot it.

    Parameters
    ----------
    instance_name : str
        File name of the instance without the ".csv" extension, looked up
        inside the module-level ``path`` directory.
    title : str
        Title passed on to ``GSE.plot_scenario_coordinates``.

    Returns
    -------
    pandas.DataFrame
        The loaded data with an added 0/1 'SIMULATED' column.
    """
    print("Instance name:", instance_name)
    df_instance = load_data(os.path.join(path, instance_name + ".csv"))
    df_instance['SIMULATED'] = df_instance['SHIPMENTNUMBER'].apply(check_simulated)
    GSE.plot_scenario_coordinates(df_instance, title=title)
    return df_instance


# TODO: replace with a real instance name. The name must contain at least four
# "_"-separated fields because the fourth one is read as the scenario number.
instance_name = "INSTANCENAME_HERE"
scenario = instance_name.split("_")[3]
df_scenario = plot_instance(instance_name, title="Scenario without simulated orders")

for instance_name in files:
    if "RANDOMDATE" in instance_name:  # REPLACE WITH A SCENARIO
        scenario = instance_name.split("_")[3]
        # Only scenario numbers 1 and 2 are plotted.
        if 0 < int(scenario) < 3:
            df_scenario = plot_instance(instance_name, title="Scenario with simulated orders: " + scenario)

# TODO: replace with a real instance name (same naming requirement as above).
# The frame loaded here is the df_scenario that the next cell filters.
instance_name = "INSTANCENAME_HERE"
scenario = instance_name.split("_")[3]
df_scenario = plot_instance(instance_name, title="End of day situation without simulated orders")

In [None]:
# Keep only rows with AFHCODE 'd' whose LOSLAND is NL, BE or LU.
# (A STATUS != 990 condition was sketched here earlier but is disabled.)
benelux_mask = df_scenario['AFHCODE'].isin(['d']) & df_scenario['LOSLAND'].isin(['NL', 'BE', 'LU'])
df_filtered = df_scenario[benelux_mask].copy()

# PC_obj.return_CPC is called with both values as strings.
df_filtered["LOSLAND"] = df_filtered["LOSLAND"].astype(str)
df_filtered["LOSPC"] = df_filtered["LOSPC"].astype(str)
# Iterating the two columns directly avoids building a Series per row and also
# works when the filter leaves no rows.
df_filtered["LOS_CPC"] = [
    PC_obj.return_CPC(land, postcode)
    for land, postcode in zip(df_filtered["LOSLAND"], df_filtered["LOSPC"])
]

# Rows with LOS_CPC == 0 are dropped. The explicit copy makes the column
# assignments below act on an independent frame rather than on a slice, which
# avoids pandas' SettingWithCopyWarning.
df_filtered = df_filtered[df_filtered["LOS_CPC"] != 0].copy()
df_filtered["COORDINATES"] = [
    PC_obj.return_CPC_coordinates(cpc) for cpc in df_filtered["LOS_CPC"]
]
# Naming the columns up front keeps the expansion valid for an empty frame.
# Assumes every COORDINATES entry is a pair - TODO confirm against PC_Class.
df_filtered[["LOS_LAT", "LOS_LON"]] = pd.DataFrame(
    df_filtered["COORDINATES"].tolist(),
    index=df_filtered.index,
    columns=["LOS_LAT", "LOS_LON"],
)
df_filtered