In [1]:
def SplitDataByTime(df, delivery_day, planning_day, split_time):
    """Split the orders delivered on ``delivery_day`` by their creation time.

    Parameters
    ----------
    df : pandas.DataFrame
        Orders with datetime columns ``LOS_DATETIME_VAN`` (used to select the
        delivery day) and ``15CREATIONDATETIME`` (compared with the cut-off).
    delivery_day : anything accepted by ``pd.Timestamp``
        Only rows whose ``LOS_DATETIME_VAN`` falls on this calendar date are kept.
    planning_day : anything accepted by ``pd.Timestamp``
        Calendar day on which the cut-off is placed.
    split_time : anything accepted by ``pd.Timestamp``
        Only its hour and minute are used as the clock time of the cut-off.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(pre, post)``: rows created strictly before the cut-off and rows
        created at or after it. Rows whose creation time is NaT satisfy
        neither comparison and therefore appear in neither frame.
    """
    delivery_date = pd.Timestamp(delivery_day).date()
    cutoff_clock = pd.Timestamp(split_time)

    # Pin the cut-off to exactly HH:MM:00 on the planning day. Without resetting
    # the smaller fields, seconds/sub-seconds carried by `planning_day` would
    # silently shift the cut-off.
    split_datetime = pd.Timestamp(planning_day).replace(
        hour=cutoff_clock.hour,
        minute=cutoff_clock.minute,
        second=0,
        microsecond=0,
        nanosecond=0,
    )

    # Orders delivered on the requested date.
    delivered = df[df['LOS_DATETIME_VAN'].dt.date == delivery_date]

    created = delivered['15CREATIONDATETIME']
    df_orders_filtered_pre = delivered[created < split_datetime]
    df_orders_filtered_post = delivered[created >= split_datetime]
    return df_orders_filtered_pre, df_orders_filtered_post

def write_to_csv(df, file_name, path=""):
    """Write ``df`` to a CSV file via ``df.to_csv``.

    Parameters
    ----------
    df : object with a ``to_csv(path)`` method (e.g. a pandas DataFrame)
        Data to write.
    file_name : str
        Name of the output file.
    path : str, optional
        Directory to write into. Defaults to ``""``, i.e. the current working
        directory. The original placeholder assignment (``path = #...``) was a
        syntax error that prevented the whole cell from running.
    """
    # Local import: this cell runs before the notebook's import cell.
    import os

    # os.path.join inserts the separator when needed, so `path` may be given
    # with or without a trailing slash.
    target = os.path.join(path, file_name)
    df.to_csv(target)

In [None]:
from custom.GeoSpatialEncoder import GeoSpatialEncoder
from custom.PC_Class import PC
import importlib
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from custom.DataCreator import InstanceFileWriter, Scenario
import os
# Columns of the orders file that are parsed to datetime after loading.
datetime_cols = ['CREATIONDATETIME', 'LAAD_DATETIME_VAN', 'LAAD_DATETIME_TOT', 'LOS_DATETIME_VAN', 'LOS_DATETIME_TOT', '15CREATIONDATETIME']

# Build the input path with os.path.join so it resolves on every OS; the
# hard-coded backslashes only worked on Windows.
direct = os.getcwd()
file_path = os.path.join(direct, "data", "vos_input_data", "MultiHubData3_cleaned.csv")

# Count the lines of the file (header line included) to size the progress bar.
# The context manager closes the handle, which the bare open() call never did.
with open(file_path, 'r', encoding='utf-8') as csv_file:
    total_rows = sum(1 for _line in csv_file)

chunk_size = 10000
tqdm.pandas(desc="Reading CSV")
# Lazy reader: yields DataFrames of at most `chunk_size` rows.
chunks = pd.read_csv(file_path, chunksize=chunk_size, iterator=True)

def print_df_size(df, text):
    """Print ``text`` followed by a colon and the length of ``df``."""
    label = text + ": "
    row_count = len(df)
    print(label, row_count)

# Number of chunks read_csv will yield: ceil(data rows / chunk_size).
# `total_rows` counts the header line as well, so it is subtracted first; the
# previous floor division under-reported the progress-bar total whenever the
# row count was not an exact multiple of `chunk_size`.
data_rows = max(total_rows - 1, 0)
n_chunks = -(-data_rows // chunk_size)  # integer ceiling division
df_orders = pd.concat(tqdm(chunks, total=n_chunks))

# Parse every column listed in `datetime_cols`; values that cannot be parsed
# become NaT (errors='coerce') instead of raising.
for column in datetime_cols:
    print(f"column: {column}")
    df_orders[column] = pd.to_datetime(df_orders[column], errors='coerce')

print("Lenght of input data:", str(len(df_orders)))
# Silence FutureWarnings raised from within seaborn.
warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn")

In [None]:
# Business days of 2023 for which an intake curve is drawn.
dates = pd.date_range(start='2023-01-01', end='2023-12-30', freq='B')

# Cut-off times every 15 minutes from 06:00 to 19:00. pd.Timestamp('HH:MM')
# is anchored on today's date; only the clock time matters here.
# '15min' replaces the deprecated '15T' alias and means the same thing.
timestamps = pd.date_range(start=pd.Timestamp('06:00'), end=pd.Timestamp('19:00'), freq='15min')

# Loop-invariant masks, computed once instead of once per date.
delivery_dates = df_orders['LOS_DATETIME_VAN'].dt.date
is_code_d = df_orders['AFHCODE'] == "d"  # presumably the delivery orders, per the plot title

fig, ax = plt.subplots()
for date in dates:
    # Planning happens on the previous business day: the Friday before for a
    # Monday delivery, otherwise the day before.
    days_back = 3 if date.weekday() == 0 else 1
    date_minus_1 = date - pd.Timedelta(days=days_back)

    # AFHCODE "d" orders delivered on `date`.
    df_orders_filtered = df_orders[(delivery_dates == date.date()) & is_code_d]
    results = []
    total_len = len(df_orders_filtered)
    if total_len != 0:
        for timestamp in timestamps:
            df_orders_filtered_pre, df_orders_filtered_post = SplitDataByTime(df_orders_filtered, date, date_minus_1, timestamp)
            # Share (in %) of the day's orders created before this cut-off.
            results.append(len(df_orders_filtered_pre) / total_len * 100)

        print(date, results)
        # One faint line per day so that dense regions show up darker.
        ax.plot(timestamps, results, alpha=0.07, color='blue')

# Show only the clock time on the x-axis, one tick per hour.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax.xaxis.set_major_locator(mdates.HourLocator(interval=1))

# Fix the x-axis range to the evaluated cut-off times.
ax.set_xlim([timestamps[0], timestamps[-1]])

# Red dashed vertical line at 17:00 (5 PM).
five_pm = pd.Timestamp('17:00')
ax.axvline(x=five_pm, color='red', linestyle='--', label='5 PM')

# Labels, title and layout.
plt.xticks(rotation=45)
ax.set_xlabel('Time of Day')
ax.set_ylabel('Percentage of Orders received')
ax.set_title('Percentage of Delivery Orders Released to Planning by Time of Day (2023)')
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Business days of 2023 for which an intake curve is drawn.
dates = pd.date_range(start='2023-01-01', end='2023-12-30', freq='B')

# Cut-off times every 15 minutes from 06:00 to 19:00. pd.Timestamp('HH:MM')
# is anchored on today's date; only the clock time matters here.
# '15min' replaces the deprecated '15T' alias and means the same thing.
timestamps = pd.date_range(start=pd.Timestamp('06:00'), end=pd.Timestamp('19:00'), freq='15min')

# Per-day curves collected for the aggregated plot drawn later in the notebook.
all_days = []

# Loop-invariant masks, computed once instead of once per date.
delivery_dates = df_orders['LOS_DATETIME_VAN'].dt.date
is_code_d = df_orders['AFHCODE'] == "d"  # presumably the delivery orders, per the plot title

fig, ax = plt.subplots()
for date in dates:
    # Planning happens on the previous business day: the Friday before for a
    # Monday delivery, otherwise the day before.
    days_back = 3 if date.weekday() == 0 else 1
    date_minus_1 = date - pd.Timedelta(days=days_back)

    # AFHCODE "d" orders delivered on `date`.
    df_orders_filtered = df_orders[(delivery_dates == date.date()) & is_code_d]
    results = []
    total_len = len(df_orders_filtered)
    # PALLETPLAATSEN is used as the volume measure of an order.
    total_vol = df_orders_filtered['PALLETPLAATSEN'].sum()

    # Skip days without orders AND days whose total volume is zero: dividing by
    # a zero total produces NaN/inf shares, which would turn the mean and
    # percentile curves computed from `all_days` into NaN.
    if total_len != 0 and total_vol != 0:
        for timestamp in timestamps:
            df_orders_filtered_pre, df_orders_filtered_post = SplitDataByTime(df_orders_filtered, date, date_minus_1, timestamp)
            vol = df_orders_filtered_pre['PALLETPLAATSEN'].sum()
            # Share (in %) of the day's volume created before this cut-off.
            results.append(float(vol / total_vol * 100))

        print(date, results)
        # One faint line per day so that dense regions show up darker.
        ax.plot(timestamps, results, alpha=0.07, color='blue')
        # Days on which everything was already in before the first cut-off are
        # drawn but left out of the aggregate.
        if results[0] != 100:
            all_days.append(results)

# Show only the clock time on the x-axis, one tick per hour.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax.xaxis.set_major_locator(mdates.HourLocator(interval=1))

# Fix the x-axis range to the evaluated cut-off times.
ax.set_xlim([timestamps[0], timestamps[-1]])

# Red dashed vertical line at 17:00 (5 PM).
five_pm = pd.Timestamp('17:00')
ax.axvline(x=five_pm, color='red', linestyle='--', label='5 PM')

# Labels, title and layout.
plt.xticks(rotation=45)
ax.set_xlabel('Time of Day')
ax.set_ylabel('Percentage of Volume of Orders Received')
ax.set_title('Percentage of Volume of Delivery Orders Released to Planning by Time of Day (2023)')
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
def plot_line_with_quartiles(data, x_values=None):
    """Plot the mean curve of ``data`` with 25-75 and 5-95 percentile bands.

    Parameters
    ----------
    data : sequence of equally long numeric sequences
        One row per series (e.g. one per day), one column per x position.
    x_values : sequence, optional
        X positions, one per column of ``data``. When omitted, the
        notebook-level ``timestamps`` variable is used, as before.

    Raises
    ------
    ValueError
        If ``data`` is empty, is not two-dimensional (including ragged rows),
        or its row length differs from ``len(x_values)``.
    """
    if x_values is None:
        # Backward-compatible default: the grid defined by the plotting cells.
        x_values = timestamps

    # Validate before any figure is created so bad input fails with a clear
    # message instead of a shape error deep inside matplotlib.
    data_array = np.asarray(data, dtype=float)
    if data_array.ndim != 2 or data_array.shape[0] == 0:
        raise ValueError("data must be a non-empty sequence of equally long series")
    if data_array.shape[1] != len(x_values):
        raise ValueError(
            f"each series has {data_array.shape[1]} values but there are {len(x_values)} x positions"
        )

    # Column-wise statistics across all series.
    mean_values = np.mean(data_array, axis=0)
    lower_quartile = np.percentile(data_array, 25, axis=0)
    upper_quartile = np.percentile(data_array, 75, axis=0)
    lower_percentile = np.percentile(data_array, 5, axis=0)
    upper_percentile = np.percentile(data_array, 95, axis=0)

    fig, ax = plt.subplots()

    # Mean line with the two shaded percentile bands.
    ax.plot(x_values, mean_values, label='Mean', color='blue')
    ax.fill_between(x_values, lower_quartile, upper_quartile, color='blue', alpha=0.2, label='25th-75th Percentile')
    ax.fill_between(x_values, lower_percentile, upper_percentile, color='blue', alpha=0.1, label='5th-95th Percentile')

    # Show only the clock time on the x-axis, one tick per hour.
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.xaxis.set_major_locator(mdates.HourLocator(interval=1))

    # Fix the x-axis range to the plotted positions.
    ax.set_xlim([x_values[0], x_values[-1]])

    # Red dashed vertical line at 17:00 (5 PM).
    five_pm = pd.Timestamp('17:00')
    ax.axvline(x=five_pm, color='red', linestyle='--', label='5 PM')

    # Labels, title and layout.
    plt.xticks(rotation=45)
    ax.set_xlabel('Time of Day')
    ax.set_ylabel('Percentage of Volume of Orders Received')
    ax.set_title('Percentage of Volume of Delivery Orders Released to Planning by Time of Day (2023)')
    ax.legend()
    fig.tight_layout()

    plt.show()

# Aggregate the per-day curves collected in `all_days` into one summary plot.
plot_line_with_quartiles(data=all_days)

In [None]:
# NOTE(review): this cell repeats the order-count analysis of an earlier cell
# in this notebook verbatim; consider keeping only one copy.

# Business days of 2023 for which an intake curve is drawn.
dates = pd.date_range(start='2023-01-01', end='2023-12-30', freq='B')

# Cut-off times every 15 minutes from 06:00 to 19:00. pd.Timestamp('HH:MM')
# is anchored on today's date; only the clock time matters here.
# '15min' replaces the deprecated '15T' alias and means the same thing.
timestamps = pd.date_range(start=pd.Timestamp('06:00'), end=pd.Timestamp('19:00'), freq='15min')

# Loop-invariant masks, computed once instead of once per date.
delivery_dates = df_orders['LOS_DATETIME_VAN'].dt.date
is_code_d = df_orders['AFHCODE'] == "d"  # presumably the delivery orders, per the plot title

fig, ax = plt.subplots()
for date in dates:
    # Planning happens on the previous business day: the Friday before for a
    # Monday delivery, otherwise the day before.
    days_back = 3 if date.weekday() == 0 else 1
    date_minus_1 = date - pd.Timedelta(days=days_back)

    # AFHCODE "d" orders delivered on `date`.
    df_orders_filtered = df_orders[(delivery_dates == date.date()) & is_code_d]
    results = []
    total_len = len(df_orders_filtered)
    if total_len != 0:
        for timestamp in timestamps:
            df_orders_filtered_pre, df_orders_filtered_post = SplitDataByTime(df_orders_filtered, date, date_minus_1, timestamp)
            # Share (in %) of the day's orders created before this cut-off.
            results.append(len(df_orders_filtered_pre) / total_len * 100)

        print(date, results)
        # One faint line per day so that dense regions show up darker.
        ax.plot(timestamps, results, alpha=0.07, color='blue')

# Show only the clock time on the x-axis, one tick per hour.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax.xaxis.set_major_locator(mdates.HourLocator(interval=1))

# Fix the x-axis range to the evaluated cut-off times.
ax.set_xlim([timestamps[0], timestamps[-1]])

# Red dashed vertical line at 17:00 (5 PM).
five_pm = pd.Timestamp('17:00')
ax.axvline(x=five_pm, color='red', linestyle='--', label='5 PM')

# Labels, title and layout.
plt.xticks(rotation=45)
ax.set_xlabel('Time of Day')
ax.set_ylabel('Percentage of Orders received')
ax.set_title('Percentage of Delivery Orders Released to Planning by Time of Day (2023)')
ax.legend()
fig.tight_layout()
plt.show()
