In [10]:
from load_preprocess.functions import Data
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
# import config
import json

Functions

In [None]:
def clean_freight_df(df, on_load=True, year="2021"):
    """Extract the "FR" rows of a raw freight sheet as integer counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw sheet that contains at least the columns ``TIME``, ``TIME.1``
        and ``year``. The input frame is left untouched.
    on_load : bool, default True
        When True the value column is named ``number_onload``, otherwise
        ``number_offload``.
    year : str, default "2021"
        Label of the column holding the values to keep. The default
        reproduces the previously hard-coded column.

    Returns
    -------
    pandas.DataFrame
        Columns ``geo_code``, ``geo_labels`` and the freight column (integer),
        restricted to rows whose ``geo_code`` starts with ``"FR"``, with a
        fresh 0-based index. Any value that is not a plain run of digits
        after stripping a trailing ``.0``/``.00``... suffix is replaced by 0.
    """
    freight_type = "number_onload" if on_load else "number_offload"

    # Column selection + rename both return new objects, so the caller's
    # frame is never modified.
    renamed = df[["TIME", "TIME.1", year]].rename(
        columns={"TIME": "geo_code", "TIME.1": "geo_labels", year: freight_type}
    )

    # The first row is always discarded (presumably a secondary header row
    # of the export - TODO confirm against the source file).
    renamed = renamed.iloc[1:]

    # na=False treats missing / non-string codes as "not French".
    is_french = renamed["geo_code"].str.startswith("FR", na=False)
    df_clean = renamed.loc[is_french].reset_index(drop=True)

    # Normalise the values through text: "1200.0" -> "1200"; anything that
    # is still not purely digits (e.g. ":" or "nan") is counted as 0.
    as_text = df_clean[freight_type].astype("str").str.replace(r"\.0*$", "", regex=True)
    as_text[~as_text.str.isdigit()] = "0"
    df_clean[freight_type] = as_text.astype("int")

    return df_clean

def department_region_map(path, df_fr):
    """Attach a ``region`` column to ``df_fr`` using a department/region CSV.

    The CSV at ``path`` is read and its ``dep_name`` / ``old_region_name``
    columns are used (as ``geo_labels`` / ``region``) for a left join on
    ``geo_labels``. Rows whose label ends with a space are then removed.
    The index of the result is not reset.
    """
    column_aliases = {"dep_name":"geo_labels", "region_name":"new_region_name", "old_region_name": "region"}
    dept_regions = pd.read_csv(path).rename(columns=column_aliases)

    # Left join: every row of df_fr is kept, unmatched labels get a missing region.
    joined = df_fr.merge(dept_regions[["geo_labels", "region"]], on="geo_labels", how="left")

    label_ends_with_space = joined["geo_labels"].str.endswith(" ")
    return joined[~label_ends_with_space]

def merge_freight(path, df_on, df_off):
    """Combine on-load and off-load freight sheets into per-region load shares.

    Both raw sheets are cleaned with ``clean_freight_df``, inner-joined on
    ``geo_code`` and ``geo_labels``, summed into ``total_load``, mapped to
    regions through ``department_region_map(path, ...)`` and aggregated.

    Returns a frame with one row per ``region`` and the columns
    ``total_load`` (sum over the region), ``full_load`` (sum over all
    regions, repeated on every row) and ``perc_load`` (their ratio).
    """
    onload = clean_freight_df(df_on, on_load=True)
    offload = clean_freight_df(df_off, on_load=False)

    combined = onload.merge(offload, on=["geo_code", "geo_labels"], how="inner")
    combined["total_load"] = combined["number_offload"] + combined["number_onload"]

    # Keep only the text in front of the first "(" of each label.
    label_parts = combined["geo_labels"].str.split("(")
    combined["geo_labels"] = [parts[0] for parts in label_parts]

    with_regions = department_region_map(path, combined)

    per_region = with_regions.groupby("region")["total_load"].sum().reset_index()
    per_region["full_load"] = per_region["total_load"].sum()
    per_region["perc_load"] = per_region["total_load"] / per_region["full_load"]

    return per_region

def calculate_number_stations(df, params=None):
    """Estimate the number of refuelling stations per row for 2030.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``length_max``, ``length_mean`` and ``perc_load``.
        The frame is copied first, so the caller's object is not modified
        and calling the function twice on the same input gives the same
        result.
    params : mapping, optional
        Scenario parameters. Any of the required names missing from
        ``params`` (or all of them when ``params`` is None) are looked up in
        the module/notebook globals, which is where the original
        implementation read them from. Required names:
        ``max_hours_drive``, ``avg_speed_kmh``, ``H2_trucks_2030``,
        ``autonomy_high_ms``, ``autonomy_medium_ms``, ``autonomy_low_ms``,
        ``autonomy_high_km``, ``autonomy_medium_km``, ``autonomy_low_km``,
        ``open_time``, ``avg_time_fill``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``length_max`` / ``length_mean`` divided by
        1000 and the derived columns ``max_length_drive``,
        ``avg_distance_high_aut``, ``avg_distance_midlow_aut``,
        ``h2_num_2030``, ``R_2030_high_aut``, ``R_2030_mid_aut``,
        ``R_2030_low_aut``, ``R_2030_total``, ``C_2030`` and
        ``num_stations`` appended in that order.

    Raises
    ------
    NameError
        If a required parameter is found neither in ``params`` nor in the
        module globals.
    """
    required = (
        "max_hours_drive", "avg_speed_kmh", "H2_trucks_2030",
        "autonomy_high_ms", "autonomy_medium_ms", "autonomy_low_ms",
        "autonomy_high_km", "autonomy_medium_km", "autonomy_low_km",
        "open_time", "avg_time_fill",
    )
    overrides = {} if params is None else params
    module_scope = globals()
    cfg = {}
    missing = []
    for name in required:
        if name in overrides:
            cfg[name] = overrides[name]
        elif name in module_scope:
            cfg[name] = module_scope[name]
        else:
            missing.append(name)
    if missing:
        raise NameError("station parameters not defined: " + ", ".join(missing))

    out = df.copy()

    out["max_length_drive"] = cfg["max_hours_drive"] * cfg["avg_speed_kmh"]
    # Rescale the length columns by 1/1000 (presumably metres -> km so they
    # are comparable with max_length_drive - TODO confirm the input unit).
    out["length_max"] = out["length_max"] / 1000
    out["length_mean"] = out["length_mean"] / 1000

    # Distance per trip is capped by what can be driven in one shift.
    out["avg_distance_high_aut"] = out[["max_length_drive", "length_max"]].min(axis=1)
    # Mid/low autonomy distance is a fixed 60 % of the high-autonomy distance.
    out["avg_distance_midlow_aut"] = 0.6 * out["avg_distance_high_aut"]

    out["h2_num_2030"] = cfg["H2_trucks_2030"] * out["perc_load"]

    # Per autonomy class: share * trucks * distance / autonomy range.
    out["R_2030_high_aut"] = cfg["autonomy_high_ms"] * out["h2_num_2030"] * out["avg_distance_high_aut"] / cfg["autonomy_high_km"]
    out["R_2030_mid_aut"] = cfg["autonomy_medium_ms"] * out["h2_num_2030"] * out["avg_distance_midlow_aut"] / cfg["autonomy_medium_km"]
    out["R_2030_low_aut"] = cfg["autonomy_low_ms"] * out["h2_num_2030"] * out["avg_distance_midlow_aut"] / cfg["autonomy_low_km"]
    out["R_2030_total"] = out["R_2030_high_aut"] + out["R_2030_mid_aut"] + out["R_2030_low_aut"]

    # C_2030 is the ratio open_time / avg_time_fill; num_stations divides the
    # total R value by it.
    out["C_2030"] = cfg["open_time"] / cfg["avg_time_fill"]
    out["num_stations"] = out["R_2030_total"] / out["C_2030"]

    return out

In [None]:
# Kept for reference only: these values now live in the config file (params/config.json).

# path_region_dpt_map = 'data/departements-region.csv'
# H2_trucks_2030 = 10000
# H2_trucks_2040 = 60000
# # https://www.eea.europa.eu/data-and-maps/daviz/hdv-market-share-for-vehicle#tab-chart_1
# autonomy_high_ms = 0.46
# autonomy_medium_ms = 0.26
# autonomy_low_ms = 0.28
# autonomy_high_km = 1000
# autonomy_medium_km = 400
# autonomy_low_km = 150
# max_hours_drive = 9
# avg_speed_kmh = 80
# avg_time_fill = 15/60
# open_time = 24

Execution of Frequencies

In [None]:
# Load the run parameters from the JSON config file and expose every
# top-level key as a notebook-level variable. Names such as
# `path_region_dpt_map` or `H2_trucks_2030` are used elsewhere in this
# notebook without any other visible definition, so they are presumably
# supplied by this file.
path_conf = "params/config.json"
with open(path_conf, "r") as conf_file:  # context manager closes the file handle
    conf = json.load(conf_file)
# globals() is the writable module namespace; at notebook top level it is the
# same mapping that locals() returned, but updating it is documented to work.
globals().update(conf)

In [2]:
# Build the base frame with the project's Data helper (imported from
# load_preprocess.functions), pointing it at the 'data/' directory.
# NOTE(review): what create_df() loads is not visible here; later cells
# expect the result to carry `region`, `length_max` and `length_mean`.
data = Data(path = 'data/')
df = data.create_df()

# Preview the first rows.
df.head()

100%|██████████| 8/8 [00:47<00:00,  5.97s/it]


In [None]:
# Read the raw on-load / off-load freight workbooks: sheet 'Sheet 1', skipping
# the first 8 rows. `path_on_freight` / `path_off_freight` are not defined in
# any visible cell - presumably they come from the JSON config.
df_on = pd.read_excel(path_on_freight, sheet_name='Sheet 1', skiprows=8)
df_off = pd.read_excel(path_off_freight, sheet_name='Sheet 1', skiprows=8)

# Per-region freight shares, then attach `perc_load` to the base frame.
# Left join: every row of `df` is kept; regions absent from `df_fr` get NaN.
df_fr = merge_freight(path_region_dpt_map, df_on, df_off)
df_new = pd.merge(df, df_fr[["region", "perc_load"]], how="left", on="region")

# Add the station-count columns (reads the scenario parameters from the
# notebook namespace).
df_new = calculate_number_stations(df_new)

# Show both results: the regional shares explicitly, the final frame as the
# cell's last expression.
display(df_fr.head())

df_new.head()