In [1]:
from load_preprocess.functions import Data
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
# import config
import json
import numpy as np

from shapely.geometry import LineString, mapping
from itertools import combinations
import re
from scipy import spatial


Functions

In [2]:
def clean_freight_df(df, on_load=True):
    """Keep the French rows of a freight table and return their 2021 values as integers.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing at least the columns 'TIME', 'TIME.1' and '2021';
        any other column is ignored. The frame is not modified.
    on_load : bool, default True
        Selects the name of the value column in the result:
        "number_onload" when True, "number_offload" otherwise.

    Returns
    -------
    pandas.DataFrame
        A new frame with columns 'geo_code', 'geo_labels' and the value
        column, indexed 0..n-1.

    Notes
    -----
    * The first row of ``df`` is always discarded (presumably a secondary
      header row of the spreadsheet -- confirm against the source sheet).
    * Only rows whose ``geo_code`` starts with "FR" are kept; missing or
      non-string codes are treated as non-matching.
    * Values are converted via their string form: a trailing ".0..." is
      stripped, and anything that is then not made of digits only (missing
      values, placeholder text, negative or fractional numbers) becomes 0.
    """
    freight_type = "number_onload" if on_load else "number_offload"

    # Build the result as a chain of fresh frames instead of renaming/assigning
    # in place on slices, which is chained assignment on a derived frame.
    renamed = (
        df[['TIME', 'TIME.1', '2021']]
        .rename(columns={"TIME": "geo_code", "TIME.1": "geo_labels", "2021": freight_type})
        .iloc[1:]
    )
    is_french = renamed["geo_code"].str.startswith("FR", na=False)
    df_clean = renamed[is_french].reset_index(drop=True)

    # Raw string: '\.' is not a valid escape sequence in a normal string literal.
    as_text = df_clean[freight_type].astype('str').str.replace(r'\.0*$', '', regex=True)
    # Anything that is not a plain run of digits is counted as zero.
    as_text = as_text.where(as_text.str.isdigit(), '0')
    df_clean[freight_type] = as_text.astype('int')

    return df_clean

def department_region_map(path, df_fr):
    """Attach a 'region' column to ``df_fr`` using a department lookup CSV.

    The CSV at ``path`` is read with pandas; its columns 'dep_name',
    'region_name' and 'old_region_name' are renamed to 'geo_labels',
    'new_region_name' and 'region'. ``df_fr`` is then left-joined to the
    lookup's ('geo_labels', 'region') pairs on 'geo_labels', so rows without
    a match keep a missing region. Finally, rows whose 'geo_labels' ends with
    a space are removed.

    Returns the joined and filtered DataFrame (original row labels of the
    join result are kept, i.e. the index is not reset).
    """
    column_aliases = {
        "dep_name": "geo_labels",
        "region_name": "new_region_name",
        "old_region_name": "region",
    }
    dpt_lookup = pd.read_csv(path).rename(columns=column_aliases)

    joined = df_fr.merge(dpt_lookup[["geo_labels", "region"]], how='left', on=['geo_labels'])

    # Labels ending in a blank are dropped from the result.
    has_trailing_blank = joined.geo_labels.str.endswith(" ")
    return joined[~has_trailing_blank]

def merge_freight(path, df_on, df_off):
    """Compute each region's share of the combined on-load + off-load freight.

    Both raw tables are cleaned with ``clean_freight_df`` and inner-joined on
    ('geo_code', 'geo_labels'); their two value columns are summed into
    'total_load'. Each label is cut at its first "(" before regions are
    attached with ``department_region_map(path, ...)``. The totals are then
    summed per region.

    Returns a DataFrame with one row per region and the columns
    'region', 'total_load', 'full_load' (sum of all regional totals, repeated
    on every row) and 'perc_load' (total_load / full_load).
    """
    onload = clean_freight_df(df_on, on_load=True)
    offload = clean_freight_df(df_off, on_load=False)

    freight = onload.merge(offload, how='inner', on=['geo_code', 'geo_labels'])
    freight['total_load'] = freight.number_offload + freight.number_onload

    # Keep only the text before the first "(" of every label.
    freight['geo_labels'] = [
        label_parts[0] for label_parts in freight['geo_labels'].str.split("(")
    ]

    with_regions = department_region_map(path, freight)

    per_region = with_regions.groupby("region")["total_load"].sum().reset_index()
    per_region["full_load"] = per_region["total_load"].sum()
    per_region["perc_load"] = per_region["total_load"] / per_region["full_load"]

    return per_region

def calculate_number_stations(df):
    """Add the 2030 refuelling-demand columns and the resulting station count.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'length_max', 'length_mean' and 'perc_load'.
        The frame itself is left untouched.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which 'length_max' and 'length_mean' have been
        divided by 1e3 (presumably metres -> kilometres -- confirm) and the
        columns 'max_length_drive', 'avg_distance_high_aut',
        'avg_distance_midlow_aut', 'h2_num_2030', 'R_2030_high_aut',
        'R_2030_mid_aut', 'R_2030_low_aut', 'R_2030_total', 'C_2030' and
        'num_stations' have been added.

    Notes
    -----
    Reads these notebook-level names, which must be defined before the call:
    max_hours_drive, avg_speed_kmh, H2_trucks_2030, autonomy_high_ms,
    autonomy_high_km, autonomy_medium_ms, autonomy_medium_km,
    autonomy_low_ms, autonomy_low_km, open_time, avg_time_fill.
    """
    # Work on a copy: the length columns are rescaled below, and doing that on
    # the caller's frame would rescale them again on every further call.
    out = df.copy()

    out["max_length_drive"] = max_hours_drive * avg_speed_kmh
    out["length_max"] = out["length_max"] / 1e3
    out["length_mean"] = out["length_mean"] / 1e3

    # High-autonomy distance is capped by the maximum drivable length.
    out["avg_distance_high_aut"] = out[["max_length_drive", "length_max"]].min(axis=1)
    # Mid/low autonomy uses 60% of the high-autonomy distance (an earlier
    # variant took min(max_length_drive, length_mean) instead).
    out["avg_distance_midlow_aut"] = 0.6 * out["avg_distance_high_aut"]

    out["h2_num_2030"] = H2_trucks_2030 * out["perc_load"]

    out["R_2030_high_aut"] = (
        autonomy_high_ms * out["h2_num_2030"] * out["avg_distance_high_aut"] / autonomy_high_km
    )
    out["R_2030_mid_aut"] = (
        autonomy_medium_ms * out["h2_num_2030"] * out["avg_distance_midlow_aut"] / autonomy_medium_km
    )
    out["R_2030_low_aut"] = (
        autonomy_low_ms * out["h2_num_2030"] * out["avg_distance_midlow_aut"] / autonomy_low_km
    )
    out["R_2030_total"] = out["R_2030_high_aut"] + out["R_2030_mid_aut"] + out["R_2030_low_aut"]

    out["C_2030"] = open_time / avg_time_fill
    out["num_stations"] = out["R_2030_total"] / out["C_2030"]

    return out

Execution of Frequencies

In [4]:
# Load the run parameters from the JSON config file.
path_conf = "params/config.json"
# Context manager so the file handle is closed as soon as the JSON is parsed.
with open(path_conf, "r") as conf_file:
    conf = json.load(conf_file)

# Expose every config entry as a notebook-level variable. globals() states the
# intent explicitly (locals() is only the same mapping at the top level).
# NOTE(review): the path_* names used by the later cells and the constants read
# by calculate_number_stations are not defined anywhere else in view, so they
# presumably come from this file -- confirm against params/config.json.
globals().update(conf)

In [7]:
# Build the base frame through the project's Data helper (imported from
# load_preprocess.functions), constructed with path 'data/'.
# NOTE(review): create_df is defined outside this notebook; judging by the
# preview below it yields one row per region with road-network columns such as
# length_max and length_mean -- confirm against the helper's implementation.
data = Data(path = 'data/')
# `df` is reused by the freight cell further down, where it is merged on 'region'.
df = data.create_df()

# Preview the first rows.
df.head()

100%|██████████| 8/8 [00:48<00:00,  6.04s/it]


Unnamed: 0,region,road_density,length_m,area_m,length_max,length_mean,diameter,longest_line
3,Île-de-France,0.000227,2744410.0,12063530000.0,85932.399845,3076.692877,123934.499714,162432.99196
15,Nord-Pas-de-Calais,0.000151,1887368.0,12494830000.0,147881.271737,4015.67644,126130.541927,208410.677869
10,Haute-Normandie,0.000113,1398489.0,12371970000.0,135138.054662,4994.602982,125508.904876,163440.964713
0,Alsace,0.000106,883642.1,8322648000.0,171172.687144,4374.466068,102940.396161,188809.381774
17,Picardie,9.3e-05,1822331.0,19547200000.0,163238.986421,7854.875311,157760.172316,218702.689596


In [8]:
# Read the on-load and off-load freight workbooks with identical settings
# (sheet 'Sheet 1', first 8 rows skipped).
df_on, df_off = (
    pd.read_excel(workbook, sheet_name='Sheet 1', skiprows=8)
    for workbook in (path_on_freight, path_off_freight)
)

# Regional freight shares, attached to the per-region frame with a left join
# so that every row of `df` is kept.
df_fr = merge_freight(path_region_dpt_map, df_on, df_off)
df_new = df.merge(df_fr[["region", "perc_load"]], how="left", on="region")

# Derive the 2030 demand columns and the number of stations per region.
df_new = calculate_number_stations(df_new)

display(df_fr.head())

df_new.head()

Unnamed: 0,region,total_load,full_load,perc_load
0,Alsace,96286,3213074,0.029967
1,Aquitaine,188556,3213074,0.058684
2,Auvergne,76627,3213074,0.023849
3,Basse-Normandie,78563,3213074,0.024451
4,Bourgogne,95622,3213074,0.02976


Unnamed: 0,region,road_density,length_m,area_m,length_max,length_mean,diameter,longest_line,perc_load,max_length_drive,avg_distance_high_aut,avg_distance_midlow_aut,h2_num_2030,R_2030_high_aut,R_2030_mid_aut,R_2030_low_aut,R_2030_total,C_2030,num_stations
0,Île-de-France,0.000227,2744410.0,12063530000.0,85.9324,3.076693,123934.499714,162432.99196,0.097211,720,85.9324,51.55944,972.112687,38.426549,32.579031,93.560293,164.565873,96.0,1.714228
1,Nord-Pas-de-Calais,0.000151,1887368.0,12494830000.0,147.881272,4.015676,126130.541927,208410.677869,0.066784,720,147.881272,88.728763,667.843318,45.430299,38.516992,110.612901,194.560193,96.0,2.026669
2,Haute-Normandie,0.000113,1398489.0,12371970000.0,135.138055,4.994603,125508.904876,163440.964713,0.039934,720,135.138055,81.082833,399.337208,24.824201,21.046605,60.441532,106.312337,96.0,1.10742
3,Alsace,0.000106,883642.1,8322648000.0,171.172687,4.374466,102940.396161,188809.381774,0.029967,720,171.172687,102.703612,299.669413,23.595801,20.005135,57.450645,101.051581,96.0,1.052621
4,Picardie,9.3e-05,1822331.0,19547200000.0,163.238986,7.854875,157760.172316,218702.689596,0.040049,720,163.238986,97.943392,400.491865,30.072908,25.496596,73.220993,128.790496,96.0,1.341568
