In [3]:
import sys
import os
import pandas as pd
current_file_path = os.path.abspath(os.getcwd())
parent_dir = os.path.abspath(os.path.join(current_file_path, '..','..'))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

from argparse import Namespace
from constants.paths import FOLDER_PATH
from examples.benchmark import local_get_args
from load_inputs.Lyon.bike.bike_in import load_data, START, END
from plotting.TS_analysis import plot_TS



# Every 15-minute stamp from START up to END; the final stamp is dropped.
coverage_period = pd.date_range(start=START, end=END, freq='15min')[:-1]  # Exclude the last date to match the data
invalid_dates = []
minmaxnorm = True
standardize = False  # Set to True if you want to standardize the data

# Overrides handed to `local_get_args` through its `modification` argument.
# 'contextual_kwargs' and 'target_kwargs' carry the same values but are kept
# as two distinct dict objects.
config = dict(
    model_name='STGCN',
    freq='15min',
    dataset_names=['bike_in'],
    dataset_for_coverage=['bike_in'],
    target_data='bike_in',
    contextual_kwargs={'bike_in': dict(agg_iris_target_n=100, threshold_volume_min=0)},
    target_kwargs={'bike_in': dict(agg_iris_target_n=100, threshold_volume_min=0)},
    step_ahead=1,
    horizon_step=1,
)

args = local_get_args(
    config['model_name'],
    args_init=None,
    dataset_names=config['dataset_names'],
    dataset_for_coverage=config['dataset_for_coverage'],
    modification=config,
)

ds = load_data(
    FOLDER_PATH, coverage_period, invalid_dates, args, minmaxnorm, standardize,
    normalize=True,
    tensor_limits_keeper=None,
)

# Last slice along the third axis of U_train, moved to a NumPy array.
train_values = ds.U_train[:, :, -1].detach().cpu().numpy()
# Second-to-last column of df_verif_train is used as the row index.
train_index = ds.tensor_limits_keeper.df_verif_train.iloc[:, -2]
df = pd.DataFrame(train_values, columns=ds.spatial_unit, index=train_index)

# Only the first 2000 rows are plotted.
plot_TS(df.iloc[:2000, :], bool_show=True, height=800)

Loading from /home/rrochas/../../data/rrochas/prediction_validation/agg_data/velov/velov_attracted_by_station15min.csv...
df pivoted:  (70094, 435)
df reindexed :  (35040, 435)
Len coverage period:  35040
df filtered:  (35040, 435)
   Loaded data: (35040, 435)
   Dimension after spatial agg: (35040, 51)
Values with issues:  35.294%
Regular Values that we have to set to 0:  0.000%
Values with issues:  35.294%
Regular Values that we have to set to 0:  0.664%
Values with issues:  35.294%
Regular Values that we have to set to 0:  0.987%
Values with issues:  35.294%
Regular Values that we have to set to 0:  0.000%
Values with issues:  35.294%
Regular Values that we have to set to 0:  0.667%
Values with issues:  35.294%
Regular Values that we have to set to 0:  0.985%


In [None]:
from load_inputs.Lyon.bike.bike_out import load_data as load_data_out

def _train_frame(dataset, column_prefix=None):
    """Build a DataFrame from the last slice (third axis) of ``dataset.U_train``.

    Rows are indexed by the second-to-last column of
    ``dataset.tensor_limits_keeper.df_verif_train``. Columns are
    ``dataset.spatial_unit``, each rendered as ``f"{column_prefix}{unit}"``
    when a prefix is given and passed through untouched otherwise.
    """
    values = dataset.U_train[:, :, -1].detach().cpu().numpy()
    stamps = dataset.tensor_limits_keeper.df_verif_train.iloc[:, -2]
    if column_prefix is None:
        labels = dataset.spatial_unit
    else:
        labels = [f"{column_prefix}{c}" for c in dataset.spatial_unit]
    return pd.DataFrame(values, columns=labels, index=stamps)


# Shallow copy of the bike_in args, re-targeted at bike_out.
# NOTE(review): `target_kwargs` is inherited unchanged from `args` (still keyed
# by 'bike_in') -- confirm the loader does not need a 'bike_out' entry there.
args_out = Namespace(**vars(args))
args_out.target_data = 'bike_out'
args_out.dataset_for_coverage = ['bike_out']
args_out.dataset_names = ['bike_out']

# Unfiltered run
args_out.contextual_kwargs = {
    'bike_out': {
        'agg_iris_target_n': 50,
        'threshold_volume_min': 1,
    },
}
ds_out = load_data_out(
    FOLDER_PATH, coverage_period, invalid_dates, args_out, minmaxnorm, standardize,
    normalize=True,
    tensor_limits_keeper=None,
)
df_out = _train_frame(ds_out)

# Filtered run
# NOTE(review): besides adding 'quantile_filter_outliers', this run also uses a
# different 'agg_iris_target_n' (100 vs. 50) -- confirm that is intended for a
# raw-vs-filtered comparison.
args_out.contextual_kwargs = {
    'bike_out': {
        'agg_iris_target_n': 100,
        'threshold_volume_min': 1,
        'quantile_filter_outliers': 0.98,
    },
}
ds_out_filtered = load_data_out(
    FOLDER_PATH, coverage_period, invalid_dates, args_out, minmaxnorm, standardize,
    normalize=True,
    tensor_limits_keeper=None,
)
df_out_filtered = _train_frame(ds_out_filtered, column_prefix="filtered_")

# Raw and filtered columns side by side; only the first 2000 rows are plotted.
df_out_concat = pd.concat([df_out, df_out_filtered], axis=1)
plot_TS(df_out_concat.iloc[:2000, :], bool_show=True, height=800)

Loading from /home/rrochas/../../data/rrochas/prediction_validation/agg_data/velov/velov_emitted_by_station15min.csv...
df pivoted:  (70049, 434)
df reindexed :  (35040, 434)
Len coverage period:  35040
df filtered:  (35040, 434)
   Loaded data: (35040, 434)
   Dimension after spatial agg: (35040, 13)
Loading from /home/rrochas/../../data/rrochas/prediction_validation/agg_data/velov/velov_emitted_by_station15min.csv...
df pivoted:  (70049, 434)
df reindexed :  (35040, 434)
Len coverage period:  35040
df filtered:  (35040, 434)
   Loaded data: (35040, 434)
   Dimension after spatial agg: (35040, 13)


In [6]:
import numpy as np 
# Row-wise total over every column of df_out.
df_all = df_out.sum(axis=1)
# Rows whose index hour is 0 through 6 are shifted up by one before plotting.
night_mask = df_all.index.hour.isin([0, 1, 2, 3, 4, 5, 6])
df_all.loc[night_mask] += 1
plot_TS(df_all.to_frame(), bool_show=True, height=800, title='Detection of Missing values')

In [4]:
def _parse_station_ids(raw_stations):
    """Turn one ``STATION`` cell (whitespace-separated ids) into a list of ints.

    A missing cell yields an empty list. Any run of whitespace separates ids,
    so doubled spaces do not produce empty tokens.
    """
    if pd.isna(raw_stations):
        return []
    station_ids = []
    for token in str(raw_stations).split():
        try:
            station_ids.append(int(token))
        except ValueError:
            # e.g. "12.0" when the column was parsed as float by pandas
            station_ids.append(int(float(token)))
    return station_ids


def load_bike_in_data(file_pattern,target_freq,data_subfolder,date_col,location_col,value_col,START,END,coverage_period,target_n,
                      threshold_volume_min=1, folder_path=None):
    """Load a per-station CSV, pivot it to a time x station table and sum stations per zone.

    Despite the name, nothing in here is specific to drop-offs: the file that
    is read is selected entirely by the arguments.

    Parameters
    ----------
    file_pattern, target_freq, data_subfolder :
        The CSV read is ``<folder_path>/<data_subfolder>/<file_pattern><target_freq>.csv``.
        ``target_freq`` is also the frequency of the reindexing date range.
    date_col, location_col, value_col :
        Column names used as pivot index, pivot columns and summed values.
    START, END :
        Bounds of the date range the pivot is reindexed on (last stamp dropped).
    coverage_period :
        Only rows whose timestamp is in this collection are kept.
    target_n :
        Selects ``<folder_path>/lyon_iris_agg<target_n>/zone2stations.csv`` and
        is the number of zone ids (``0 .. target_n - 1``) guaranteed to be
        present in ``station_per_zones``.
    threshold_volume_min :
        Zones whose mean over the kept rows is not strictly greater than this
        value are dropped from the first return value. Defaults to 1.
    folder_path :
        Root data folder. Defaults to the module-level ``FOLDER_PATH``.

    Returns
    -------
    (df_filtered, agg_df, station_per_zones, target_n)
        ``agg_df`` has one column per row of ``zone2stations.csv``;
        ``df_filtered`` is ``agg_df`` restricted to zones above the threshold;
        ``station_per_zones`` maps zone id to the number of its stations that
        are present in the CSV (0 for ids absent from ``zone2stations.csv``);
        ``target_n`` is returned unchanged.
    """
    if folder_path is None:
        folder_path = FOLDER_PATH

    # File name is built from the pattern and the frequency
    data_file_path = os.path.join(folder_path, data_subfolder, f"{file_pattern}{target_freq}.csv")
    df = pd.read_csv(data_file_path)

    # --- Preprocessing ---
    df[date_col] = pd.to_datetime(df[date_col])
    df_pivoted = df.pivot_table(index=date_col, columns=location_col, values=value_col, aggfunc='sum').fillna(0)

    # Align on the full regular range; time steps absent from the CSV become 0
    full_range = pd.date_range(start=START, end=END, freq=target_freq)[:-1]
    df_pivoted = df_pivoted.reindex(full_range).fillna(0)
    df_in_coverage = df_pivoted[df_pivoted.index.isin(coverage_period)]

    # --- Spatial aggregation ---
    s_zone2stations_path = os.path.join(folder_path, f"lyon_iris_agg{target_n}", "zone2stations.csv")
    s_zone2stations = pd.read_csv(s_zone2stations_path, index_col=0)

    zone_sums = {}
    station_per_zones = {}
    for zone_id, row in s_zone2stations.iterrows():
        # Stations listed for the zone but absent from the CSV are ignored
        present = [c for c in _parse_station_ids(row.STATION) if c in df_in_coverage.columns]
        zone_sums[zone_id] = df_in_coverage[present].sum(axis=1)
        station_per_zones[zone_id] = len(present)

    # Assemble the frame once instead of inserting columns one at a time
    agg_df = pd.DataFrame(zone_sums, index=df_in_coverage.index)
    agg_df.columns.name = s_zone2stations.index.name

    # Zone ids missing from zone2stations.csv count as having no station
    for k in range(target_n):
        station_per_zones.setdefault(k, 0)

    mask = agg_df.mean() > threshold_volume_min
    df_filtered = agg_df.loc[:, mask]

    return df_filtered, agg_df, station_per_zones, target_n



from load_inputs.Lyon.bike.bike_in import DATE_COL, LOCATION_COL, VALUE_COL, FILE_PATTERN,NAME,DATA_SUBFOLDER
import geopandas as gpd 
import numpy as np 

# Lowercase aliases for the constants imported from the bike_in loader module.
data_subfolder, file_pattern, name = DATA_SUBFOLDER, FILE_PATTERN, NAME
date_col, location_col, value_col = DATE_COL, LOCATION_COL, VALUE_COL
target_freq = args.freq
target_n = args.contextual_kwargs[name]['agg_iris_target_n']


# Build gdf

df_filtered, agg_df, station_per_zones, target_n = load_bike_in_data(
    file_pattern, target_freq, data_subfolder,
    date_col, location_col, value_col,
    START, END, coverage_period, target_n,
)
gdf_in = gpd.read_file(f"{FOLDER_PATH}/lyon_iris_agg{target_n}/lyon.shp")
gdf_in['nb_stations'] = gdf_in.index.map(station_per_zones)

# Only zones whose mean is above 1 get a value; the others map to NaN.
zone_means = agg_df.mean()
gdf_in['Average Number of Bike-Sharing Drop-off per time-step and per zone'] = gdf_in.index.map(
    dict(zone_means[zone_means > 1])
)


In [9]:
# Rebuild gdf_in from the lyon_iris_shapefile folder instead of lyon_iris_agg{target_n}.
# NOTE(review): station_per_zones and agg_df were keyed by the zone ids of the
# aggregated run -- confirm they line up with this shapefile's row index.
gdf_in = gpd.read_file(f"{FOLDER_PATH}/lyon_iris_shapefile/lyon.shp")
gdf_in['nb_stations'] = gdf_in.index.map(station_per_zones)

zone_means = agg_df.mean()
gdf_in['Average Number of Bike-Sharing Drop-off per time-step and per zone'] = gdf_in.index.map(
    dict(zone_means[zone_means > 1])
)
gdf_in.explore()


## Number of stations per zone

In [5]:
# Interactive map of the station count; zones with fewer than one station are
# set to NaN so that they are not coloured by the colormap.
station_count_col = 'Number of Bike-Sharing Stations'
dropoff_mean_col = 'Average Number of Bike-Sharing Drop-off per time-step and per zone'

gdf_in[station_count_col] = gdf_in['nb_stations'].where(gdf_in['nb_stations'] >= 1)
gdf_in.explore(
    station_count_col,
    cmap='YlOrRd',
    tiles='CartoDB positron',
    legend=True,
    style_kwds=dict(color='black', weight=0.5, fillOpacity=0.7),
    tooltip=['IRIS', station_count_col, dropoff_mean_col],
)

## Bike-In volume (Drop-off, attracted volume)

In [89]:
# Interactive map of the mean drop-off volume; zones averaging at most one
# drop-off per time step have no value in this column.
dropoff_mean_col = 'Average Number of Bike-Sharing Drop-off per time-step and per zone'
gdf_in.explore(
    dropoff_mean_col,
    cmap='YlOrRd',
    tiles='CartoDB positron',
    legend=True,
    style_kwds=dict(color='black', weight=0.5, fillOpacity=0.7),
    tooltip=['IRIS', 'Number of Bike-Sharing Stations', dropoff_mean_col],
)

## Bike-Out volume (Pick-up, emitted volume)

In [85]:
from load_inputs.Lyon.bike.bike_out import DATE_COL, LOCATION_COL, VALUE_COL, FILE_PATTERN,NAME,DATA_SUBFOLDER

# Lowercase aliases, now bound to the constants imported from the bike_out module.
data_subfolder, file_pattern, name = DATA_SUBFOLDER, FILE_PATTERN, NAME
date_col, location_col, value_col = DATE_COL, LOCATION_COL, VALUE_COL
target_freq = args.freq
target_n = 50


# Build gdf

# load_bike_in_data is reused as-is; the arguments decide which file is read.
df_filtered, agg_df, station_per_zones, target_n = load_bike_in_data(
    file_pattern, target_freq, data_subfolder,
    date_col, location_col, value_col,
    START, END, coverage_period, target_n,
)
gdf_out = gpd.read_file(f"{FOLDER_PATH}/lyon_iris_agg{target_n}/lyon.shp")
gdf_out['nb_stations'] = gdf_out.index.map(station_per_zones)

# Zones with fewer than one station become NaN.
gdf_out['Number of Bike-Sharing Stations'] = gdf_out['nb_stations'].where(gdf_out['nb_stations'] >= 1)

# Only zones whose mean is above 1 get a value; the others map to NaN.
zone_means = agg_df.mean()
gdf_out['Average Number of Bike-Sharing Pick-Up per time-step and per zone'] = gdf_out.index.map(
    dict(zone_means[zone_means > 1])
)

In [90]:

# Interactive map of the mean pick-up volume per zone.
pickup_mean_col = 'Average Number of Bike-Sharing Pick-Up per time-step and per zone'
gdf_out.explore(
    pickup_mean_col,
    cmap='YlOrRd',
    tiles='CartoDB positron',
    legend=True,
    style_kwds=dict(color='black', weight=0.5, fillOpacity=0.7),
    tooltip=['IRIS', 'Number of Bike-Sharing Stations', pickup_mean_col],
)