# Open aggregated PT files from 'individual validation' (smart card data)
- **Transport Modes:** 'subway', 'bus', 'tramway', 'vaf' (the names used as keys in the file paths below)
- **Available Temporal Aggregations:** '2min', '3min', '5min', '6min', '10min', '15min', '30min', '1H'
- **Coverage Period:** `01-11-2019` to `30-03-2020` (DD-MM-YYYY)

In [26]:
import pandas as pd
import geopandas as gpd
# Root folder of the data sets (relative path); the CSV paths built below
# all start from it.
FOLDER_PATH = '../../../../data/rrochas/prediction_validation/'

## Open Bus, Tramway, Subway data : 

In [40]:
agg = '15min'  # choose between 2min, 3min, 5min, 6min, 10min, 15min, 30min, 1H

# Load one aggregated validation table per transport mode.
# The tables are gathered in a dict and then bound to explicit names
# (df_subway, df_tramway, df_bus, df_vaf) rather than injected through
# globals(), so the variables used by the following cells are visible to
# readers and to static tools.
frames = {}
for mode in ['subway', 'tramway', 'bus', 'vaf']:
    csv_path = f"{FOLDER_PATH}/agg_data/validation_individuelle/{mode}_indiv_{agg}/{mode}_indiv_{agg}.csv"
    # The first CSV column is used as the index.
    # NOTE(review): per the displayed output, this makes LIG_NUMERO_SAE the
    # index of the 'vaf' table - confirm this is intended.
    frame = pd.read_csv(csv_path, index_col=0)

    # Tables that carry a 'VAL_DATE' column get that column parsed as
    # datetimes; tables without it get their index parsed instead.
    if 'VAL_DATE' in frame.columns:
        frame['VAL_DATE'] = pd.to_datetime(frame['VAL_DATE'])
    else:
        frame.index = pd.to_datetime(frame.index)

    frames[mode] = frame

df_subway = frames['subway']
df_tramway = frames['tramway']
df_bus = frames['bus']
df_vaf = frames['vaf']

display(df_bus.head(2))
display(df_tramway.head(2))
display(df_subway.head(2))
display(df_vaf.head(2))

  globals()[f"df_{mode}"] = pd.read_csv(csv_path,index_col = 0)


Unnamed: 0,LIG_NUMERO_SAE,VAL_ARRET_CODE,CRS_SENS_TRAJET,COD_LIG_CLI,VAL_DATE,Flow
0,2,202,1,2,2019-11-01 12:15:00,1
1,2,202,1,2,2019-11-02 10:30:00,1


Unnamed: 0,LIG_NUMERO_SAE,VAL_ARRET_CODE,CRS_SENS_TRAJET,COD_LIG_CLI,VAL_DATE,Flow
0,520,32102,0,T1,2019-11-02 15:00:00,66
1,520,32102,0,T1,2019-11-02 17:30:00,53


Unnamed: 0_level_0,AMP,BEL,BRO,CHA,COR,CPA,CRO,CUI,CUS,DEB,...,PER,GUI,JAU,REP,SAN,SAX,GER,VMY,SOI,JEA
VAL_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-11-01 00:00:00,20.0,164.0,10.0,59.0,88.0,5.0,20.0,1.0,5.0,21.0,...,89.0,13.0,16.0,12.0,49.0,68.0,8.0,23.0,3.0,59.0
2019-11-01 00:15:00,6.0,82.0,11.0,30.0,43.0,3.0,8.0,0.0,2.0,3.0,...,19.0,7.0,7.0,3.0,11.0,33.0,2.0,16.0,0.0,38.0


Unnamed: 0_level_0,VAL_ARRET_CODE,VAL_DATE,Flow
LIG_NUMERO_SAE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,2020-02-01 07:00:00,1
0,0,2020-02-01 07:30:00,2


## Spatial Matching 

In [34]:
# Reference tables holding the coordinates of the stations / stops.
# Coordinate columns are renamed to 'lon' / 'lat'; the tram/bus point
# identifier is renamed to 'VAL_ARRET_CODE' so it can be used as join key.
ref_subway = pd.read_csv(f"{FOLDER_PATH}/ref_subway.csv", index_col=0)
ref_subway = ref_subway.rename(columns={'MEAN_X': 'lon', 'MEAN_Y': 'lat'})
ref_subway = ref_subway[['lon', 'lat', 'COD_TRG', 'LIB_STA_SIFO']]

ref_tram_bus = pd.read_csv(f"{FOLDER_PATH}/ref_tram_bus.csv", index_col=0)
ref_tram_bus = ref_tram_bus.rename(
    columns={'IDT_PNT': 'VAL_ARRET_CODE', 'COO_X_WGS84': 'lon', 'COO_Y_WGS84': 'lat'}
)
ref_tram_bus = ref_tram_bus[['lon', 'lat', 'NOM_PNT', 'VAL_ARRET_CODE']]

# Inner join on the stop code: validation rows whose stop code has no match
# in the reference table are dropped.
df_bus = pd.merge(df_bus, ref_tram_bus, how='inner', on='VAL_ARRET_CODE')
df_tramway = pd.merge(df_tramway, ref_tram_bus, how='inner', on='VAL_ARRET_CODE')

display(df_bus.head(2))
display(df_tramway.head(2))

Unnamed: 0,LIG_NUMERO_SAE,VAL_ARRET_CODE,CRS_SENS_TRAJET,COD_LIG_CLI,VAL_DATE,Flow,lon,lat,NOM_PNT
0,2,202,1,2,2019-11-01 12:15:00,1,4.808662,45.791424,Bifurcation du Rosay
1,2,202,1,2,2019-11-02 10:30:00,1,4.808662,45.791424,Bifurcation du Rosay


Unnamed: 0,LIG_NUMERO_SAE,VAL_ARRET_CODE,CRS_SENS_TRAJET,COD_LIG_CLI,VAL_DATE,Flow,lon,lat,NOM_PNT
0,520,32102,0,T1,2019-11-02 15:00:00,66,4.827012,45.749599,Perrache
1,520,32102,0,T1,2019-11-02 17:30:00,53,4.827012,45.749599,Perrache


## Visualisation Bus 

In [35]:
# Total flow per (stop, line, direction); coordinates and labels are taken
# from the first row of each group.
bus_totals = df_bus.groupby(['VAL_ARRET_CODE', 'LIG_NUMERO_SAE', 'CRS_SENS_TRAJET']).agg(
    lon=('lon', 'first'),
    lat=('lat', 'first'),
    NOM_PNT=('NOM_PNT', 'first'),
    COD_LIG_CLI=('COD_LIG_CLI', 'first'),
    Flow=('Flow', 'sum'),
)

# Turn the aggregated table into point geometries in EPSG:4326.
df_agg = gpd.GeoDataFrame(
    bus_totals,
    geometry=gpd.points_from_xy(bus_totals['lon'], bus_totals['lat']),
    crs="EPSG:4326",
)

# Interactive map coloured by total flow; the colour scale is capped at the
# 75th percentile of the flow.
df_agg.explore('Flow', tiles='CartoDB positron', vmin=0, vmax=df_agg['Flow'].quantile(0.75))

## Tram visualisation and first filtering:

In [37]:

# To be removed: stops that account for < 200 flow over 4 months and that are
# located in Norway or Austria (CDM Meyzieu is ~2200 flow).
# NOTE(review): presumably these locations come from wrong reference
# coordinates - confirm.
NOM_PNT_autriche = ['CDM Saint Priest']
NOM_PNT_norvege = [
    'Passage machine à laver UTTL',
    'Via Part-Dieu Villette V1',
    'CDM Meyzieu',
    'Retournement Charpennes',
    'Retournement SPR Bel Air',
    'Retournement Liberté',
    'Garage Feyssine (voie 16)',
    'Garage Feyssine (voie 24)',
    'Passage machine à laver UTT',
    'Retournement Perrache V1',
    'Retournement Feyssine V2',
    'Retournement Quai C. Bernard',
    'Retournement Perrache V4',
]

# Keep only the rows whose stop name is not in one of the two lists above.
is_excluded = df_tramway['NOM_PNT'].isin(NOM_PNT_autriche + NOM_PNT_norvege)
df_tramway = df_tramway[~is_excluded]

# Total flow per (stop, line, direction); coordinates and labels are taken
# from the first row of each group.
tram_totals = df_tramway.groupby(['VAL_ARRET_CODE', 'LIG_NUMERO_SAE', 'CRS_SENS_TRAJET']).agg(
    lon=('lon', 'first'),
    lat=('lat', 'first'),
    NOM_PNT=('NOM_PNT', 'first'),
    COD_LIG_CLI=('COD_LIG_CLI', 'first'),
    Flow=('Flow', 'sum'),
)

# Turn the aggregated table into point geometries in EPSG:4326.
df_agg = gpd.GeoDataFrame(
    tram_totals,
    geometry=gpd.points_from_xy(tram_totals['lon'], tram_totals['lat']),
    crs="EPSG:4326",
)

# Interactive map coloured by total flow; the colour scale is capped at the
# 75th percentile of the flow.
df_agg.explore('Flow', tiles='CartoDB positron', vmin=0, vmax=df_agg['Flow'].quantile(0.75))
