## Chargement des données de validations

Fonction utilitaire pour "flatten" certaines colonnes.

In [1]:
import csv
import folium
import pandas as pd
import numpy as np

def load_csv(csv_filename, columns, to_flatten, value):
    with open(csv_filename, 'r', newline='\n') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=';')
        
        data = {}
        header = {elt: index for index, elt in enumerate(next(csv_reader))}
        flattens = set()
        
        for row in csv_reader:
            key = tuple(row[header[column]] for column in columns)
            flattens.add(row[header[to_flatten]])
            try:
                data[key][row[header[to_flatten]]] = row[header[value]]
            except KeyError:
                data[key] = {row[header[to_flatten]]: row[header[value]]}
                
        df_dict = {elt:  [] for elt in columns}
        df_dict.update({elt: [] for elt in flattens})
    
        for key, values in data.items():
            for index, column in enumerate(columns):
                df_dict[column].append(key[index])
            for flatten in flattens:
                df_dict[flatten].append(values.get(flatten, 0)) 
        
        return pd.DataFrame(df_dict)

## Chargement des données de validation

In [2]:
p_columns = ['LIBELLE_ARRET', 'CAT_JOUR', 'ID_REFA_LDA']
p_flatten = 'TRNC_HORR_60'
p_value = 'pourc_validations'

v_columns = ['LIBELLE_ARRET', 'JOUR', 'ID_REFA_LDA']
v_flatten = 'CATEGORIE_TITRE'
v_value = 'NB_VALD'

profile_s1 = load_csv('Data/validations-profils-horaires-2017s1.csv', p_columns, p_flatten, p_value)
profile_s2 = load_csv('Data/validations-profils-horaires-2017s2.csv', p_columns, p_flatten, p_value)

validation_s1 = load_csv('Data/validations-nombre-par-jour-2017s1.csv', v_columns, v_flatten, v_value)
validation_s2 = load_csv('Data/validations-nombre-par-jour-2017s2.csv', v_columns, v_flatten, v_value)

## Chargement des données spatiales

In [3]:
emplacements = pd.read_csv('Data/emplacement-des-gares-idf.csv', delimiter=';')
arrets = pd.read_csv('Data/referentiel-arret-tc-idf.csv', delimiter=';')

possible_arrets_types = ['Station de métro', 'Station ferrée / Val']
arrets = arrets[
    (arrets['ZDEr_LIBELLE_TYPE_ARRET'] == 'Station de métro') |
    (arrets['ZDEr_LIBELLE_TYPE_ARRET'] == 'Station ferrée / Val')
]

## Fonction utilitaire

Fonction utilitaire pour récupérer les données spatiales en fonction de la ligne séléctionnée

In [4]:
def get_emplacement(zde, zdl):
    return emplacements[(emplacements['ID_REF_ZDE'] == zde) & (emplacements['ID_REF_ZDL'] == zdl)]

def find_points(row):
    id_lda = row['ID_REFA_LDA']
    possible_arrets = arrets[arrets['LDA_ID_REF_A'] == int(id_lda)]
    ids = [(arret['ZDEr_ID_REF_A'], arret['ZDLr_ID_REF_A']) for _, arret in possible_arrets.iterrows()]
    
    try:
        row['LIGNES'] = [get_emplacement(zde, zdl).LIGNE.iloc[0] for zde, zdl in ids] if ids else np.nan
    except:
        row['LIGNES'] = np.nan
        
    try:
        coord = get_emplacement(ids[0][0], ids[0][1])['Geo Point'].iloc[0] if ids else np.nan
        row['COORDINATES'] = tuple(float(elt) for elt in coord.split(', '))
    except:
        row['COORDINATES'] = np.nan
    return row
    
profile_s1 = profile_s1.drop(profile_s1[profile_s1['ID_REFA_LDA'] == ''].index)
profile_s1 = profile_s1.apply(find_points, axis=1)

## Affichage des stations

Affichage des stations sur la carte à l'aide de la librairie folium

In [7]:
def draw_map(df, coordinates, popup):
    map = folium.Map(location=[48.86, 2.33], zoom_start=13)

    for station in df[popup].unique():
        arret = df[profile_s1[popup] == station].iloc[0]
        if not np.isnan(arret[coordinates]).any():
            folium.Marker(arret[coordinates], popup=station).add_to(map)
            
    return map

draw_map(profile_s1, 'COORDINATES', 'LIBELLE_ARRET')

In [10]:
validation_s1.columns

Index(['AMETHYSTE', 'AUTRE TITRE', 'FGT', 'ID_REFA_LDA', 'IMAGINE R', 'JOUR',
       'LIBELLE_ARRET', 'NAVIGO', 'NON DEFINI', 'TST'],
      dtype='object')