In [1]:
import glob

import numpy as np
import pandas as pd

from scipy.spatial.distance import cdist

import xarray as xr

import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [2]:
# Input locations, expressed relative to the notebook's working directory, like
# the '../data/...' paths the later cells of this notebook read from and write to.
# NOTE(review): assumes '../data' is the same folder as the project's data/
# directory that the previous absolute, user-specific paths pointed at — confirm.
data_dir = "../data"
file_parquet = f"{data_dir}/projections_parquet/tasmaxAdjust_FR-Metro_CNRM-ESM2-1_ssp370_r1i1p1f2_CNRM-MF_CNRM-AROME46t1_v1-r1_MF-CDFt-ANASTASIA-ALPX-3-1991-2020_day_20150101-20191231_2015.parquet"
file_gares = f"{data_dir}/gares-de-voyageurs.csv"

cols_to_keep = ["Nom", "Position géographique"]

# Clean the stations table: keep the name and the position, drop incomplete rows.
df_gares = pd.read_csv(file_gares, sep=";")[cols_to_keep].dropna()

# "Position géographique" holds "<lat>, <lon>" in a single string: split it once
# into two columns (tolerating a missing space after the comma).
coords = df_gares["Position géographique"].str.split(",", n=1, expand=True)
assert coords.shape[1] == 2 and coords.notna().all().all(), (
    "unexpected 'Position géographique' format (expected '<lat>, <lon>')"
)

df_gares = (
    df_gares
    .assign(
        lat=coords[0].str.strip().astype(float),
        lon=coords[1].str.strip().astype(float),
    )
    .drop(columns="Position géographique")
    .rename(columns={"Nom": "gare"})
)

df_gares

Unnamed: 0,gare,lat,lon
0,Abancourt,49.685224,1.774306
1,Abbaretz,47.554643,-1.524416
2,Abbeville,50.102210,1.824490
3,Ablon-sur-Seine,48.725468,2.419151
4,Achères Grand Cormier,48.955183,2.091903
...,...,...,...
2780,Yffiniac,48.470246,-2.652463
2781,Ygos-Saint-Saturnin,43.978185,-0.736153
2782,Ytrac,44.910689,2.364447
2783,Yvetot,49.622035,0.750115


In [3]:
# Clean the projection: keep a single time step so that each GPS point is meant
# to appear only once, then keep only the coordinate columns.
df_proj = pd.read_parquet(file_parquet).reset_index()
ref_step_mask = df_proj['time'] == "2015-06-01 12:00:00"
df_proj = df_proj.loc[ref_step_mask].drop(columns=['x', 'y', 'time', 'tasmaxAdjust'])
df_proj

Unnamed: 0,lon,lat
0,9.1627,41.394
1,9.1925,41.393
2,9.2224,41.392
3,9.1345,41.418
4,9.1644,41.416
...,...,...
87037,2.4836,51.051
87038,2.5192,51.052
87039,2.5548,51.053
87040,2.5182,51.074


In [4]:
def find_nearest_station(
    df_proj: pd.DataFrame, df_station: pd.DataFrame
) -> pd.DataFrame:
    """Attach to every projection point the name of its nearest station.

    NOTE: a later cell of this notebook defines another function with the same
    name (stations -> grid points); once that cell has run it replaces this one.

    Parameters
    ----------
    df_proj : pd.DataFrame
        Points to label; must contain ``lat`` and ``lon`` columns (degrees).
    df_station : pd.DataFrame
        Candidate stations; must contain ``gare``, ``lat`` and ``lon`` columns.

    Returns
    -------
    pd.DataFrame
        A copy of ``df_proj`` with an extra ``gare`` column. The input frame
        is left untouched.

    Raises
    ------
    ValueError
        If ``df_station`` has no rows.
    """
    if df_station.empty:
        raise ValueError("df_station is empty: no station to match against")

    def to_unit_sphere(df: pd.DataFrame) -> np.ndarray:
        # (lat, lon) in degrees -> 3-D Cartesian coordinates on the unit sphere.
        lat = np.radians(df["lat"].to_numpy(dtype=float))
        lon = np.radians(df["lon"].to_numpy(dtype=float))
        cos_lat = np.cos(lat)
        return np.column_stack(
            (cos_lat * np.cos(lon), cos_lat * np.sin(lon), np.sin(lat))
        )

    # Compute the distance from each point to each station.
    # A Euclidean distance on raw degrees would treat one degree of longitude
    # as long as one degree of latitude, which is wrong away from the equator.
    # The straight-line (chord) distance between points on the unit sphere grows
    # with the great-circle distance, so its argmin is the true nearest station.
    distances = cdist(
        to_unit_sphere(df_proj),
        to_unit_sphere(df_station),
        metric="euclidean",
    )
    # Pick the closest station for every point.
    nearest_idx = distances.argmin(axis=1)
    return df_proj.assign(gare=df_station["gare"].to_numpy()[nearest_idx])

In [5]:
# Label a copy of the projection grid with each point's nearest station.
df_map = find_nearest_station(df_proj=df_proj.copy(), df_station=df_gares)
df_map

Unnamed: 0,lon,lat,gare
0,9.1627,41.394,Menton Garavan
1,9.1925,41.393,Menton Garavan
2,9.2224,41.392,Menton Garavan
3,9.1345,41.418,Menton Garavan
4,9.1644,41.416,Menton Garavan
...,...,...,...
87037,2.4836,51.051,Bergues
87038,2.5192,51.052,Bergues
87039,2.5548,51.053,Bergues
87040,2.5182,51.074,Bergues


In [6]:
def find_nearest_station(
    df_gares: pd.DataFrame, df_proj: pd.DataFrame
) -> pd.DataFrame:
    """Attach to every station the coordinates of its nearest projection point.

    NOTE: this redefines ``find_nearest_station`` from an earlier cell of this
    notebook (which maps grid points -> stations) and replaces it once run.

    Parameters
    ----------
    df_gares : pd.DataFrame
        Stations; must contain ``lat`` and ``lon`` columns (degrees).
    df_proj : pd.DataFrame
        Projection grid points; must contain ``lat`` and ``lon`` columns.

    Returns
    -------
    pd.DataFrame
        A copy of ``df_gares`` with two extra columns, ``lat_proj`` and
        ``lon_proj``, holding the coordinates of the closest grid point.
        The input frame is left untouched.

    Raises
    ------
    ValueError
        If ``df_proj`` has no rows.
    """
    if df_proj.empty:
        raise ValueError("df_proj is empty: no grid point to match against")

    def to_unit_sphere(df: pd.DataFrame) -> np.ndarray:
        # (lat, lon) in degrees -> 3-D Cartesian coordinates on the unit sphere.
        lat = np.radians(df["lat"].to_numpy(dtype=float))
        lon = np.radians(df["lon"].to_numpy(dtype=float))
        cos_lat = np.cos(lat)
        return np.column_stack(
            (cos_lat * np.cos(lon), cos_lat * np.sin(lon), np.sin(lat))
        )

    # Compute the distance from each station to each grid point.
    # A Euclidean distance on raw degrees would treat one degree of longitude
    # as long as one degree of latitude, which is wrong away from the equator.
    # The straight-line (chord) distance between points on the unit sphere grows
    # with the great-circle distance, so its argmin is the true nearest point.
    distances = cdist(
        to_unit_sphere(df_gares),
        to_unit_sphere(df_proj),
        metric="euclidean",
    )
    # Pick the closest grid point for every station.
    nearest = df_proj[["lat", "lon"]].to_numpy()[distances.argmin(axis=1)]
    return df_gares.assign(lat_proj=nearest[:, 0], lon_proj=nearest[:, 1])

In [7]:
# Map every station to its nearest projection grid point and persist the
# mapping so later cells can reload it from disk.
# (The former `df_map = df_proj.copy()` was dead code: it was overwritten on
# the very next line.)
df_map = find_nearest_station(df_gares=df_gares, df_proj=df_proj)
df_map.to_csv('../data/gares_map_projection.csv', index=False)
df_map

Unnamed: 0,gare,lat,lon,lat_proj,lon_proj
0,Abancourt,49.685224,1.774306,49.690,1.7815
1,Abbaretz,47.554643,-1.524416,47.558,-1.5347
2,Abbeville,50.102210,1.824490,50.096,1.8275
3,Ablon-sur-Seine,48.725468,2.419151,48.716,2.4167
4,Achères Grand Cormier,48.955183,2.091903,48.956,2.0978
...,...,...,...,...,...
2780,Yffiniac,48.470246,-2.652463,48.462,-2.6640
2781,Ygos-Saint-Saturnin,43.978185,-0.736153,43.971,-0.7296
2782,Ytrac,44.910689,2.364447,44.914,2.3557
2783,Yvetot,49.622035,0.750115,49.616,0.7458


In [8]:
# Plot, for each station, the projection grid point it was matched to.
fig = px.scatter_map(
    data_frame=df_map,
    lat='lat_proj',
    lon='lon_proj',
    text="gare",
    zoom=3.9,
    height=600,
    width=900,
)
fig.show()

# Import des fichiers parquet selon la date



In [9]:
# NOTE(review): an empty pattern matches nothing, so this list is currently
# empty and nothing below reads it — placeholder for the real parquet pattern.
parquet_files = glob.glob("")

# Reload the station -> grid point mapping; the stations' own coordinates are
# not needed here, only the matched grid coordinates used as join keys.
df_gares = pd.read_csv('../data/gares_map_projection.csv').drop(columns=['lat', 'lon'])

# Load the projection file and keep time, value and geographic coordinates.
df_proj = pd.read_parquet(file_parquet).reset_index().drop(columns=['x', 'y'])

# Attach the station name to every grid point that was matched to a station;
# rows left incomplete by the left join are discarded.
df = (
    df_proj
    .merge(
        df_gares,
        how='left',
        left_on=['lat', 'lon'],
        right_on=['lat_proj', 'lon_proj'],
    )
    .dropna()
    .drop(columns=['lat_proj', 'lon_proj'])
)

df

Unnamed: 0,time,tasmaxAdjust,lon,lat,gare
963,2015-06-01 12:00:00,297.580475,3.1584,42.436,Cerbère
994,2015-06-01 12:00:00,302.567871,1.9421,42.433,Bourg-Madame
1057,2015-06-01 12:00:00,300.781189,1.9106,42.454,Latour-de-Carol - Enveitg
1059,2015-06-01 12:00:00,298.334473,2.0322,42.457,Saillagouse
1094,2015-06-01 12:00:00,297.634705,3.1267,42.480,Banyuls-sur-Mer
...,...,...,...,...,...
8027214,2015-08-31 12:00:00,298.317352,2.4166,50.960,Bergues
8027226,2015-08-31 12:00:00,295.602600,2.1311,50.977,Gravelines
8027248,2015-08-31 12:00:00,296.561249,2.3078,51.003,Grande-Synthe
8027261,2015-08-31 12:00:00,295.370331,2.3778,51.027,Coudekerque-Branche


In [None]:
# Animated map of the projected maximum temperature at each station's grid point.
#
# Two fixes compared with the previous version of this cell:
# - frames are driven by the time step, not by the temperature value itself
#   (which would create one animation frame per distinct temperature);
# - the colour range [10, 36] is a Celsius range, while the values displayed
#   for "tasmaxAdjust" are around 295–303, so the column is converted first.
# NOTE(review): assumes "tasmaxAdjust" is in Kelvin — confirm against the
# dataset documentation.
df_plot = df.assign(
    tasmax_celsius=df["tasmaxAdjust"] - 273.15,
    # String labels give stable, readable animation frame names.
    date=df["time"].astype(str),
)

fig = px.scatter_map(
    data_frame=df_plot,
    lat='lat',
    lon='lon',
    text="gare",
    animation_frame="date",
    color="tasmax_celsius",
    range_color=[10, 36],
    color_continuous_scale=["white", "yellow", "orange", "purple"],
    zoom=3.9,
    height=600,
    width=900,
)
fig.show()