In [1]:
import glob

import numpy as np
import pandas as pd

from scipy.spatial.distance import cdist

import xarray as xr

import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [2]:
parquet_files = glob.glob("")
file_parquet = "/Users/9204057K/Library/CloudStorage/OneDrive-SNCF/04_Projets/GitHub/hackathon_meteo_france/data/projections_parquet/tasmaxAdjust_FR-Metro_CNRM-ESM2-1_ssp370_r1i1p1f2_CNRM-MF_CNRM-AROME46t1_v1-r1_MF-CDFt-ANASTASIA-ALPX-3-1991-2020_day_20150101-20191231_2015.parquet"

# Lit les projection mappées sur les gares
df_gares = pd.read_csv('../data/gares_map_projection.csv')
df_gares.drop(['lat', 'lon'], axis=1, inplace=True)

# Lit les fichiers parquet
df_proj = pd.read_parquet(file_parquet)
df_proj = df_proj.reset_index()
df_proj.drop(['x', 'y'], axis=1, inplace=True)

# Merge avec les gares
df = pd.merge(
    left=df_proj, right=df_gares, how='left', left_on=['lat', 'lon'], right_on=['lat_proj', 'lon_proj']
).dropna().drop(['lat_proj', 'lon_proj'], axis=1)

gares = [
    'Paris Gare de Lyon',
    'Paris Gare du Nord',
    'Paris Montparnasse',
    'Marne-la-Vallée Chessy',
    'Toulouse Matabiau',
    'Carcassonne',
    'Sète',
    'Montpellier Saint-Roch',
]

df = df[df['gare'].isin(gares)]
df['tasmaxAdjust'] = np.round(df['tasmaxAdjust'] - 273.15, 2)
df = df.sort_values(['time', 'gare'], ascending=True)

df

Unnamed: 0,time,tasmaxAdjust,lon,lat,gare
4726,2015-06-01 12:00:00,30.150000,2.3652,43.229,Carcassonne
74253,2015-06-01 12:00:00,22.240000,2.7850,48.880,Marne-la-Vallée Chessy
8415,2015-06-01 12:00:00,27.080000,3.8694,43.612,Montpellier Saint-Roch
73893,2015-06-01 12:00:00,24.639999,2.3765,48.850,Paris Gare de Lyon
74237,2015-06-01 12:00:00,24.260000,2.3413,48.872,Paris Gare du Nord
...,...,...,...,...,...
8013916,2015-08-31 12:00:00,33.689999,2.3765,48.850,Paris Gare de Lyon
8014260,2015-08-31 12:00:00,33.430000,2.3413,48.872,Paris Gare du Nord
8013913,2015-08-31 12:00:00,33.639999,2.3083,48.849,Paris Montparnasse
7946215,2015-08-31 12:00:00,31.790001,3.6872,43.408,Sète


In [25]:
fig = px.scatter_map(data_frame=df, lat='lat', lon='lon', animation_group="gare", animation_frame="time", 
                     color="tasmaxAdjust", range_color=[0, 40], size="tasmaxAdjust",
                     color_continuous_scale=["white", "yellow", "orange", "purple"],
                     zoom=3.9, height=600, width=900
)
fig.show()

In [26]:
def get_filename(file:str)->str:
    filename = file.rsplit('.nc')[0]
    filename = filename.rsplit('/')[-1]
    return filename

In [37]:
parquet_files = glob.glob("/Users/9204057K/Library/CloudStorage/OneDrive-SNCF/04_Projets/GitHub/hackathon_meteo_france/data/projections_parquet/**.parquet")

for file in parquet_files:
    # Lit les fichiers parquet
    df_proj = pd.read_parquet(file)
    df_proj = df_proj.reset_index()
    df_proj.drop(['x', 'y'], axis=1, inplace=True)
    
    # Merge avec les gares
    df = pd.merge(
        left=df_proj, right=df_gares, how='left', left_on=['lat', 'lon'], right_on=['lat_proj', 'lon_proj']
    ).dropna().drop(['lat_proj', 'lon_proj'], axis=1)
    
    # Files
    filename = get_filename(file=file)
    df.to_parquet(f"../data/projections_gares/{filename}")

In [23]:
df = pd.read_parquet('../data/projections_gares/')
df['tasmaxAdjust'] = df['tasmaxAdjust'] - 273.15
df['year'] = df['time'].dt.year
df = df.sort_values(by=['time', 'lat', 'lon'])
df

Unnamed: 0,time,tasmaxAdjust,lon,lat,gare,year
994,2015-06-01 12:00:00,29.417877,1.9421,42.433,Bourg-Madame,2015
963,2015-06-01 12:00:00,24.430481,3.1584,42.436,Cerbère,2015
1057,2015-06-01 12:00:00,27.631195,1.9106,42.454,Latour-de-Carol - Enveitg,2015
1059,2015-06-01 12:00:00,25.184479,2.0322,42.457,Saillagouse,2015
1128,2015-06-01 12:00:00,24.553925,2.0311,42.480,Font-Romeu-Odeillo-Via,2015
...,...,...,...,...,...,...
8027214,2099-08-31 12:00:00,22.738495,2.4166,50.960,Bergues,2099
8027226,2099-08-31 12:00:00,22.511292,2.1311,50.977,Gravelines,2099
8027248,2099-08-31 12:00:00,22.586365,2.3078,51.003,Grande-Synthe,2099
8027261,2099-08-31 12:00:00,22.658966,2.3778,51.027,Coudekerque-Branche,2099


In [26]:
df_cond = df[df['tasmaxAdjust'] > 40]
df_cond

Unnamed: 0,time,tasmaxAdjust,lon,lat,gare,year,lag_1,lag_2
2543620,2017-06-30 12:00:00,40.267120,4.8625,43.997,Sorgues - Châteauneuf-du-Pape,2017,37.733185,37.257111
2543622,2017-06-30 12:00:00,40.403168,4.9249,43.997,Entraigues-sur-la-Sorgue,2017,40.267120,37.733185
2544199,2017-06-30 12:00:00,40.510590,4.8938,44.042,Bédarrides,2017,37.018097,39.122864
2544202,2017-06-30 12:00:00,40.940979,4.9875,44.042,Monteux,2017,40.510590,37.018097
2544204,2017-06-30 12:00:00,40.945343,5.0500,44.042,Carpentras,2017,40.940979,40.510590
...,...,...,...,...,...,...,...,...
6663017,2099-08-16 12:00:00,40.459778,4.7679,45.593,Givors Canal,2099,40.459778,37.031830
6663018,2099-08-16 12:00:00,40.128906,4.8000,45.593,Chasse-sur-Rhône,2099,40.459778,40.459778
6664596,2099-08-16 12:00:00,40.311615,0.1658,45.656,Angoulême,2099,36.553711,35.247894
7423190,2099-08-25 12:00:00,40.520844,6.4704,43.445,Les Arcs - Draguignan,2099,39.266815,35.484192


In [36]:
df['lag_1'] = df['tasmaxAdjust'].shift(1)
df['lag_2'] = df['tasmaxAdjust'].shift(2)

mask_cond = (
    (df['tasmaxAdjust'] > 40) &
    (df['lag_1'] > 35) &
    (df['lag_2'] > 35)
)
df_cond = df[mask_cond]
df_cond.to_csv("../data/projection_tasmax_cond.csv", index=False)

df_cond



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,time,tasmaxAdjust,lon,lat,gare,year,lag_1,lag_2
2544199,2017-06-30 12:00:00,40.510590,4.8938,44.042,Bédarrides,2017,40.403168,40.267120
2544202,2017-06-30 12:00:00,40.940979,4.9875,44.042,Monteux,2017,40.510590,40.403168
2544204,2017-06-30 12:00:00,40.945343,5.0500,44.042,Carpentras,2017,40.940979,40.510590
2545355,2017-06-30 12:00:00,40.482941,4.8313,44.132,Orange,2017,40.945343,40.940979
2545637,2017-06-30 12:00:00,41.263947,4.6121,44.154,Bagnols-sur-Cèze,2017,40.482941,40.945343
...,...,...,...,...,...,...,...,...
6663017,2099-08-16 12:00:00,40.459778,4.7679,45.593,Givors Canal,2099,40.459778,40.295959
6663018,2099-08-16 12:00:00,40.128906,4.8000,45.593,Chasse-sur-Rhône,2099,40.459778,40.459778
6664596,2099-08-16 12:00:00,40.311615,0.1658,45.656,Angoulême,2099,40.128906,40.459778
7423190,2099-08-25 12:00:00,40.520844,6.4704,43.445,Les Arcs - Draguignan,2099,40.311615,40.128906


In [37]:
trac_dates_1 = range(2042, 2062)
trac_dates_2 = range(2068, 2088)

df_trac_1 = df_cond[df_cond['year'].isin(trac_dates_1)]
df_trac_2 = df_cond[df_cond['year'].isin(trac_dates_2)]

df_trac_1 = df_trac_1.groupby(['year', 'gare', 'lat', 'lon'])['tasmaxAdjust'].count().reset_index()
df_trac_2 = df_trac_2.groupby(['year', 'gare', 'lat', 'lon'])['tasmaxAdjust'].count().reset_index()

df_trac_1

Unnamed: 0,year,gare,lat,lon,tasmaxAdjust
0,2042,Agen,44.211,0.6289,2
1,2042,Agonac,45.295,0.7398,2
2,2042,Aigrefeuille le Thou,46.101,-0.9440,1
3,2042,Aiguillon,44.290,0.3402,2
4,2042,Aixe-sur-Vienne,45.804,1.1545,2
...,...,...,...,...,...
4062,2060,Waligator Parc,49.226,6.1751,1
4063,2060,Wingen-sur-Moder,48.914,7.3974,1
4064,2061,Boucau,43.524,-1.4942,1
4065,2061,Marssac-sur-Tarn,43.917,2.0239,1


In [38]:
fig = px.scatter_map(data_frame=df_trac_1, lat='lat', lon='lon', animation_group="gare", animation_frame="year", 
                     color="tasmaxAdjust", size="tasmaxAdjust", range_color=[0, 6],
                     color_continuous_scale=["white", "yellow", "orange", "purple"],
                     zoom=3.9, height=600, width=900,
                     title="Nombre de jours où la condition est atteinte pour une gare, par année, TRACC 1"
)
fig.show()

In [39]:
fig = px.scatter_map(data_frame=df_trac_2, lat='lat', lon='lon', animation_group="gare", animation_frame="year", 
                     color="tasmaxAdjust", size="tasmaxAdjust", range_color=[0, 8],
                     color_continuous_scale=["white", "yellow", "orange", "purple"],
                     zoom=3.9, height=600, width=900,
                     title="Nombre de jours où la condition est atteinte pour une gare, par année, TRACC 2"
)
fig.show()

In [64]:
df_to_plot = df_cond.groupby(['year']).agg({'tasmaxAdjust': 'count', 'gare': 'unique'}).reset_index()
df_to_plot.rename(columns={'tasmaxAdjust': 'Nombre d\'occurence', 'gare': 'Nombre de gares'}, inplace=True)
df_to_plot['Nombre de gares'] = df_to_plot['Nombre de gares'].apply(lambda x: len(x))
df_to_plot['mean'] = df_to_plot["Nombre d\'occurence"].rolling(10).mean()
df_to_plot

Unnamed: 0,year,Nombre d'occurence,Nombre de gares,mean
0,2017,175,145,
1,2020,12,10,
2,2021,22,12,
3,2022,28,27,
4,2023,1533,1054,
...,...,...,...,...
66,2095,115,108,1056.8
67,2096,114,69,1059.6
68,2097,1621,973,1177.3
69,2098,968,395,1205.1


In [86]:
fig = px.line(data_frame=df_to_plot.dropna(), x='year', y=['Nombre d\'occurence', 'mean'])
fig.add_vrect(
    x0=2042, x1=2061,
    fillcolor="orange", opacity=0.15,
    layer="below", line_width=0,
    annotation_text="Période 2042-2061 : TRACC 2°C", annotation_position="top left"
)
fig.add_vrect(
    x0=2068, x1=2087,
    fillcolor="purple", opacity=0.15,
    layer="below", line_width=0,
    annotation_text="Période 2068-2087 : TRACC 2.7°C", annotation_position="top left"
)
title = "Nombre d'occurences où la condition est atteinte"
subtitle = "Condition : 2 nuits > 20°C, 3 jours > 35°C dont le 3ème > 40°C"
fig.update_layout(
    title={
        'text': f"{title}<br><span style='font-size:12px;'>{subtitle}</span>",
    }
)
fig.update_yaxes(title="Nombre d'occurence")
fig.update_xaxes(title="Année")
fig.show()

In [87]:
df_to_plot = df_cond.groupby(['time']).agg({'tasmaxAdjust': 'count', 'gare': 'unique'}).reset_index()
df_to_plot.rename(columns={'tasmaxAdjust': 'Nombre d\'occurence', 'gare': 'Nombre de gares'}, inplace=True)
df_to_plot['Nombre de gares'] = df_to_plot['Nombre de gares'].apply(lambda x: len(x))
df_to_plot['mean'] = df_to_plot["Nombre d\'occurence"].rolling(10).mean()
df_to_plot

Unnamed: 0,time,Nombre d'occurence,Nombre de gares,mean
0,2017-06-30 12:00:00,6,6,
1,2017-07-12 12:00:00,10,10,
2,2017-07-13 12:00:00,105,105,
3,2017-07-14 12:00:00,1,1,
4,2017-07-15 12:00:00,51,51,
...,...,...,...,...
825,2099-08-14 12:00:00,9,9,394.6
826,2099-08-15 12:00:00,233,233,417.7
827,2099-08-16 12:00:00,29,29,379.6
828,2099-08-25 12:00:00,1,1,367.1


In [94]:
fig = px.line(data_frame=df_to_plot.dropna()[df_to_plot['time'] > pd.to_datetime("2030-01-01", yearfirst=True)],
              x='time', y=['Nombre d\'occurence'])#, 'mean'])
fig.add_vrect(
    x0=pd.to_datetime("2042-01-01", yearfirst=True), x1=pd.to_datetime("2061-01-01", yearfirst=True),
    fillcolor="orange", opacity=0.15,
    layer="below", line_width=0,
    annotation_text="Période 2042-2061 : TRACC 2°C", annotation_position="top left"
)
fig.add_vrect(
    x0=pd.to_datetime("2068-01-01", yearfirst=True), x1=pd.to_datetime("2087-01-01", yearfirst=True),
    fillcolor="purple", opacity=0.15,
    layer="below", line_width=0,
    annotation_text="Période 2068-2087 : TRACC 2.7°C", annotation_position="top left"
)
title = "Nombre de gares où la condition est atteinte"
subtitle = "Condition : 2 nuits > 20°C, 3 jours > 35°C dont le 3ème > 40°C"
fig.update_layout(
    title={
        'text': f"{title}<br><span style='font-size:12px;'>{subtitle}</span>",
    }
)
fig.update_yaxes(title="Nombre d'occurence")
fig.update_xaxes(title="Année")
fig.show()


Boolean Series key will be reindexed to match DataFrame index.

