In [64]:
import pandas as pd
from matplotlib import pyplot as plt
import os

In [67]:
path = "C:/Users/axell/OneDrive/Documents/ENSAE/Projet-data-science-Mornand-Merienne/CSV_data/METEOFRANCE"

# Initialisation d'un dictionnaire pour stocker les DataFrames
df_meteo = {}

for year in range(2020, 2024):  
    for month in range(1, 13):  
        date = f"{year}{month:02d}"  # Format "202001", "202002", etc
        file_name = f"synop.{date}.csv.gz"
        file_path = os.path.join(path, file_name)
        df = pd.read_csv(file_path, compression='gzip', sep=';', encoding='utf-8')
        df_meteo[f"df_meteo_{date}"] = df

# Exemple pour accéder à janvier 2020 :
df_meteo_202001 = df_meteo.get("df_meteo_202001")
print(df_meteo_202001.describe())
print(df_meteo_202001['numer_sta'].nunique())

          numer_sta          date  Unnamed: 59
count  14833.000000  1.483300e+04          0.0
mean   27021.421156  2.020012e+13          NaN
std    30487.679317  8.937503e+06          NaN
min     7005.000000  2.020010e+13          NaN
25%     7207.000000  2.020011e+13          NaN
50%     7577.000000  2.020012e+13          NaN
75%    61980.000000  2.020012e+13          NaN
max    89642.000000  2.020013e+13          NaN
61


In [59]:
print(df_meteo_202001.columns)

Index(['numer_sta', 'date', 'pmer', 'tend', 'cod_tend', 'dd', 'ff', 't', 'td',
       'u', 'vv', 'ww', 'w1', 'w2', 'n', 'nbas', 'hbas', 'cl', 'cm', 'ch',
       'pres', 'niv_bar', 'geop', 'tend24', 'tn12', 'tn24', 'tx12', 'tx24',
       'tminsol', 'sw', 'tw', 'raf10', 'rafper', 'per', 'etat_sol', 'ht_neige',
       'ssfrai', 'perssfrai', 'rr1', 'rr3', 'rr6', 'rr12', 'rr24', 'phenspe1',
       'phenspe2', 'phenspe3', 'phenspe4', 'nnuage1', 'ctype1', 'hnuage1',
       'nnuage2', 'ctype2', 'hnuage2', 'nnuage3', 'ctype3', 'hnuage3',
       'nnuage4', 'ctype4', 'hnuage4', 'Unnamed: 59'],
      dtype='object')


In [68]:
colonnes_a_garder = ['numer_sta', 'date', 'pmer', 'tend', 'dd', 'ff', 't', 'u', 'vv', 'rr12']
noms_colonnes = {
    'numer_sta': 'indicatif_station',
    'date': 'date_UTC',
    'pmer': 'pression_niveau_mer_Pa',
    'tend': 'var_pression_3h_Pa',
    'dd': 'direction_vent_moyen_10mn_deg',
    'ff': 'vitesse_vent_moyen_10mn_m/s',
    't': 'temperature_K',
    'u': 'humidite_%',
    'vv': 'visibilite_horizontale_m',
    'rr12': 'precipitations_12h_mm'
}

for key in df_meteo:
    df_meteo[key] = df_meteo[key][colonnes_a_garder]
    df_meteo[key] = df_meteo[key].rename(columns=noms_colonnes)

df_meteo_202001 = df_meteo.get("df_meteo_202001")
print(df_meteo_202001.head())

   indicatif_station        date_UTC pression_niveau_mer_Pa  \
0               7005  20200101000000                 103180   
1               7015  20200101000000                 103320   
2               7020  20200101000000                 102870   
3               7027  20200101000000                 103080   
4               7037  20200101000000                 103190   

  var_pression_3h_Pa direction_vent_moyen_10mn_deg  \
0                -80                           120   
1                  0                            80   
2                -70                            80   
3                  0                           100   
4                -30                           130   

  vitesse_vent_moyen_10mn_m/s temperature_K humidite_%  \
0                    1.800000    274.350000         89   
1                    4.700000    275.250000         99   
2                    1.300000    280.550000         93   
3                    4.200000    275.750000        100   
4     

In [69]:
def filtre_mesure_minuit(df):
    df['date_UTC'] = pd.to_datetime(df['date_UTC'], format='%Y%m%d%H%M%S')

    # Garder seulement les lignes où l'heure, minute et seconde sont égales à 00
    df_minuit = df[df['date_UTC'].dt.hour == 0]
    df_minuit = df_minuit[df_minuit['date_UTC'].dt.minute == 0]
    df_minuit = df_minuit[df_minuit['date_UTC'].dt.second == 0]
    
    return df_minuit

# Appliquer cette fonction de filtrage à tous les DataFrames du dictionnaire
for key in df_meteo:
    df_meteo[key] = filtre_mesure_minuit(df_meteo[key])  # Garder seulement les mesures à minuit
    df_meteo[key]['date_UTC'] = df_meteo[key]['date_UTC'].dt.strftime('%Y-%m-%d')

# Tester le résultat pour un DataFrame spécifique
df_meteo_202001 = df_meteo.get("df_meteo_202001")
print(df_meteo_202001.head())
print(df_meteo_202001['indicatif_station'].nunique())

   indicatif_station    date_UTC pression_niveau_mer_Pa var_pression_3h_Pa  \
0               7005  2020-01-01                 103180                -80   
1               7015  2020-01-01                 103320                  0   
2               7020  2020-01-01                 102870                -70   
3               7027  2020-01-01                 103080                  0   
4               7037  2020-01-01                 103190                -30   

  direction_vent_moyen_10mn_deg vitesse_vent_moyen_10mn_m/s temperature_K  \
0                           120                    1.800000    274.350000   
1                            80                    4.700000    275.250000   
2                            80                    1.300000    280.550000   
3                           100                    4.200000    275.750000   
4                           130                    2.200000    272.250000   

  humidite_% visibilite_horizontale_m precipitations_12h_mm  
0     

In [71]:
df_meteo_tot = pd.concat(df_meteo.values(), ignore_index=True)
print(df_meteo_tot.describe())
print(df_meteo_tot.head())

       indicatif_station
count       86772.000000
mean        27005.637786
std         30332.225628
min          7005.000000
25%          7207.000000
50%          7591.000000
75%         61980.000000
max         89642.000000
   indicatif_station    date_UTC pression_niveau_mer_Pa var_pression_3h_Pa  \
0               7005  2020-01-01                 103180                -80   
1               7015  2020-01-01                 103320                  0   
2               7020  2020-01-01                 102870                -70   
3               7027  2020-01-01                 103080                  0   
4               7037  2020-01-01                 103190                -30   

  direction_vent_moyen_10mn_deg vitesse_vent_moyen_10mn_m/s temperature_K  \
0                           120                    1.800000    274.350000   
1                            80                    4.700000    275.250000   
2                            80                    1.300000    280.550000  