# Import libraries

In [4]:
import pandas as pd, numpy as np, os, warnings, seaborn as sns
from datetime import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
font1 = fm.FontProperties(size=20)
font2 = fm.FontProperties(size=24)

%matplotlib inline

if int(str(sns.__version__).split('.')[1]) > 8 : 
    plt.style.use('seaborn-v0_8-darkgrid')
else:
    plt.style.use('seaborn-darkgrid')
    
sns.set(font_scale=2)
warnings.filterwarnings(action="ignore")

In [7]:
# Raw column name -> readable column name. The keys are also the only columns
# read from the CSV file.
colonnes_synop = {
    'numer_sta': 'Station',
    'date': 'DateHeure',
    'dd': 'DirectionVent',
    'ff': 'VitesseVent',
    't': 'Temperature',
    'u': 'Humidite',
    'vv': 'Visibilite',
    'pres': 'Pression',
    'rr1': 'Precipitation01',
    'rr3': 'Precipitation03',
    'rr6': 'Precipitation06',
    'rr12': 'Precipitation12',
    'rr24': 'Precipitation24',
}

# 'mq' marks a missing value in the file; station id and timestamp are kept as
# text so they are not converted to numbers while parsing.
donnees = pd.read_csv(
    '../../_git/donnees/synop.202310.csv',
    sep=';',
    usecols=list(colonnes_synop),
    na_values='mq',
    dtype={'numer_sta': str, 'date': str},
)
donnees = donnees.rename(columns=colonnes_synop)

# Timestamps are stored as 14-digit strings (YYYYMMDDHHMMSS).
donnees = donnees.assign(
    DateHeure=pd.to_datetime(donnees["DateHeure"], format='%Y%m%d%H%M%S')
)

In [8]:
# Unit conversions and derived columns.
#
# Everything is computed on a copy inside a single chain and `donnees` is only
# rebound once the whole chain has succeeded. If this cell is executed a second
# time, the raw PrecipitationNN columns are gone, the chain raises KeyError and
# `donnees` is left untouched (no second unit conversion is applied).
#
# NOTE(review): the head() output below shows a Precipitation of -0.1;
# presumably a special code in the source data - confirm before aggregating.
donnees = (
    donnees
    .assign(
        # Offset of 273.15: Kelvin -> degrees Celsius.
        Temperature=lambda d: d['Temperature'] - 273.15,
        # Presumably Pa -> hPa and m -> km; confirm against the data dictionary.
        Pression=lambda d: d['Pression'] / 100,
        Visibilite=lambda d: d['Visibilite'] / 1000,
        # Single precipitation column scaled to a 3-hour equivalent: take the
        # 3 h value when present, otherwise fall back to the 6 h, 12 h, 24 h
        # and finally 1 h accumulations, rescaled proportionally.
        Precipitation=lambda d: (
            d['Precipitation03']
            .combine_first(d['Precipitation06'] / 2)
            .combine_first(d['Precipitation12'] / 4)
            .combine_first(d['Precipitation24'] / 8)
            .combine_first(d['Precipitation01'] * 3)
        ),
        # Calendar helpers derived from the observation timestamp.
        Semaine=lambda d: d['DateHeure'].dt.isocalendar().week,
        Jour=lambda d: d['DateHeure'].dt.day,
        Heure=lambda d: d['DateHeure'].dt.hour,
    )
    .drop(columns=['Precipitation06',
                   'Precipitation12',
                   'Precipitation24',
                   'Precipitation01',
                   'Precipitation03'])
)

In [9]:
# Preview the first five converted observations.
donnees.head(n=5)

Unnamed: 0,Station,DateHeure,DirectionVent,VitesseVent,Temperature,Humidite,Visibilite,Pression,Precipitation,Semaine,Jour,Heure
0,7005,2023-10-01,150.0,2.1,13.7,89.0,3.35,1015.9,-0.1,39,1,0
1,7015,2023-10-01,160.0,1.8,12.0,90.0,19.74,1019.0,0.0,39,1,0
2,7020,2023-10-01,200.0,4.8,18.2,90.0,16.0,1021.6,0.0,39,1,0
3,7027,2023-10-01,140.0,1.3,13.0,95.0,17.24,1015.9,0.0,39,1,0
4,7037,2023-10-01,150.0,2.3,13.0,93.0,19.43,1006.1,0.0,39,1,0


In [11]:
# Station reference table; ID is read as text so it can be joined on the
# (text) station identifier of the observations.
postes = pd.read_csv('../../_git/donnees/postesSynop.csv', sep=';', dtype={'ID': str})

# Shorten each name to the text before its first hyphen, except for the names
# listed here, which are kept whole. The result is title-cased.
noms_entiers = ['CLERMONT-FD', 'MONT-DE-MARSAN', 'ST-PIERRE', 'ST-BARTHELEMY METEO']
postes['Nom'] = (
    postes['Nom']
    .where(postes['Nom'].isin(noms_entiers),
           postes['Nom'].str.split('-', n=1).str[0])
    .str.title()
)
postes['Altitude'] = postes['Altitude'].astype('int16')

# Keep stations whose ID sorts before '08000' (string comparison; presumably
# the metropolitan stations - confirm). The explicit copy makes `postes` an
# independent frame, so adding the Zone column below does not write into a
# slice of the original table.
postes = postes[postes['ID'] < '08000'].copy()

# Quadrant relative to the mean latitude / longitude of the retained stations:
# N/S followed by E/O. A station lying exactly on a mean is counted as N
# (resp. E), so every station receives a complete two-letter zone.
au_nord = postes['Latitude'] >= postes['Latitude'].mean()
a_l_est = postes['Longitude'] >= postes['Longitude'].mean()
postes['Zone'] = (au_nord.map({True: 'N', False: 'S'})
                  + a_l_est.map({True: 'E', False: 'O'}))

In [12]:
# Attach station metadata to every observation. Only observations whose
# station appears in `postes` are kept (inner join); the two join keys are
# dropped afterwards.
meteo = pd.merge(
    postes,
    donnees,
    how="inner",
    left_on="ID",
    right_on="Station",
).drop(columns=["ID", "Station"])

In [13]:
# Preview the first five rows of the merged table.
meteo.head(n=5)

Unnamed: 0,Nom,Latitude,Longitude,Altitude,Zone,DateHeure,DirectionVent,VitesseVent,Temperature,Humidite,Visibilite,Pression,Precipitation,Semaine,Jour,Heure
0,Abbeville,50.136,1.834,69,NO,2023-10-01 00:00:00,150.0,2.1,13.7,89.0,3.35,1015.9,-0.1,39,1,0
1,Abbeville,50.136,1.834,69,NO,2023-10-01 03:00:00,140.0,3.0,13.1,92.0,17.5,1015.0,0.0,39,1,3
2,Abbeville,50.136,1.834,69,NO,2023-10-01 06:00:00,140.0,2.5,12.3,95.0,12.99,1014.9,0.0,39,1,6
3,Abbeville,50.136,1.834,69,NO,2023-10-01 09:00:00,150.0,2.3,17.4,81.0,18.49,1015.1,0.0,39,1,9
4,Abbeville,50.136,1.834,69,NO,2023-10-01 12:00:00,180.0,1.6,24.2,60.0,19.91,1014.3,0.0,39,1,12


In [14]:
# Persist the merged table. Despite its '.gzip' name, the file written is a
# Parquet file (gzip is only the compression codec used inside it).
meteo.to_parquet(
    'meteo.gzip',
    engine='pyarrow',
    compression='gzip',
)


In [15]:
# Missing-value summary per station.
colonnes_mesures = ['VitesseVent', 'Temperature', 'Humidite', 'Visibilite',
                    'Pression', 'Precipitation']

# count() returns the number of non-null values per station and column.
# 'Zone' serves as the per-station row total, which relies on Zone being
# filled for every row.
meteo_nulls = meteo[['Nom', 'Zone'] + colonnes_mesures].groupby('Nom').count()

# Missing values = row total - non-null count, for every measurement column.
meteo_nulls[colonnes_mesures] = (
    meteo_nulls[colonnes_mesures].rsub(meteo_nulls['Zone'], axis='index')
)

# Show the stations with at least one missing measurement (first 48).
meteo_nulls[meteo_nulls[colonnes_mesures].sum(axis='columns') > 0].head(48)

Unnamed: 0_level_0,Zone,VitesseVent,Temperature,Humidite,Visibilite,Pression,Precipitation
Nom,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Ajaccio,247,0,1,1,0,0,1
Alencon,236,0,0,0,0,0,1
Belle Ile,248,0,0,36,46,0,0
Brest,246,7,0,0,0,0,1
Caen,247,0,0,0,0,0,1
Cap Cepet,248,0,248,248,154,248,248
Clermont-Fd,248,0,0,0,0,0,1
Embrun,248,0,0,0,248,0,0
Gourdon,248,0,0,0,3,0,1
Marignane,248,0,0,0,0,0,1
