In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA

from sklearn.metrics import mean_squared_error
from math import sqrt

import pickle
import os

In [None]:
# Load the semicolon-separated CSV export into a DataFrame
df = pd.read_csv(filepath_or_buffer='export_infoclimat.csv', sep=';')

In [None]:
# Print a concise summary of the DataFrame (columns, dtypes, non-null counts)
df.info()

In [None]:
# Count the missing values in each column and print the result
missing_per_column = df.isna().sum()
print(missing_per_column)

In [None]:
# Parse the timestamp column, use it as a chronologically sorted index, and
# drop station_id (always the same value) and vent_rafales (always null).
df = (
    df.assign(dh_utc=pd.to_datetime(df['dh_utc']))
      .set_index('dh_utc')
      .sort_index()
      .drop(columns=['station_id', 'vent_rafales'])
)

In [None]:
# Fill missing values: the two rain columns get 0, every other
# float64/int64 column gets its own column mean.
rain_columns = ['pluie_3h', 'pluie_1h']
for rain_col in rain_columns:
    df[rain_col] = df[rain_col].fillna(0)

numeric_dtypes = ['float64', 'int64']
for name in df.columns:
    # Skip the rain columns (already filled) and anything non-numeric
    if name in rain_columns or df[name].dtype not in numeric_dtypes:
        continue
    df[name] = df[name].fillna(df[name].mean())

In [None]:
# Aggregate to one row per calendar day (mean of each column), then discard
# every day that still contains a missing value.
df_daily = df.resample('D').mean().dropna()

In [None]:
# Line plot of the daily mean temperature
fig, ax = plt.subplots(figsize=(15, 4))
ax.plot(df_daily['temperature'], marker='o', linestyle='-')
ax.set(title='Température moyenne quotidienne', xlabel='Date', ylabel='Température (°C)')
plt.xticks(rotation=90)
ax.grid(True)
plt.show()

In [None]:
# Daily mean temperature on a regular daily index.
# Calling Resampler.interpolate() directly reindexes the series to the daily
# timestamps first (asfreq), which keeps only the readings stamped exactly at
# midnight and ignores the rest of the day. Aggregate with mean() first, then
# interpolate only the days that have no observation at all.
df_daily_temp = df['temperature'].resample('D').mean().interpolate()

In [None]:
# Additive seasonal decomposition of the daily temperature series,
# using a period of 365 samples.
result = seasonal_decompose(x=df_daily_temp, period=365, model='additive')

In [None]:
# Draw the four decomposition components, one stacked panel each
fig, axes = plt.subplots(4, 1, figsize=(10, 8))
panels = [
    (result.observed, 'Observé'),
    (result.trend, 'Tendance'),
    (result.seasonal, 'Saisonnalité'),
    (result.resid, 'Résidu'),
]
for ax, (component, label) in zip(axes, panels):
    component.plot(ax=ax)
    ax.set_ylabel(label)
plt.tight_layout()
plt.show()

In [None]:
# Daily rain series used for the seasonal decomposition.
# Resampler.interpolate() on its own reindexes to the daily timestamps (asfreq)
# and would keep only the reading stamped exactly at midnight; aggregate each
# day with mean() first (same aggregation as df_daily), then interpolate the
# days without any observation and drop whatever is still missing.
df_daily_rain = df['pluie_1h'].resample('D').mean().interpolate()
df_daily_rain = df_daily_rain.dropna()

In [None]:
# Additive seasonal decomposition of the daily rain series,
# using a period of 365 samples.
result = seasonal_decompose(x=df_daily_rain, period=365, model='additive')

In [None]:
# Draw the four decomposition components, one stacked panel each
fig, axes = plt.subplots(4, 1, figsize=(10, 8))
panels = [
    (result.observed, 'Observé'),
    (result.trend, 'Tendance'),
    (result.seasonal, 'Saisonnalité'),
    (result.resid, 'Résidu'),
]
for ax, (component, label) in zip(axes, panels):
    component.plot(ax=ax)
    ax.set_ylabel(label)
plt.tight_layout()
plt.show()

In [None]:
# Build the train/test split
def get_dataset(champ, train_start=808, train_end=1903, test_size=10):
    """Split ``df_daily`` by row position into a training and a test window.

    Parameters
    ----------
    champ : str
        Name of the ``df_daily`` column to extract as the modelled series.
    train_start : int, optional
        Position (``iloc``) of the first training row. Defaults to 808.
    train_end : int, optional
        Position one past the last training row; the test window starts
        here. Defaults to 1903.
    test_size : int, optional
        Number of rows in the test window. Defaults to 10.

    Returns
    -------
    tuple
        ``(train_df, X_train, test_df, X_test)`` where the ``*_df`` items are
        the full daily frames for each window and the ``X_*`` items are the
        ``champ`` column of those frames.

    Raises
    ------
    KeyError
        If ``champ`` is not a column of ``df_daily``.
    """
    # The defaults reproduce the windows that were previously hard-coded
    # (rows 808:1903 for training, 1903:1913 for testing).
    train_df = df_daily.iloc[train_start:train_end]
    test_df = df_daily.iloc[train_end:train_end + test_size]

    X_train = train_df[champ]
    X_test = test_df[champ]

    return (train_df, X_train, test_df, X_test)

In [None]:
# Train an ARIMA model on a single field
def modelisation(champ, order=(30, 1, 30)):
    """Fit an ARIMA model on the training window of ``champ`` and print its errors.

    Parameters
    ----------
    champ : str
        Column name passed to ``get_dataset`` to obtain the series.
    order : tuple of int, optional
        ARIMA ``(p, d, q)`` order. Defaults to ``(30, 1, 30)``.

    Returns
    -------
    The fitted results object returned by ``ARIMA(...).fit()``.
    """
    train_df, X_train, test_df, X_test = get_dataset(champ)

    model_ARIMA = ARIMA(X_train, order=order)
    results_ARIMA = model_ARIMA.fit()

    # In-sample error: fittedvalues are compared with the data the model was
    # trained on, so this is a training score, not a test score.
    train_rmse = sqrt(mean_squared_error(X_train, results_ARIMA.fittedvalues))
    print('Train RMSE: %.3f' % train_rmse)

    # Out-of-sample error: forecast as many steps as there are held-out rows
    # and compare them positionally with the test window.
    if len(X_test) > 0:
        forecast = results_ARIMA.forecast(steps=len(X_test))
        test_rmse = sqrt(mean_squared_error(X_test, forecast))
        print('Test RMSE: %.3f' % test_rmse)

    return results_ARIMA

In [None]:
# Keep the temperature train/test split available at notebook level
train_df, X_train, test_df, X_test = get_dataset('temperature')

In [None]:
# Train every model: one ARIMA fit per field.
# Each call goes through modelisation(), which fits the model on that field's
# training window and prints an RMSE before returning the fitted results.
temperature_model = modelisation('temperature')
pression_model = modelisation('pression')
humidite_model = modelisation('humidite')
rosee_model = modelisation('point_de_rosee')
vent_moyen_model = modelisation('vent_moyen')
vent_direction_model = modelisation('vent_direction')

In [None]:
# Export every trained model as a pickle file under ./models
# makedirs(exist_ok=True) creates the folder when missing and is a no-op when
# it already exists, so no separate existence check is needed.
os.makedirs('models', exist_ok=True)

# File stem -> fitted model; each one is written to models/<stem>_model.pkl
trained_models = {
    'temperature': temperature_model,
    'pression': pression_model,
    'humidite': humidite_model,
    'rosee': rosee_model,
    'vent_moyen': vent_moyen_model,
    'vent_direction': vent_direction_model,
}
for model_name, trained_model in trained_models.items():
    with open('models/%s_model.pkl' % model_name, "wb") as file:
        pickle.dump(trained_model, file)

In [None]:
# # Pour relancer avec les données de test

# champ = ''

# train_df, X_train, test_df, X_test = get_dataset(champ)

# history_temp = [x for x in X_train]
# predictions_temp = list()

# # walk-forward validation
# for t in range(len(X_test)):
#     model = ARIMA(history_temp, order=(30, 1, 30))
#     model_fit = model.fit()
#     output = model_fit.forecast()
#     yhat = output[0]
#     obs = X_test.iloc[t]
#     predictions_temp.append(yhat)
#     history_temp.append(obs)
#     print('predicted=' + str(yhat) +', expected=' + str(obs))