In [None]:
import json
import lzma
import glob
import pandas as pd
import psutil
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import plotly.express as px
from IPython.display import display, Markdown
import tkinter as tk
from tkcalendar import Calendar
from datetime import datetime, timedelta
import folium
from IPython.display import display, IFrame
from sklearn.cluster import DBSCAN
from folium.plugins import Draw
from tkinter import filedialog
import webbrowser
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import ttest_rel, wilcoxon, shapiro
warnings.filterwarnings("ignore")

### Data import

In [None]:
# Load the sheet at position 1 (second sheet) of the Air Breizh workbook.
data = pd.read_excel('Data/Data_air_breizh.xlsx', sheet_name=1)

# Quick sanity check: print the type of the loaded object, then its length.
for apercu in (type(data), len(data)):
    print(apercu)

In [None]:
# Print the keys of `data` (its column labels) as a plain list.
print([*data.keys()])

In [None]:
# Rich display of the raw Air Breizh data loaded above.
display(data)

In [None]:
# Names of all columns whose label contains "P25E".
cols_p25e = list(filter(lambda nom: 'P25E' in nom, data.columns))

# Keep the 'date' column in front of the selected measurement columns.
cols_to_keep = ['date', *cols_p25e]

# New DataFrame restricted to those columns, with a preview of its first rows.
df = data[cols_to_keep]
print(df.head())


In [None]:
# Mean of the P25E_LAE column over all rows of the Air Breizh selection.
valeurs_lae = df['P25E_LAE']
moyenne_airb = valeurs_lae.mean()
print(moyenne_airb)

In [None]:
# Load the pickled Rennes pollution data and turn its index back into columns.
# NOTE(review): unpickling can execute arbitrary code — only load this file if
# its origin is trusted.
df_2020 = pd.read_pickle("Data/pollution_rennes.pkl").reset_index()
print("dataframe chargés")

In [None]:
# Parse 'measure_date' into a datetime 'date' column and use it as the index.
horodatage = pd.to_datetime(df_2020['measure_date'])
df_2020 = df_2020.assign(date=horodatage).set_index('date')

In [None]:
# Mean of the 'PM_2.5' column over all rows of the loaded pollution data.
valeurs_pm25 = df_2020['PM_2.5']
moyenne_aqmo = valeurs_pm25.mean()
print(moyenne_aqmo)

In [None]:
# Rich display of the pollution data, now indexed by 'date'.
display(df_2020)

In [None]:
# Group rows by their timestamp floored to the day and average 'PM_2.5'.
# This rebinds `df_2020` to the aggregated frame, so the cell is not
# re-runnable without reloading the data first.
cle_jour = df_2020.index.get_level_values('date').floor('D')
moyenne_par_jour = df_2020.groupby(cle_jour).agg(PM2_5_AQMO=('PM_2.5', 'mean'))
df_2020 = moyenne_par_jour.reset_index()

# Remove any timezone information from the day column.
df_2020['date'] = df_2020['date'].dt.tz_localize(None)
display(df_2020)


In [None]:
# Index the Air Breizh selection by 'date' (rebinds `df`, which later cells reuse).
df = df.set_index('date')

# Daily mean of the PBA and LAE columns, keyed by timestamps floored to the day.
cle_jour_airb = df.index.get_level_values('date').floor('D')
moyennes_jour = df.groupby(cle_jour_airb).agg(
    PM2_5_pba=('P25E_PBA', 'mean'),
    PM2_5_lae=('P25E_LAE', 'mean'),
)
df_day = moyennes_jour.reset_index()

# Remove any timezone information from the day column.
df_day['date'] = df_day['date'].dt.tz_localize(None)
display(df_day)


In [None]:
# Right-join the daily AQMO means onto the Air Breizh frame on 'date', drop
# every row that has a missing value, and index the result by 'date'.
# NOTE(review): this merges the `df` frame, not the daily aggregate `df_day`
# computed earlier — confirm this is intended, since `df_2020` holds one row
# per day.
resultat = (
    pd.merge(df_2020, df, on='date', how='right')
    .dropna()
    .set_index('date')
)
display(resultat)


## Comparaison Aqmo 

In [None]:
# NOTE(review): this only binds a second name to the merged DataFrame; despite
# the name ("moyenne" = mean) no mean is computed here — confirm the intent.
moyenne_AQMO= resultat

In [None]:
# Pairwise Spearman rank correlation between the columns of the merged frame.
correlation_matrix = resultat.corr('spearman')
print(correlation_matrix)

In [None]:
# Annotated heatmap of the correlation matrix on a diverging colour map.
heatmap_ax = sns.heatmap(correlation_matrix, cmap='coolwarm', annot=True)
heatmap_ax.set_title("Matrice de corrélation")
plt.show()


In [None]:
# Tracé du nuage de points
plt.scatter(resultat['PM2_5_AQMO'],resultat['P25E_PBA'])
plt.title("Nuage de points des données de pollution")
plt.xlabel("AQMO")
plt.ylabel("PBA Air Breizh")
plt.show()

In [None]:
# Signed difference between the two series: AQMO minus Air Breizh (PBA column).
resultat['diff'] = resultat['PM2_5_AQMO'] - resultat['P25E_PBA']

# Mean and standard deviation of the differences. They were previously used in
# the text box below without ever being defined (NameError on a fresh kernel).
# ddof=0 is the maximum-likelihood estimate for a normal fit.
mu = resultat['diff'].mean()
std = resultat['diff'].std(ddof=0)

# Histogram normalised to a density, so the normal curve shares its scale.
sns.histplot(resultat['diff'], bins=30, kde=False, stat='density',
             color='orange', edgecolor='black', label='Données')

# Normal density with the fitted parameters, over the histogram's x-range.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
if std > 0:  # skip the curve when the spread is zero or undefined (NaN)
    densite = np.exp(-0.5 * ((x - mu) / std) ** 2) / (std * np.sqrt(2 * np.pi))
    plt.plot(x, densite, color='black', linewidth=2, label='Loi normale ajustée')
    plt.xlim(xmin, xmax)  # keep the x-range chosen by the histogram

# Text box with the fitted parameters, placed in data coordinates
# (x at 60 % of xmax, y = 0.09).
text_str = f"μ = {mu:.2f}\nσ = {std:.2f}"
plt.text(xmax * 0.60, 0.1 * 0.9, text_str,
         fontsize=12, bbox=dict(facecolor='white', edgecolor='black'))

# Titles, legend and grid
plt.title("Histogramme des différences des valeurs de pollution")
plt.xlabel("Différence (AQMO Irisa - Air Breizh Pays Bas)")
plt.ylabel("Densité")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Series of differences (the 'diff' column added by the histogram cell).
serie = resultat['diff']

# plot_pacf rejects lag counts that are not strictly below half the sample
# size, so cap the requested 100 lags by the length of the series.
n_lags = max(1, min(100, len(serie) // 2 - 1))

# One figure with two stacked panels: ACF on top, PACF below.
fig, axes = plt.subplots(2, 1, figsize=(10, 8))

# Autocorrelation (ACF)
plot_acf(serie, ax=axes[0], lags=n_lags)
axes[0].set_title('Autocorrelation (ACF)')

# Partial autocorrelation (PACF)
plot_pacf(serie, ax=axes[1], lags=n_lags)
axes[1].set_title('Partial Autocorrelation (PACF)')

plt.show()


### Test de Wilcoxon pour savoir si les différences entre les données AQMO et celles d'Air Breizh sont significatives

In [None]:
# Wilcoxon test on the paired AQMO / Air Breizh PBA values; print the
# statistic, then the p-value, one per line.
test_apparie = wilcoxon(resultat['PM2_5_AQMO'], resultat['P25E_PBA'])
stat, p_value = test_apparie
print(stat, p_value, sep='\n')

### Erreur des capteurs AQMO

In [None]:
# Mean absolute error and root mean squared error between the two series.
valeurs_aqmo = resultat['PM2_5_AQMO']
valeurs_pba = resultat['P25E_PBA']

mae = mean_absolute_error(valeurs_aqmo, valeurs_pba)
rmse = np.sqrt(mean_squared_error(valeurs_aqmo, valeurs_pba))

for etiquette, valeur in (("MAE :", mae), ("RMSE :", rmse)):
    print(etiquette, valeur)