In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path

# Locate the "data" folder that sits one level above this notebook's directory.
# `__file__` is not defined inside a Jupyter kernel, so fall back to the current
# working directory when it is missing (assumes the kernel was started in the
# notebook's own folder — TODO confirm for your setup). When this code is run as
# a plain script, the script's location is still used.
try:
    NOTEBOOK_DIR = Path(__file__).resolve().parent
except NameError:
    NOTEBOOK_DIR = Path.cwd().resolve()
DATA_DIR = NOTEBOOK_DIR.parent / "data"

# The "id" column holds the timestamp, so parse it as datetimes on load.
train = pd.read_csv(DATA_DIR / "train.csv", parse_dates=["id"])

# Quick sanity check: first rows, then summary statistics for every column.
print(train.head())
print(train.describe(include="all"))



In [None]:
# Basic time series structure: one stacked panel per pollutant column,
# all panels sharing the timestamp x-axis.
cols = ["valeur_NO2", "valeur_CO", "valeur_O3", "valeur_PM10", "valeur_PM25"]

# Sort by timestamp and make it the index. Guarded so the cell can be re-run:
# once "id" is the index it is no longer a column, and an unguarded
# set_index("id") would raise KeyError on the second execution.
if train.index.name != "id":
    train = train.sort_values("id").set_index("id")

# Size the grid from `cols` so the panel count always matches the columns plotted.
# squeeze=False keeps `axes` a 2-D array even when there is a single column.
fig, axes = plt.subplots(len(cols), 1, figsize=(14, 14), sharex=True, squeeze=False)
for ax, c in zip(axes.ravel(), cols):
    train[c].plot(ax=ax)
    ax.set_title(c)
plt.tight_layout()
plt.show()



In [None]:
# Hour-of-day and day-of-week patterns.
# Derive both calendar features from the datetime index on a fresh copy of the frame.
train = train.assign(hour=train.index.hour, dow=train.index.dayofweek)

# 2 x 5 grid: top row shows each column against "hour", bottom row against "dow".
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(18, 8))
for col_idx, pollutant in enumerate(cols):
    hour_ax, dow_ax = axes[0, col_idx], axes[1, col_idx]

    # Repeated x values are aggregated by seaborn's lineplot defaults.
    sns.lineplot(data=train, x="hour", y=pollutant, ax=hour_ax)
    hour_ax.set_title(f"Hour pattern - {pollutant}")

    sns.lineplot(data=train, x="dow", y=pollutant, ax=dow_ax)
    dow_ax.set_title(f"DOW pattern - {pollutant}")

plt.tight_layout()
plt.show()

