# Importation des modules nécessaires

In [15]:
from Extract.API import API
from Extract.Scrapping import Scrapping
from Transform.TransformData import TransformData   
import pandas as pd

# Récupération des données (via l'API)

In [16]:
# Fetch the bike/pedestrian counter data through the project's API helper.
api = API()

# CSV export endpoint of the Rennes Métropole open-data portal. The query
# string asks for labelled columns, a ';' delimiter (%3B) and timestamps
# expressed in the Europe/Berlin timezone.
lien_api_velo_pieton = (
    "https://data.rennesmetropole.fr/api/explore/v2.1/catalog/datasets/"
    "eco-counter-data/exports/csv?lang=fr&timezone=Europe%2FBerlin&"
    "use_labels=true&delimiter=%3B"
)

# Download the full export.
csv_pieton_velo_rennes = api.get_api_data(lien_api_velo_pieton)

# Fail fast when nothing came back: building an empty DataFrame here would
# only postpone the failure to the first cell that reads the 'date' column.
if csv_pieton_velo_rennes is None:
    raise RuntimeError("Aucune donnée récupérée.")

print(f"Total d'enregistrements récupérés : {len(csv_pieton_velo_rennes)}")

# Wrap the payload in a DataFrame and preview the first rows.
df_velo_piton = pd.DataFrame(csv_pieton_velo_rennes)
df_velo_piton.head()


Les données ont été chargées avec succès.
Total d'enregistrements récupérés : 137362


Unnamed: 0,date,isoDate,counts,status,ID,name,counter,geo,sens
0,2017-02-16T10:00:00+01:00,2017-02-16T09:00:00+0100,6.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
1,2017-02-16T13:00:00+01:00,2017-02-16T12:00:00+0100,18.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
2,2017-02-16T14:00:00+01:00,2017-02-16T13:00:00+0100,11.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
3,2017-02-16T17:00:00+01:00,2017-02-16T16:00:00+0100,31.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
4,2017-02-16T20:00:00+01:00,2017-02-16T19:00:00+0100,18.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5


# Récupération des données (via scraping)

In [17]:
# Instantiate the project's scraping helper; the following cells call its methods.
scrap = Scrapping()

### Scraping de toutes les données d'aujourd'hui

In [18]:
# scrap_site() hands back the column labels and the row values separately.
# Presumably these are today's readings, per the variable name below — confirm.
col, data = scrap.scrap_site()

# Assemble the scraped rows into a DataFrame and preview the first rows.
df_meteo_aujourdhui = pd.DataFrame(data, columns=col)
df_meteo_aujourdhui.head()

Unnamed: 0,Heure locale\naccess_time\n30mn\nMETAR,Unnamed: 2,Température,Temps,Pluie,Vent,Humidité,Bio-météo,Pt. de rosée,Pression,Visibilité
0,2024-11-15 11:00:00,add_circle_outline,6.9 °C\n4.2 → 7.6,,0 mm/1h,4 km/h\nraf.9.4,100%,6.8\n\n203,6.9 °C,1027.9hPa,300 m
1,2024-11-15 10:00:00,add_circle_outline,4.1 °C\n2.4 → 4.1,,0 mm/1h,0 km/h\nraf.5.8,100%,4.1\n\n78,4.1 °C,1028.0hPa\n\n=,100 m
2,2024-11-15 09:00:00,add_circle_outline,2.5 °C\n2.5 → 3,,0 mm/1h,4 km/h\nraf.5.8,100%,1.9\n\n8,2.5 °C,1028.0hPa,100 m
3,2024-11-15 08:00:00,add_circle_outline,3.1 °C\n3 → 4.3,,0 mm/1h,7 km/h\nraf.7.6,100%,1.1,3.1 °C,1027.9hPa,100 m
4,2024-11-15 07:00:00,add_circle_outline,4.1 °C\n4.1 → 4.5,,0 mm/1h,0 km/h\nraf.5.8,99%,4.1,4 °C,1028.0hPa,200 m


### Scrap à partir d'une date

In [19]:
# Scrape starting from the given date. Per the original author's note, this
# writes a CSV into the dataset folder and also returns a DataFrame.
df_meteo_date = scrap.scrap_with_start_date('2024-11-12')

df_meteo_date.head()

La page du 12 novembre 2024 a été scrap !
La page du 13 novembre 2024 a été scrap !
La page du 14 novembre 2024 a été scrap !


Unnamed: 0,Heure locale\naccess_time\n30mn\nMETAR,Unnamed: 2,Température,Temps,Pluie,Vent,Humidité,Bio-météo,Pt. de rosée,Pression,Visibilité
0,2024-11-12 00:00:00,add_circle_outline,8.4 °C\n8.4 → 8.9,,0 mm/1h,14 km/h\nraf.29.9,79%,5.9,5 °C,1030.8hPa,26 km
1,2024-11-12 23:00:00,add_circle_outline,8.7 °C\n8.5 → 9.3,,0 mm/1h,14 km/h\nraf.28.4,80%,6.3,5.4 °C,1031.0hPa,25 km
2,2024-11-12 22:00:00,add_circle_outline,9.3 °C\n9.3 → 10.3,,0 mm/1h,18 km/h\n\nraf.32,76%,6.6,5.3 °C,1030.6hPa,30 km
3,2024-11-12 21:00:00,add_circle_outline,10.3 °C\n10.3 → 10.7,,0 mm/1h,11 km/h\n\nraf.31.3,72%,,5.5 °C,1030.4hPa,35 km
4,2024-11-12 20:00:00,add_circle_outline,10.4 °C\n10.1 → 10.7,,0 mm/1h,18 km/h\n\nraf.30.6,73%,,5.8 °C,1030.0hPa,40 km


# Transformation des données

In [20]:
# Load the weather dataset from a CSV path relative to the working directory.
# NOTE(review): presumably the file produced by the scraping step — confirm.
df_meteo = pd.read_csv("./dataset/meteo_rennes.csv")
df_meteo.head()

Unnamed: 0.1,Unnamed: 0,Heure locale\naccess_time\n30mn\nMETAR,Unnamed: 2,Température,Temps,Pluie,Vent,Humidité,Bio-météo,Pt. de rosée,Pression,Visibilité
0,0,2023-01-01 00:00:00,add_circle_outline,9.5 °C\n9.5 → 9.9,,0 mm/1h,7 km/h\nraf.10.8,94%,8.5,8.6 °C,1013.3hPa,60 km
1,1,2023-01-01 23:00:00,add_circle_outline,9.9 °C\n9.9 → 10.5,,0 mm/1h,7 km/h\nraf.28.8,94%,9.0,9 °C,1012.8hPa,60 km
2,2,2023-01-01 22:00:00,add_circle_outline,10.5 °C\n10.5 → 13.1,,0 mm/1h,22 km/h\n\nraf.42.5,91%,,9.1 °C,1011.6hPa,60 km
3,3,2023-01-01 21:00:00,add_circle_outline,13.1 °C\n13 → 13.4,,0 mm/1h,22 km/h\n\nraf.39.2,89%,,11.3 °C,1010.1hPa,14 km
4,4,2023-01-01 20:00:00,add_circle_outline,13.0 °C\n12.1 → 13,,0 mm/1h,18 km/h\n\nraf.34.9,89%,,11.2 °C,1008.6hPa,30 km


In [21]:
transform = TransformData()

# Columns whose raw text carries a unit suffix (%, hPa, km, °C, ...).
# NOTE(review): the scraped visibility shows up both in 'm' and in 'km'
# (e.g. '300 m' and '26 km' in the scraper previews), yet the column is
# labelled km once the suffix is stripped — confirm the unit is normalised.
colonnes_avec_unite = [
    'Humidité',
    'Pression',
    'Visibilité',
    'Température',
    'Pluie',
    'Vent',
    'Pt. de rosée',
]
df_meteo = transform.remove_currency_symbols(df_meteo, colonnes_avec_unite)

# Old label -> new label, applied one at a time in this order.
renommages = {
    'Heure locale\naccess_time\n30mn\nMETAR': 'date',
    'Température': 'Température (°C)',
    'Pluie': 'Pluie (mm/h)',
    'Humidité': 'Humidité (%)',
    'Pt. de rosée': 'Point_rosé (°C)',
    'Pression': 'Pression (hPa)',
    'Visibilité': 'Visibilité (km)',
    'Vent': 'Vent (km/h)',
}
for ancien_nom, nouveau_nom in renommages.items():
    df_meteo = transform.rename_column(df_meteo, ancien_nom, nouveau_nom)

# Drop the saved index, the icon column and the two columns not kept for the analysis.
colonnes_a_retirer = ['Unnamed: 0', 'Unnamed: 2', 'Temps', 'Bio-météo']
df_meteo = transform.remove_column(df_meteo, colonnes_a_retirer)

df_meteo.head()

Traitement de la colonne: Humidité
Traitement de la colonne: Pression
Traitement de la colonne: Visibilité
Traitement de la colonne: Température
Traitement de la colonne: Pluie
Traitement de la colonne: Vent
Traitement de la colonne: Pt. de rosée


Unnamed: 0,date,Température (°C),Pluie (mm/h),Vent (km/h),Humidité (%),Point_rosé (°C),Pression (hPa),Visibilité (km)
0,2023-01-01 00:00:00,9.5,0,7,94,8.6,1013.3,60
1,2023-01-01 23:00:00,9.9,0,7,94,9.0,1012.8,60
2,2023-01-01 22:00:00,10.5,0,22,91,9.1,1011.6,60
3,2023-01-01 21:00:00,13.1,0,22,89,11.3,1010.1,14
4,2023-01-01 20:00:00,13.0,0,18,89,11.2,1008.6,30


In [22]:
# Bring both 'date' columns to timezone-naive timestamps so they can be joined.
# NOTE(review): the counter timestamps carry a UTC offset and end up expressed
# in UTC, whereas the weather column was scraped as "Heure locale" and is read
# as if it were already UTC — confirm both sides share the same time reference.
horodatage_velo = pd.to_datetime(df_velo_piton['date'], utc=True)
df_velo_piton['date'] = horodatage_velo.dt.tz_convert(None)

horodatage_meteo = pd.to_datetime(df_meteo['date'], utc=True)
df_meteo['date'] = horodatage_meteo.dt.tz_convert(None)

# Left join: every counter row is kept, weather columns stay empty when no
# reading exists for that timestamp.
df_merged = df_velo_piton.merge(df_meteo, how='left', on='date')
df_merged.head()


Unnamed: 0,date,isoDate,counts,status,ID,name,counter,geo,sens,Température (°C),Pluie (mm/h),Vent (km/h),Humidité (%),Point_rosé (°C),Pression (hPa),Visibilité (km)
0,2017-02-16 09:00:00,2017-02-16T09:00:00+0100,6.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
1,2017-02-16 12:00:00,2017-02-16T12:00:00+0100,18.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
2,2017-02-16 13:00:00,2017-02-16T13:00:00+0100,11.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
3,2017-02-16 16:00:00,2017-02-16T16:00:00+0100,31.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
4,2017-02-16 19:00:00,2017-02-16T19:00:00+0100,18.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,


In [23]:
# Drop the counter metadata columns from the merged frame.
colonnes_compteur = ['status', 'ID', 'geo', 'counter', 'sens']
df_merged = transform.remove_column(df_merged, colonnes_compteur)

df_merged.head()


Unnamed: 0,date,isoDate,counts,name,Température (°C),Pluie (mm/h),Vent (km/h),Humidité (%),Point_rosé (°C),Pression (hPa),Visibilité (km)
0,2017-02-16 09:00:00,2017-02-16T09:00:00+0100,6.0,Boulevard Georges Pompidou,,,,,,,
1,2017-02-16 12:00:00,2017-02-16T12:00:00+0100,18.0,Boulevard Georges Pompidou,,,,,,,
2,2017-02-16 13:00:00,2017-02-16T13:00:00+0100,11.0,Boulevard Georges Pompidou,,,,,,,
3,2017-02-16 16:00:00,2017-02-16T16:00:00+0100,31.0,Boulevard Georges Pompidou,,,,,,,
4,2017-02-16 19:00:00,2017-02-16T19:00:00+0100,18.0,Boulevard Georges Pompidou,,,,,,,


In [24]:
# Keep only the rows for which the join found a weather reading.
df_cleaned = df_merged.dropna(subset=['Température (°C)'])

# Keep the first row seen for each timestamp.
# NOTE(review): the frame still holds several counters ('name') per timestamp,
# so de-duplicating on 'date' alone retains a single counter's count per hour —
# confirm this is intended rather than a per-counter or aggregated view.
df_cleaned_all = df_cleaned.drop_duplicates(subset=['date'])

# Pass the column as a list, like every other remove_column call in this notebook.
df_cleaned_all = transform.remove_column(df_cleaned_all, ['isoDate'])

# Preview the result.
df_cleaned_all.head()

Unnamed: 0,date,counts,name,Température (°C),Pluie (mm/h),Vent (km/h),Humidité (%),Point_rosé (°C),Pression (hPa),Visibilité (km)
33391,2023-01-01 07:00:00,7.0,Eco-Display Place de Bretagne,11.8,0,22,82,8.8,1014.1,17
33392,2023-01-01 09:00:00,12.0,Eco-Display Place de Bretagne,11.3,0,18,84,8.7,1014.1,40
33393,2023-01-01 17:00:00,35.0,Eco-Display Place de Bretagne,11.6,0,18,91,10.2,1008.7,28
33394,2023-01-02 09:00:00,65.0,Eco-Display Place de Bretagne,7.9,0,7,98,7.6,1019.2,35
33395,2023-01-02 15:00:00,142.0,Eco-Display Place de Bretagne,13.0,0,11,69,7.4,1022.4,55


In [25]:
# Number of non-missing values per column.
df_cleaned_all.notna().sum()

date                16366
counts              15920
name                16366
Température (°C)    16366
Pluie (mm/h)        16366
Vent (km/h)         16366
Humidité (%)        16366
Point_rosé (°C)     16366
Pression (hPa)      16366
Visibilité (km)     16366
dtype: int64

# Visualisation des données