# Importation des modules nécessaires

In [10]:
from Extract.API import API
from Extract.Scrapping import Scrapping
from Transform.TransformData import TransformData
import pandas as pd

# Récupération de la donnée (via api)

In [11]:
# Utiliser l'API pour récupérer les données vélo/piéton
api = API()

# Lien de l'API qui retourne un CSV
lien_api_velo_pieton = (
    "https://data.rennesmetropole.fr/api/explore/v2.1/catalog/datasets/"
    "eco-counter-data/exports/csv?lang=fr&timezone=Europe%2FBerlin&"
    "use_labels=true&delimiter=%3B"
)

# Récupérer toutes les données au format CSV
csv_pieton_velo_rennes = api.get_api_data(lien_api_velo_pieton)

# Vérifier et afficher les données récupérées
if csv_pieton_velo_rennes is not None:
    print(f"Total d'enregistrements récupérés : {len(csv_pieton_velo_rennes)}")
else:
    print("Aucune donnée récupérée.")

df_velo_piton = pd.DataFrame(csv_pieton_velo_rennes)
df_velo_piton.head()


Les données ont été chargées avec succès.
Total d'enregistrements récupérés : 137218


Unnamed: 0,date,isoDate,counts,status,ID,name,counter,geo,sens
0,2017-02-16T10:00:00+01:00,2017-02-16T09:00:00+0100,6.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
1,2017-02-17T05:00:00+01:00,2017-02-17T04:00:00+0100,0.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
2,2017-02-16T13:00:00+01:00,2017-02-16T12:00:00+0100,18.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
3,2017-02-17T11:00:00+01:00,2017-02-17T10:00:00+0100,6.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
4,2017-02-16T14:00:00+01:00,2017-02-16T13:00:00+0100,11.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5


# Récupération de la data (via scrapping)

# Transformation de la data 

In [12]:
scrap = Scrapping()
col, data = scrap.scrap_site()

print(col)
df_meteo = pd.DataFrame(data, columns=col)
df_meteo.head()

Unnamed: 0,Heure locale\naccess_time\n30mn\nMETAR,Unnamed: 2,Température,Temps,Pluie,Vent,Humidité,Bio-météo,Pt. de rosée,Pression,Visibilité
0,2024-11-12 11:00:00,add_circle_outline,10.9 °C\n10 → 11.1,,0 mm/1h,25 km/h\n\nraf.40.3,78%,214,7.2 °C,1032.1hPa,35 km
1,2024-11-12 10:00:00,add_circle_outline,9.8 °C\n6.3 → 9.8,,0 mm/1h,22 km/h\nraf.29.2,87%,6.9\n\n125,7.7 °C,1032.9hPa,30 km
2,2024-11-12 09:00:00,add_circle_outline,6.3 °C\n3.9 → 6.3,,0 mm/1h,11 km/h\nraf.16.6,94%,4\n\n22,5.4 °C,1033.1hPa,19 km
3,2024-11-12 08:00:00,add_circle_outline,4.0 °C\n3.8 → 5.2,,0 mm/1h,11 km/h\nraf.14.4,97%,1.3,3.6 °C,1033.3hPa,17 km
4,2024-11-12 07:00:00,add_circle_outline,5.1 °C\n5 → 5.4,,0 mm/1h,11 km/h\nraf.12.6,96%,2.6,4.5 °C,1033.6hPa,19 km


In [13]:
transform = TransformData()

df_meteo = transform.rename_column(df_meteo, 'Heure locale\naccess_time\n30mn\nMETAR', 'date')
df_meteo = transform.remove_currency_symbols(df_meteo, ['Humidité', 'Pression', 'Visibilité', 'Température', 'Pluie', 'Vent', 'Pt. de rosée'])
df_meteo = transform.rename_column(df_meteo, 'Température', 'Température (°C)')
df_meteo = transform.rename_column(df_meteo, 'Pluie', 'Pluie (mm/h)')
df_meteo = transform.rename_column(df_meteo, 'Humidité', 'Humidité (%)')
df_meteo = transform.rename_column(df_meteo, 'Pt. de rosée', 'Point_rosé (°C)')
df_meteo = transform.rename_column(df_meteo, 'Pression', 'Pression (hPa)')
df_meteo = transform.rename_column(df_meteo, 'Visibilité', 'Visibilité (km)')
df_meteo = transform.rename_column(df_meteo, 'Vent', 'Vent (km/h)')
df_meteo = transform.remove_column(df_meteo, ['', 'Temps', 'Bio-météo'])

df_meteo.head()

Traitement de la colonne: Humidité
Traitement de la colonne: Pression
Traitement de la colonne: Visibilité
Traitement de la colonne: Température
Traitement de la colonne: Pluie
Traitement de la colonne: Vent
Traitement de la colonne: Pt. de rosée


Unnamed: 0,date,Température (°C),Pluie (mm/h),Vent (km/h),Humidité (%),Point_rosé (°C),Pression (hPa),Visibilité (km)
0,2024-11-12 11:00:00,10.9,0,25,78,7.2,1032.1,35
1,2024-11-12 10:00:00,9.8,0,22,87,7.7,1032.9,30
2,2024-11-12 09:00:00,6.3,0,11,94,5.4,1033.1,19
3,2024-11-12 08:00:00,4.0,0,11,97,3.6,1033.3,17
4,2024-11-12 07:00:00,5.1,0,11,96,4.5,1033.6,19


In [16]:
# Convertir les colonnes 'date' en format datetime sans décalage horaire pour les deux DataFrames
df_velo_piton['date'] = pd.to_datetime(df_velo_piton['date']).dt.tz_localize(None)
df_meteo['date'] = pd.to_datetime(df_meteo['date']).dt.tz_localize(None)

# Effectuer la jointure
df_merged = pd.merge(df_velo_piton, df_meteo, on='date', how='left')
df_merged.head()


datetime64[ns]
datetime64[ns]


Unnamed: 0,date,isoDate,counts,status,ID,name,counter,geo,sens,Température (°C),Pluie (mm/h),Vent (km/h),Humidité (%),Point_rosé (°C),Pression (hPa),Visibilité (km)
0,2017-02-16 10:00:00,2017-02-16T09:00:00+0100,6.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
1,2017-02-17 05:00:00,2017-02-17T04:00:00+0100,0.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
2,2017-02-16 13:00:00,2017-02-16T12:00:00+0100,18.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
3,2017-02-17 11:00:00,2017-02-17T10:00:00+0100,6.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
4,2017-02-16 14:00:00,2017-02-16T13:00:00+0100,11.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,


In [17]:
df_merged = transform.remove_column(df_merged, ['status', 'ID', 'geo', 'counter', 'sens'])

df_merged.head()


Unnamed: 0,date,isoDate,counts,name,Température (°C),Pluie (mm/h),Vent (km/h),Humidité (%),Point_rosé (°C),Pression (hPa),Visibilité (km)
0,2017-02-16 10:00:00,2017-02-16T09:00:00+0100,6.0,Boulevard Georges Pompidou,,,,,,,
1,2017-02-17 05:00:00,2017-02-17T04:00:00+0100,0.0,Boulevard Georges Pompidou,,,,,,,
2,2017-02-16 13:00:00,2017-02-16T12:00:00+0100,18.0,Boulevard Georges Pompidou,,,,,,,
3,2017-02-17 11:00:00,2017-02-17T10:00:00+0100,6.0,Boulevard Georges Pompidou,,,,,,,
4,2017-02-16 14:00:00,2017-02-16T13:00:00+0100,11.0,Boulevard Georges Pompidou,,,,,,,


In [18]:
df_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137218 entries, 0 to 137217
Data columns (total 11 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   date              55330 non-null   datetime64[ns]
 1   isoDate           137218 non-null  object        
 2   counts            134070 non-null  float64       
 3   name              137218 non-null  object        
 4   Température (°C)  2 non-null       object        
 5   Pluie (mm/h)      2 non-null       object        
 6   Vent (km/h)       2 non-null       object        
 7   Humidité (%)      2 non-null       object        
 8   Point_rosé (°C)   2 non-null       object        
 9   Pression (hPa)    2 non-null       object        
 10  Visibilité (km)   2 non-null       object        
dtypes: datetime64[ns](1), float64(1), object(9)
memory usage: 11.5+ MB
