# Importation des modules nécessaires

In [1]:
from Extract.API import API
from Extract.Scrapping import Scrapping
from Transform.TransformData import TransformData   
import pandas as pd

# Récupération de la donnée (via api)

In [2]:
# Utiliser l'API pour récupérer les données vélo/piéton
api = API()

# Lien de l'API qui retourne un CSV
lien_api_velo_pieton = (
    "https://data.rennesmetropole.fr/api/explore/v2.1/catalog/datasets/"
    "eco-counter-data/exports/csv?lang=fr&timezone=Europe%2FBerlin&"
    "use_labels=true&delimiter=%3B"
)

# Récupérer toutes les données au format CSV
csv_pieton_velo_rennes = api.get_api_data(lien_api_velo_pieton)

# Vérifier et afficher les données récupérées
if csv_pieton_velo_rennes is not None:
    print(f"Total d'enregistrements récupérés : {len(csv_pieton_velo_rennes)}")
else:
    print("Aucune donnée récupérée.")

df_velo_piton = pd.DataFrame(csv_pieton_velo_rennes)
df_velo_piton.head()


Les données ont été chargées avec succès.
Total d'enregistrements récupérés : 137554


Unnamed: 0,date,isoDate,counts,status,ID,name,counter,geo,sens
0,2017-02-16T10:00:00+01:00,2017-02-16T09:00:00+0100,6.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
1,2017-02-16T13:00:00+01:00,2017-02-16T12:00:00+0100,18.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
2,2017-02-16T14:00:00+01:00,2017-02-16T13:00:00+0100,11.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
3,2017-02-16T17:00:00+01:00,2017-02-16T16:00:00+0100,31.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5
4,2017-02-16T20:00:00+01:00,2017-02-16T19:00:00+0100,18.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5


# Récupération de la data (via scrapping)

In [3]:
# Importation de notre class de scraping 
scrap = Scrapping()

### Scrap toute les datas d'aujourd'hui 

In [4]:
# col, data = scrap.scrap_site()

# df_meteo_aujourdhui = pd.DataFrame(data, columns=col)
# df_meteo_aujourdhui.head()

### Scrap à partir d'une date

In [5]:
# # Scrap à partir de x date et renvoie un csv dans dataset et renvoie un dataframe 
# df_meteo_date = scrap.scrap_with_start_date('2024-11-12')

# df_meteo_date.head()

# Transformation de la data

In [29]:
df_meteo = pd.read_csv("./dataset/meteo_rennes.csv")
df_meteo.count()

Unnamed: 0                                16368
Heure locale\naccess_time\n30mn\nMETAR    16368
Unnamed: 2                                16339
Température                               16362
Temps                                        44
Pluie                                     16344
Vent                                      16350
Humidité                                  16343
Bio-météo                                 11867
Pt. de rosée                              16334
Pression                                  16318
Visibilité                                16297
dtype: int64

In [7]:
transform = TransformData()

df_meteo = transform.remove_currency_symbols(df_meteo, ['Humidité', 'Pression', 'Visibilité', 'Température', 'Pluie', 'Vent', 'Pt. de rosée'])

df_meteo = transform.rename_column(df_meteo, 'Heure locale\naccess_time\n30mn\nMETAR', 'date')
df_meteo = transform.rename_column(df_meteo, 'Température', 'Temperature')
df_meteo = transform.rename_column(df_meteo, 'Humidité', 'Humidite')
df_meteo = transform.rename_column(df_meteo, 'Pt. de rosée', 'Point_rose')
df_meteo = transform.rename_column(df_meteo, 'Pression', 'Pression')
df_meteo = transform.rename_column(df_meteo, 'Visibilité', 'Visibilite')

df_meteo = transform.remove_column(df_meteo, ['Unnamed: 0', 'Unnamed: 2', 'Temps', 'Bio-météo'])

df_meteo.head()

Traitement de la colonne: Humidité
Traitement de la colonne: Pression
Traitement de la colonne: Visibilité
Traitement de la colonne: Température
Traitement de la colonne: Pluie
Traitement de la colonne: Vent
Traitement de la colonne: Pt. de rosée


Unnamed: 0,date,Temperature,Pluie,Vent,Humidite,Point_rose,Pression,Visibilite
0,2023-01-01 00:00:00,,1.0,,94.0,8.6,1013.3,60.0
1,2023-01-01 23:00:00,,1.0,,94.0,9.0,1012.8,60.0
2,2023-01-01 22:00:00,,1.0,,91.0,9.1,1011.6,60.0
3,2023-01-01 21:00:00,,1.0,,89.0,11.3,1010.1,14.0
4,2023-01-01 20:00:00,,1.0,,89.0,11.2,1008.6,30.0


In [8]:
# Convertir les colonnes 'date' en format datetime sans décalage horaire pour les deux DataFrames
df_velo_piton['date'] = pd.to_datetime(df_velo_piton['date'], utc=True).dt.tz_convert(None)
df_meteo['date'] = pd.to_datetime(df_meteo['date'], utc=True).dt.tz_convert(None)

# Effectuer la jointure
df_merged = pd.merge(df_velo_piton, df_meteo, on='date', how='left')
df_merged.head()


Unnamed: 0,date,isoDate,counts,status,ID,name,counter,geo,sens,Temperature,Pluie,Vent,Humidite,Point_rose,Pression,Visibilite
0,2017-02-16 09:00:00,2017-02-16T09:00:00+0100,6.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
1,2017-02-16 12:00:00,2017-02-16T12:00:00+0100,18.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
2,2017-02-16 13:00:00,2017-02-16T13:00:00+0100,11.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
3,2017-02-16 16:00:00,2017-02-16T16:00:00+0100,31.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,
4,2017-02-16 19:00:00,2017-02-16T19:00:00+0100,18.0,0.0,100017942,Boulevard Georges Pompidou,,"48.1014223209623, -1.68490237617492",5,,,,,,,


In [9]:
df_merged = transform.remove_column(df_merged, ['status', 'ID', 'geo', 'counter', 'sens'])

df_merged.head()

Unnamed: 0,date,isoDate,counts,name,Temperature,Pluie,Vent,Humidite,Point_rose,Pression,Visibilite
0,2017-02-16 09:00:00,2017-02-16T09:00:00+0100,6.0,Boulevard Georges Pompidou,,,,,,,
1,2017-02-16 12:00:00,2017-02-16T12:00:00+0100,18.0,Boulevard Georges Pompidou,,,,,,,
2,2017-02-16 13:00:00,2017-02-16T13:00:00+0100,11.0,Boulevard Georges Pompidou,,,,,,,
3,2017-02-16 16:00:00,2017-02-16T16:00:00+0100,31.0,Boulevard Georges Pompidou,,,,,,,
4,2017-02-16 19:00:00,2017-02-16T19:00:00+0100,18.0,Boulevard Georges Pompidou,,,,,,,


In [10]:
# Suppression des lignes où la colonne 'Température (°C)' a une valeur NaN
df_cleaned = df_merged.dropna(subset=['Temperature'])
df_cleaned_all = df_cleaned.drop_duplicates(subset=['date'])

df_cleaned_all = transform.remove_column(df_cleaned_all, 'isoDate')
# Affichage des premières lignes pour vérifier le résultat
df_cleaned_all.head()

Unnamed: 0,date,counts,name,Temperature,Pluie,Vent,Humidite,Point_rose,Pression,Visibilite
35634,2023-11-01 08:00:00,6.0,Rennes Rue d'Isly V1,11.01112,2.91,,88.0,9.1,997.8,14.0
35669,2023-11-07 20:00:00,96.0,Rennes Rue d'Isly V1,8.089,1.0,,83.0,5.3,1017.1,50.0
35705,2023-11-15 10:00:00,37.0,Rennes Rue d'Isly V1,11.0911,1.0,,90.0,9.4,1024.0,18.0
35744,2023-11-25 02:00:00,4.0,Rennes Rue d'Isly V1,4.145,1.0,,83.0,1.5,1022.9,30.0
36248,2024-03-05 17:00:00,84.0,Rennes Rue d'Isly V1,10.3911,1.0,,65.0,4.0,1018.2,55.0


In [11]:
df_cleaned_all.count()

date           183
counts         178
name           183
Temperature    183
Pluie          177
Vent            25
Humidite       183
Point_rose     183
Pression       183
Visibilite     183
dtype: int64

In [12]:
df_cleaned_all = df_cleaned_all.drop(df_cleaned_all[df_cleaned_all['Temperature'] == 'nn'].index)
df_cleaned_all = df_cleaned_all.drop(df_cleaned_all[df_cleaned_all['Pluie'] == 'nn'].index)
df_cleaned_all = df_cleaned_all.drop(df_cleaned_all[df_cleaned_all['Humidite'] == 'nn'].index)
df_cleaned_all = df_cleaned_all.drop(df_cleaned_all[df_cleaned_all['Point_rose'] == 'nn'].index)
df_cleaned_all = df_cleaned_all.drop(df_cleaned_all[df_cleaned_all['Pression'] == 'Visibilite'].index)

In [13]:
df_cleaned_all = transform.remove_currency_symbols(df_cleaned_all, ['Point_rose'])

Traitement de la colonne: Point_rose


In [14]:
df_cleaned_all[df_cleaned_all['Point_rose'] == '1008.8\n\n=']

Unnamed: 0,date,counts,name,Temperature,Pluie,Vent,Humidite,Point_rose,Pression,Visibilite


In [15]:
df_cleaned_all['Temperature'] = df_cleaned_all['Temperature'].astype(float)
df_cleaned_all['Pluie'] = df_cleaned_all['Pluie'].astype(float)
df_cleaned_all['Vent'] = df_cleaned_all['Vent'].astype(float)
df_cleaned_all['Humidite'] = df_cleaned_all['Humidite'].astype(float)
df_cleaned_all['Point_rose'] = df_cleaned_all['Point_rose'].astype(float)
df_cleaned_all.info()

<class 'pandas.core.frame.DataFrame'>
Index: 183 entries, 35634 to 129112
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date         183 non-null    datetime64[ns]
 1   counts       178 non-null    float64       
 2   name         183 non-null    object        
 3   Temperature  183 non-null    float64       
 4   Pluie        177 non-null    float64       
 5   Vent         25 non-null     float64       
 6   Humidite     183 non-null    float64       
 7   Point_rose   183 non-null    float64       
 8   Pression     183 non-null    float64       
 9   Visibilite   183 non-null    float64       
dtypes: datetime64[ns](1), float64(8), object(1)
memory usage: 15.7+ KB


# Connection bdd

In [16]:
from Load.DataBaseConnection import DataBaseConnection
import mysql.connector
from sqlalchemy import create_engine

In [None]:
# Informations de connexion
DB_USER = 'root'
DB_PASSWORD = 'admin'
DB_HOST = 'localhost'
DB_PORT = 3306
DB_NAME = 'ETL'

# Créer une connexion à MySQL
engine = create_engine(f"mysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}")

print(engine)

# Vérifier la connexion
try:
    with engine.connect() as conn:
        print("Connexion réussie à la base de données")
except Exception as e:
    print(f"Erreur de connexion : {e}")


Engine(mysql://root:***@localhost:3306/ETL)
Connexion réussie à la base de données


In [28]:
# Insérer les données dans MySQL
try:
    with engine.connect() as connection:  # Assure que la connexion reste ouverte
        df_cleaned_all.to_sql('weather_pieton_count_data', con=connection, if_exists='append', index=False)
    print("Données insérées avec succès")
except Exception as e:
    print(f"Erreur lors de l'insertion des données : {e}")


Données insérées avec succès
