## Partie pour uploader sur GCP des fichiers déjà concaténés

In [None]:
!pip install pandas-gbq

In [None]:
import dotenv
import os
import pandas as pd
import json
import requests
from datetime import datetime
from pandas_gbq import to_gbq
# Load the variables declared in a local .env file into the environment.
dotenv.load_dotenv()
# Directory that holds the CSV files to upload; os.getenv returns None when
# PATH_DATA is not set.
path = os.getenv("PATH_DATA")

In [None]:
# List the files in the data directory, leaving out the Joined_DF_* exports
# named below (presumably already handled elsewhere -- confirm).
# A membership filter is used instead of list.remove() so that an export
# missing from the directory does not abort the cell with a ValueError.
excluded_files = {
    "Joined_DF_December.csv",
    "Joined_DF_January.csv",
    "Joined_DF_February.csv",
    "Joined_DF_April.csv",
}
files = [name for name in os.listdir(path) if name not in excluded_files]

# Bare expression so the notebook displays the resulting list.
files

In [None]:
def load_file(file):
    """Read one headerless CSV file from the data directory.

    Args:
        file: Name of the CSV file inside the module-level ``path`` directory.

    Returns:
        A DataFrame whose columns carry default integer labels, because the
        file is read with ``header=None``.
    """
    # os.path.join uses the separator of the running OS; the previous
    # hard-coded backslash only produced a valid path on Windows.
    return pd.read_csv(os.path.join(path, file), header=None)

def call_data(good_columns=False):
    """Fetch the ``ilevia:vlille_temps_reel`` collection and flatten it.

    Args:
        good_columns: When exactly ``True``, return the list of column names
            instead of the data. Any other value returns the DataFrame.

    Returns:
        The DataFrame built by flattening the GeoJSON ``features`` list, or,
        when ``good_columns`` is ``True``, its column names followed by
        ``"Scrapped_Date"`` and ``"Scrapped_Hour"``.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If the API does not answer within the timeout.
    """
    api_url = "https://data.lillemetropole.fr/data/ogcapi/collections/ilevia:vlille_temps_reel/items?f=geojson&limit=-1"
    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(api_url, timeout=30)
    # Fail on HTTP errors here rather than on a confusing JSON/KeyError later.
    response.raise_for_status()
    api_data = response.json()
    df = pd.json_normalize(api_data["features"])
    if good_columns is True:
        column_names = df.columns.tolist()
        # The scraped CSV files carry two extra trailing columns.
        column_names.extend(["Scrapped_Date", "Scrapped_Hour"])
        return column_names
    return df

def preprocess_file(df):
    """Label, trim and normalise a raw scraped DataFrame in place.

    Column names come from ``call_data(good_columns=True)``, so the frame
    must have exactly as many columns as that list.

    Args:
        df: Headerless DataFrame as read from a scraped CSV file. It is
            modified in place.

    Returns:
        The same DataFrame object, with unused columns dropped, dots in
        column names replaced by underscores, and the ``Date_Scrapping``,
        ``Heure_Min_Scrapping`` and ``Date_Paris`` columns added.
    """
    df.columns = call_data(good_columns=True)

    unused_columns = [
        "type",
        "@typeName",
        "geometry.type",
        "geometry.@name",
        "geometry.@srs",
        "properties.code_insee",
        "properties.type",
    ]
    df.drop(columns=unused_columns, inplace=True)

    # Keep only the bare station identifier.
    df.rename(columns={"@id": "ID_station"}, inplace=True)
    station_ids = df["ID_station"]
    df["ID_station"] = station_ids.str.replace("vlille_temps_reel.", "", regex=False)

    # Flatten the dotted names produced by json_normalize.
    df.columns = df.columns.str.replace(".", "_", regex=False)

    # Scrape date: "dd_mm_YYYY" -> datetime.
    scraped_date = df["Scrapped_Date"].str.replace("_", "/", regex=False)
    df["Date_Scrapping"] = pd.to_datetime(scraped_date, format="%d/%m/%Y")

    # Scrape time: "HH_MM" -> time. The ".c" fragment is stripped first
    # (presumably left over from a ".csv" suffix -- confirm).
    scraped_hour = df["Scrapped_Hour"].str.replace("_", ":", regex=False)
    scraped_hour = scraped_hour.str.replace(".c", "", regex=False)
    df["Heure_Min_Scrapping"] = pd.to_datetime(scraped_hour, format="%H:%M").dt.time

    df.drop(columns=["Scrapped_Date", "Scrapped_Hour"], inplace=True)

    # Express the API modification timestamp in Paris local time.
    modified_at = pd.to_datetime(df["properties_date_modification"])
    df["Date_Paris"] = modified_at.dt.tz_convert("Europe/Paris")
    return df
def to_bq(
    df,
    destination_table="data_table_vlille_59000.table_2025",
    project_id="dash-vlille",
    if_exists="append",
):
    """Upload a DataFrame to BigQuery and report the outcome as a message.

    Args:
        df: DataFrame to upload.
        destination_table: Target table, passed to ``to_gbq`` as is.
        project_id: Google Cloud project, passed to ``to_gbq`` as is.
        if_exists: Behaviour when the table already exists, passed to
            ``to_gbq`` as is.

    Returns:
        ``"Successfully uploaded to BQ."`` on success, otherwise a message
        starting with ``"Failed to upload to BQ: "`` followed by the error.
        Failures are reported rather than raised so the caller can carry on.
    """
    try:
        to_gbq(
            df,
            destination_table,
            project_id=project_id,
            if_exists=if_exists,
        )
    except Exception as e:  # deliberate best-effort: report, do not raise
        return f"Failed to upload to BQ: {e}"
    return "Successfully uploaded to BQ."

# Process every listed file in turn: load it, clean it, then upload it.
# `df` stays bound to the last processed frame once the loop has finished.
for file in files:
    print("Démarrage du traitement du fichier :", file)
    df = load_file(file)
    df = preprocess_file(df)
    print(f"Uploading {file} to GCP...")
    # to_bq returns a status string instead of raising, so a failed upload
    # is only printed and the loop moves on to the next file.
    result = to_bq(df)
    print(result)

In [None]:
# Inspect the frame left in `df` by the processing loop (the last file handled).
df.info()
df.head()
