In [24]:
import json
import os
import time

import pandas as pd
import requests
from psycopg2 import connect, sql
from psycopg2.extras import execute_values
from sqlalchemy import create_engine
from sqlalchemy import inspect

In [25]:
# Name of the table holding the activities, and the Strava OAuth token endpoint.
TABLE_NAME = "activites"
AUTH_URL = "https://www.strava.com/oauth/token"

# Credentials are read from the environment so that no secret is stored in the
# notebook itself. Export DB_URI, STRAVA_CLIENT_SECRET and STRAVA_REFRESH_TOKEN
# before starting the kernel.
# NOTE(review): the secrets that used to be hard-coded in this cell should be
# considered exposed and rotated.
DB_URI = os.environ.get("DB_URI", "postgresql://localhost:5432/postgres")
STRAVA_CLIENT_ID = os.environ.get("STRAVA_CLIENT_ID", "114062")
# Empty defaults: a missing secret makes the token request fail instead of
# silently falling back to a value committed with the notebook.
STRAVA_CLIENT_SECRET = os.environ.get("STRAVA_CLIENT_SECRET", "")
STRAVA_REFRESH_TOKEN = os.environ.get("STRAVA_REFRESH_TOKEN", "")


In [26]:
# Connection parameters handed to psycopg2.connect(). Each value can be
# overridden with the matching libpq-style environment variable.
HOST = os.environ.get("PGHOST", "localhost")
DATABASE = os.environ.get("PGDATABASE", "postgres")
USER = os.environ.get("PGUSER", "arthurdercq")
# The password is never stored in the notebook; it is None when PGPASSWORD is
# not set.
# NOTE(review): psycopg2 is expected to ignore a None password and let libpq
# use its own lookup (e.g. ~/.pgpass) -- confirm for the installed version.
PASSWORD = os.environ.get("PGPASSWORD")
PORT = os.environ.get("PGPORT", "5432")

In [27]:

def get_all_activity_ids_from_db(db_uri, table_name, limit=5):
    """
    Read activity ids from the ``id`` column of a PostgreSQL table.

    Args:
        db_uri: SQLAlchemy connection URI of the database.
        table_name: name of the table to read. It is interpolated into the SQL
            text as-is, so only trusted names must be passed.
        limit: maximum number of rows to read. Defaults to 5, the value that
            was previously hard-coded in the query, so despite the function
            name only a sample is returned by default. Pass ``None`` to read
            every row.

    Returns:
        The ids as a list of strings.
    """
    query = f"SELECT id FROM {table_name}"
    if limit is not None:
        # int() guarantees that only a number can reach the SQL text.
        query += f" LIMIT {int(limit)}"

    engine = create_engine(db_uri)
    try:
        with engine.connect() as conn:
            df = pd.read_sql(query, conn)
    finally:
        # The engine is local to this call: release its connection pool.
        engine.dispose()

    print("Ids récupérés ✅")
    return df["id"].astype(str).tolist()


In [28]:
def get_strava_header():
    """
    Request an access token from AUTH_URL with the refresh-token grant and
    build the matching authorization header.

    Reads the module-level STRAVA_CLIENT_ID, STRAVA_CLIENT_SECRET and
    STRAVA_REFRESH_TOKEN constants.

    Returns:
        A dict ``{'Authorization': 'Bearer <access_token>'}`` usable as
        ``headers`` for subsequent requests.

    Raises:
        requests.HTTPError: if the token endpoint answers with an error status.
        KeyError: if the JSON answer contains no ``access_token``.
    """
    payload = {
        'client_id': STRAVA_CLIENT_ID,
        'client_secret': STRAVA_CLIENT_SECRET,
        'refresh_token': STRAVA_REFRESH_TOKEN,
        'grant_type': "refresh_token",
        'f': 'json'
    }
    # Certificate verification is left enabled (requests' default): the payload
    # carries the client secret. The timeout keeps the cell from hanging forever.
    res = requests.post(AUTH_URL, data=payload, timeout=30)
    # Surface an authentication failure as an HTTP error rather than as a
    # KeyError on the missing 'access_token' key.
    res.raise_for_status()
    access_token = res.json()['access_token']
    return {'Authorization': 'Bearer ' + access_token}

In [29]:
def fetch_stream(activity_id, header):
    """
    Fetch the latlng, altitude, distance and time streams of one activity.

    Args:
        activity_id: identifier of the activity, inserted into the request URL.
        header: HTTP headers sent with the request (authorization header).

    Returns:
        A DataFrame with the columns ``activity_id``, ``lat``, ``lon``,
        ``altitude``, ``distance_m`` and ``time_s``, one row per stream point.
        A stream that is absent from the answer yields a column of ``None``
        instead of making the construction fail; when no stream at all is
        present the DataFrame is empty.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        ValueError: if the streams that are present have different lengths.
    """
    url = f"https://www.strava.com/api/v3/activities/{activity_id}/streams"
    params = {"keys": "latlng,altitude,distance,time", "key_by_type": "true"}
    resp = requests.get(url, headers=header, params=params, timeout=30)
    resp.raise_for_status()
    streams = resp.json()

    latlng = streams.get("latlng", {}).get("data") or []
    altitude = streams.get("altitude", {}).get("data") or []
    distance = streams.get("distance", {}).get("data") or []
    # Named elapsed_s so the local does not shadow the `time` module.
    elapsed_s = streams.get("time", {}).get("data") or []

    n_points = max(len(latlng), len(altitude), len(distance), len(elapsed_s))

    def _column(values):
        # A missing stream becomes an all-None column of the right length.
        # Streams that are present but of different lengths are left as they
        # are, so pandas still rejects genuinely inconsistent data.
        return values if values else [None] * n_points

    df_stream = pd.DataFrame({
        "activity_id": [activity_id] * n_points,
        "lat": _column([pt[0] for pt in latlng]),
        "lon": _column([pt[1] for pt in latlng]),
        "altitude": _column(altitude),
        "distance_m": _column(distance),
        "time_s": _column(elapsed_s),
    })
    print(f"Stream de l'activité {activity_id} récupéré ✅")

    return df_stream


def fetch_multiple_streams_df(activity_ids, header, max_per_15min=590):
    """
    Fetch the streams of several activities and return them as one DataFrame.

    Activities whose fetch raises, or whose stream is empty or has no altitude
    value at all, are skipped and only counted in the final message.

    Args:
        activity_ids: iterable of activity identifiers.
        header: HTTP headers passed to ``fetch_stream``.
        max_per_15min: number of fetch attempts allowed before the function
            sleeps for 15 minutes and starts a new window.

    Returns:
        The concatenation of all retained streams, or an empty DataFrame
        (without columns) when none was retained.
    """
    dfs = []
    no_stream_ids = []
    attempts_in_window = 0
    for activity_id in activity_ids:
        if attempts_in_window >= max_per_15min:
            print("⏸ Pause 15 minutes pour respecter la limite Strava…")
            time.sleep(15 * 60)
            attempts_in_window = 0
        # Count the attempt before it is made: a call that ends in an error has
        # still been sent, so it must weigh on the window like a successful one.
        attempts_in_window += 1
        try:
            df_stream = fetch_stream(activity_id, header)
            if df_stream.empty or df_stream["altitude"].isna().all():
                no_stream_ids.append(activity_id)
            else:
                dfs.append(df_stream)
        except Exception as e:
            # Deliberate best effort: one failing activity must not abort the batch.
            print(f"Erreur pour l'activité {activity_id}: {e}")
            no_stream_ids.append(activity_id)
    if dfs:
        result = pd.concat(dfs, ignore_index=True)
    else:
        result = pd.DataFrame()
    print(f"{len(no_stream_ids)} activités sans stream (ignorées).")
    return result

In [30]:
def store_df_streams_in_postgresql(df_streams, host, database, user, password, port, table_name="streams"):
    """
    Store a DataFrame of Strava streams in a PostgreSQL table.

    The table is created when it does not exist yet, then every row of
    ``df_streams`` is inserted in one transaction. The connection is always
    closed, and the transaction is rolled back if anything fails.

    Args:
        df_streams: DataFrame with the columns ``activity_id``, ``lat``,
            ``lon``, ``altitude``, ``distance_m`` and ``time_s``. An empty
            DataFrame only triggers the table creation.
        host, database, user, password, port: connection parameters passed to
            ``psycopg2.connect``.
        table_name: name of the target table.

    Raises:
        KeyError: if a non-empty ``df_streams`` lacks one of the columns.
        Any exception raised by psycopg2 is propagated after the rollback.
    """
    columns = ('activity_id', 'lat', 'lon', 'altitude', 'distance_m', 'time_s')
    table = sql.Identifier(table_name)

    # Create the table if it does not exist.
    # NOTE: the table is declared without any unique constraint, so the
    # ON CONFLICT DO NOTHING clause below never discards a row; duplicates have
    # to be filtered by the caller.
    create_table_query = sql.SQL("""
    CREATE TABLE IF NOT EXISTS {} (
        activity_id VARCHAR(50),
        lat FLOAT,
        lon FLOAT,
        altitude FLOAT,
        distance_m FLOAT,
        time_s FLOAT
    );
    """).format(table)

    insert_query = sql.SQL("""
        INSERT INTO {} ({})
        VALUES %s
        ON CONFLICT DO NOTHING
    """).format(
        table,
        sql.SQL(', ').join(map(sql.Identifier, columns))
    )

    # Rows to insert, as plain tuples in the order of `columns`. The emptiness
    # guard keeps a column-less empty DataFrame acceptable.
    if df_streams.empty:
        values = []
    else:
        values = list(df_streams[list(columns)].itertuples(index=False, name=None))

    conn = connect(
        host=host,
        database=database,
        user=user,
        password=password,
        port=port
    )
    try:
        cur = conn.cursor()
        try:
            cur.execute(create_table_query)
            execute_values(cur, insert_query.as_string(conn), values)
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            cur.close()
    finally:
        conn.close()
    print("Streams importés dans PostgreSQL ✅")


In [31]:
def get_existing_stream_ids(db_uri, table_name="streams"):
    """
    Return the distinct ``activity_id`` values already stored in a table.

    Args:
        db_uri: SQLAlchemy connection URI of the database.
        table_name: table to read, optionally schema-qualified
            (``schema.table``). It is interpolated into the SQL text as-is, so
            only trusted names must be passed.

    Returns:
        A set of ids as strings; an empty set when the table does not exist
        yet (e.g. before the first import has created it).
    """
    schema, _, bare_name = table_name.rpartition(".")
    engine = create_engine(db_uri)
    try:
        with engine.connect() as conn:
            # On a first run nothing has created the table yet: report
            # "nothing stored" instead of failing on the SELECT.
            if not inspect(conn).has_table(bare_name, schema=schema or None):
                return set()
            df = pd.read_sql(f"SELECT DISTINCT activity_id FROM {table_name}", conn)
    finally:
        # The engine is local to this call: release its connection pool.
        engine.dispose()
    return set(df["activity_id"].astype(str))


In [36]:


# 1. Read the activity ids stored in the activities table
activity_ids = get_all_activity_ids_from_db(DB_URI, TABLE_NAME)

# 2. Look up which activities already have their streams stored, and keep only
#    the others. Filtering before the download avoids spending Strava requests
#    on streams that would be discarded afterwards.
existing_stream_ids = get_existing_stream_ids(DB_URI, "streams")
new_activity_ids = [
    activity_id for activity_id in activity_ids
    if str(activity_id) not in existing_stream_ids
]

# 3. Get the Strava authentication header
header = get_strava_header()

# 4. Fetch the streams of the activities that are not stored yet
streams_df = fetch_multiple_streams_df(new_activity_ids, header)

# 5. Keep the safety filter on the fetched rows. When nothing was fetched the
#    frame has no "activity_id" column, so the filter is skipped.
if streams_df.empty:
    streams_df_to_insert = streams_df
else:
    streams_df_to_insert = streams_df[~streams_df["activity_id"].astype(str).isin(existing_stream_ids)]

# 6. Store the new streams in PostgreSQL
store_df_streams_in_postgresql(streams_df_to_insert, host=HOST, database=DATABASE, user=USER, password=PASSWORD, port=PORT)


Ids récupérés ✅




Stream de l'activité 15447069500 récupéré ✅
Stream de l'activité 15431494748 récupéré ✅
Stream de l'activité 15351665154 récupéré ✅
Stream de l'activité 15341875770 récupéré ✅
Stream de l'activité 15284034306 récupéré ✅
0 activités sans stream (ignorées).
Streams importés dans PostgreSQL ✅


  result = pd.concat(dfs, ignore_index=True)
