In [1]:
import requests
import pandas as pd
import pytz
import time

# Define the URLs for the Toulouse bike availability API
station_info_url = 'https://transport.data.gouv.fr/gbfs/toulouse/station_information.json'
station_status_url = 'https://transport.data.gouv.fr/gbfs/toulouse/station_status.json'

# Define the time interval between data retrievals (in seconds)
interval = 900  # 15 minutes

# Define the total number of data retrievals
num_retrievals = 96  # 24 hours

# Upper bound (in seconds) on each HTTP request, so that a stalled connection
# cannot block the collection loop indefinitely
request_timeout = 30

# Create an empty DataFrame to store the collected data
df_all_data = pd.DataFrame()
n = 0


def fetch_stations(url, timeout=request_timeout):
    """Download one feed and return its list of station records.

    Args:
        url: Address of the JSON feed to query.
        timeout: Maximum number of seconds to wait for the server.

    Returns:
        The value stored under ``['data']['stations']`` in the JSON payload.

    Raises:
        requests.exceptions.RequestException: On network errors, timeouts or
            non-success HTTP status codes.
        KeyError: If the payload lacks the ``data`` or ``stations`` key.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()['data']['stations']


def build_snapshot(station_info_data, station_status_data):
    """Combine station descriptions and availability into one DataFrame.

    Args:
        station_info_data: List of dicts describing the stations; each one
            must provide ``station_id`` and ``name``.
        station_status_data: List of dicts with the availability of the
            stations; each one must provide ``station_id`` and
            ``last_reported`` (seconds since the Unix epoch).

    Returns:
        A DataFrame with one row per entry of ``station_info_data`` (left
        merge on ``station_id``). ``last_reported`` is rendered as a
        ``DD/MM/YYYY HH:MM:SS`` string in Paris local time and the first
        eight characters of ``name`` are removed.

    Raises:
        KeyError: If a required column is absent from the input records.
    """
    df_stations = pd.DataFrame(station_info_data)
    df_disponibilites = pd.DataFrame(station_status_data)

    # Epoch seconds -> timezone-aware UTC -> Paris local time -> plain string
    # without timezone indication
    last_reported_utc = pd.to_datetime(
        df_disponibilites['last_reported'], unit='s', utc=True
    )
    df_disponibilites['last_reported'] = (
        last_reported_utc.dt.tz_convert('Europe/Paris')
        .dt.strftime('%d/%m/%Y %H:%M:%S')
    )

    # Merge the station and availability data into a single DataFrame
    snapshot = pd.merge(df_stations, df_disponibilites, how='left', on='station_id')

    # Remove the station code from the 'name' column (starting from the 8th
    # character); the .str accessor leaves missing names as NaN instead of
    # raising
    snapshot['name'] = snapshot['name'].str[8:]
    return snapshot


# The guard keeps the helpers importable without starting a 24-hour polling
# run; when executed as a script or a notebook cell __name__ is "__main__",
# so the collection starts exactly as before.
if __name__ == "__main__":
    try:
        for attempt in range(num_retrievals):
            is_last_attempt = attempt == num_retrievals - 1
            try:
                station_info_data = fetch_stations(station_info_url)
                station_status_data = fetch_stations(station_status_url)
                df_velo_temps_reel = build_snapshot(
                    station_info_data, station_status_data
                )
            except (requests.exceptions.RequestException, KeyError, ValueError) as e:
                # A failed call or a malformed payload only costs this one
                # sample; the collection carries on at the next tick.
                print('An error occurred during the API call:', e)
                if not is_last_attempt:
                    print(f'Retrying in {interval / 60:g} minutes...')
            else:
                # Append the current data to the overall DataFrame and save
                # right away so an interruption never loses collected samples
                df_all_data = pd.concat([df_all_data, df_velo_temps_reel])
                df_all_data.to_csv('bike_data.csv', index=False)
                n += 1
                print(f"Données récupérées avec succès. numéro de récupération = {n}")

            # Wait for the specified interval before the next retrieval;
            # there is nothing to wait for after the final one
            if not is_last_attempt:
                time.sleep(interval)

        # Export the collected data to a CSV file
        df_all_data.to_csv('bike_data.csv', index=False)
        print("Data collection complete. Bike data saved to 'bike_data.csv'.")
    except KeyboardInterrupt:
        print("Data collection stopped by user.")


Données récupérées avec succès. numéro de récupération = 1
Données récupérées avec succès. numéro de récupération = 2
Données récupérées avec succès. numéro de récupération = 3


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=7c80b052-7ead-4799-b132-a25d57707958' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>