In [16]:
import requests
import pandas as pd
import time
import logging

In [None]:
# Configurer les journaux pour le débogage
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Configuration
API_KEY = "155b570e2029566631d2753469f0e2dcc34f3372f228027689d2f69b29f3f771"  # Remplacez par votre clé API OpenAQ
COUNTRY_CODE = "FR"  # Code ISO pour la France
LIMIT = 1000  # Nombre maximum d'enregistrements par page
TARGET_RECORDS = 10000  # Nombre minimum d'enregistrements à récupérer
POLLUTANT = "pm25"  # Polluant ciblé (PM2.5)

In [20]:
# Fonction pour récupérer toutes les stations disponibles
def fetch_locations() -> pd.DataFrame:
    url = "https://api.openaq.org/v3/locations"
    headers = {"X-API-Key": API_KEY}
    all_locations = []
    page = 1
    
    while True:
        params = {
            "country_id": COUNTRY_CODE,
            "limit": LIMIT,
            "page": page
        }
        try:
            response = requests.get(url, headers=headers, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            results = data.get("results", [])
            if not results:
                print(f"No more locations at page {page}")
                break
            all_locations.extend(results)
            print(f"Fetched {len(results)} locations at page {page}")
            page += 1
            time.sleep(1)
        except requests.exceptions.HTTPError as e:
            print(f"HTTP error fetching locations: {e}")
            break
    df = pd.DataFrame(all_locations)
    print(f"Total locations fetched: {len(df)}")
    return df

In [21]:
# Étape 1 : Vérifier les stations disponibles
print("Fetching available locations in France...")
locations_df = fetch_locations()
if not locations_df.empty:
    print("Available locations:")
    print(locations_df[['id', 'name', 'city', 'country']].to_string())
    # Chercher une station en France (basé sur 'country' ou 'name')
    french_locations = locations_df[locations_df['country'] == 'FR']
    if not french_locations.empty:
        print(f"Found {len(french_locations)} French locations:")
        print(french_locations[['id', 'name', 'city']].head().to_string())
    else:
        print("No French locations found in the data. Check API data.")
else:
    print("No locations found. Check country_id or API key.")

Fetching available locations in France...
Fetched 1000 locations at page 1
Fetched 1000 locations at page 2
Fetched 1000 locations at page 3
Fetched 1000 locations at page 4
Fetched 1000 locations at page 5
Fetched 1000 locations at page 6
Fetched 1000 locations at page 7
Fetched 1000 locations at page 8
Fetched 1000 locations at page 9
Fetched 1000 locations at page 10
Fetched 1000 locations at page 11
Fetched 1000 locations at page 12
Fetched 1000 locations at page 13
Fetched 1000 locations at page 14
Fetched 1000 locations at page 15
Fetched 1000 locations at page 16
Fetched 1000 locations at page 17
Fetched 1000 locations at page 18
Fetched 1000 locations at page 19
Fetched 1000 locations at page 20
Fetched 309 locations at page 21
No more locations at page 22
Total locations fetched: 20309
Available locations:


KeyError: "['city'] not in index"

In [26]:
locations_df[:10]

Unnamed: 0,id,name,locality,timezone,country,owner,provider,isMobile,isMonitor,instruments,sensors,coordinates,licenses,bounds,distance,datetimeFirst,datetimeLast
0,3,NMA - Nima,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 6, 'name': 'pm10 µg/m³', 'parameter': ...","{'latitude': 5.58389, 'longitude': -0.19968}",,"[-0.19968, 5.58389, -0.19968, 5.58389]",,,
1,4,NMT - Nima,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 7, 'name': 'pm10 µg/m³', 'parameter': ...","{'latitude': 5.58165, 'longitude': -0.19898}",,"[-0.19898, 5.58165, -0.19898, 5.58165]",,,
2,5,JTA - Jamestown,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 10, 'name': 'pm10 µg/m³', 'parameter':...","{'latitude': 5.5401139, 'longitude': -0.2103972}",,"[-0.2103972, 5.5401139, -0.2103972, 5.5401139]",,,
3,6,ADT - Asylum Down,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 11, 'name': 'pm10 µg/m³', 'parameter':...","{'latitude': 5.570722, 'longitude': -0.2120555}",,"[-0.2120555, 5.570722, -0.2120555, 5.570722]",,,
4,7,ADEPA - Asylum Down,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 14, 'name': 'pm10 µg/m³', 'parameter':...","{'latitude': 5.567833, 'longitude': -0.2040278}",,"[-0.2040278, 5.567833, -0.2040278, 5.567833]",,,
5,8,ADA - Asylum Down,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 16, 'name': 'pm10 µg/m³', 'parameter':...","{'latitude': 5.566722, 'longitude': -0.2077778}",,"[-0.2077778, 5.566722, -0.2077778, 5.566722]",,,
6,9,ELC - East Legon,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 17, 'name': 'pm10 µg/m³', 'parameter':...","{'latitude': 5.6335571, 'longitude': -0.1651875}",,"[-0.1651875, 5.6335571, -0.1651875, 5.6335571]",,,
7,10,ELT - East Legon,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 20, 'name': 'pm10 µg/m³', 'parameter':...","{'latitude': 5.6408995, 'longitude': -0.1695437}",,"[-0.1695437, 5.6408995, -0.1695437, 5.6408995]",,,
8,11,ELA - East Legon,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 22, 'name': 'pm10 µg/m³', 'parameter':...","{'latitude': 5.6408137, 'longitude': -0.1579063}",,"[-0.1579063, 5.6408137, -0.1579063, 5.6408137]",,,
9,12,SPARTAN - IIT Kanpur,,Asia/Kolkata,"{'id': 9, 'code': 'IN', 'name': 'India'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 226, 'name': 'Spartan'}",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 23, 'name': 'pm25 µg/m³', 'parameter':...","{'latitude': 26.519, 'longitude': 80.233}",,"[80.233, 26.519, 80.233, 26.519]",,,


In [24]:
locations_df["country"].value_counts()

country
{'id': 155, 'code': 'US', 'name': 'United States'}    4904
{'id': 10, 'code': 'CN', 'name': 'China'}             1874
{'id': 190, 'code': 'JP', 'name': 'Japan'}            1606
{'id': 22, 'code': 'FR', 'name': 'France'}             926
{'id': 67, 'code': 'ES', 'name': 'Spain'}              882
                                                      ... 
{'id': 64, 'code': 'AZ', 'name': 'Azerbaijan'}           1
{'id': 105, 'code': 'QA', 'name': 'Qatar'}               1
{'id': 41, 'code': 'UZ', 'name': 'Uzbekistan'}           1
{'id': 182, 'code': 'MG', 'name': 'Madagascar'}          1
{'id': 18, 'code': 'MW', 'name': 'Malawi'}               1
Name: count, Length: 135, dtype: int64

In [27]:
locations_df.columns

Index(['id', 'name', 'locality', 'timezone', 'country', 'owner', 'provider',
       'isMobile', 'isMonitor', 'instruments', 'sensors', 'coordinates',
       'licenses', 'bounds', 'distance', 'datetimeFirst', 'datetimeLast'],
      dtype='object')

In [28]:
locations_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20309 entries, 0 to 20308
Data columns (total 17 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   id             20309 non-null  int64 
 1   name           20281 non-null  object
 2   locality       11833 non-null  object
 3   timezone       20309 non-null  object
 4   country        20309 non-null  object
 5   owner          20309 non-null  object
 6   provider       20309 non-null  object
 7   isMobile       20309 non-null  bool  
 8   isMonitor      20309 non-null  bool  
 9   instruments    20309 non-null  object
 10  sensors        20309 non-null  object
 11  coordinates    20309 non-null  object
 12  licenses       14098 non-null  object
 13  bounds         20309 non-null  object
 14  distance       0 non-null      object
 15  datetimeFirst  19947 non-null  object
 16  datetimeLast   19947 non-null  object
dtypes: bool(2), int64(1), object(14)
memory usage: 2.4+ MB


In [31]:
# Configuration
API_URL = "https://api.openaq.org/v3/measurements"
API_KEY = "155b570e2029566631d2753469f0e2dcc34f3372f228027689d2f69b29f3f771"  # Remplacez par votre clé API OpenAQ
PARAMETERS = ["pm25", "no2", "o3"]  # Polluants ciblés
COUNTRY_CODE = "US"  # Code ISO pour la France
LIMIT = 1000  # Nombre maximum d'enregistrements par page
TARGET_RECORDS = 10000  # Nombre minimum d'enregistrements à récupérer

In [37]:
# Fonction pour récupérer les données
def fetch_openaq_data() -> pd.DataFrame:
    headers = {"X-API-Key": API_KEY}
    all_data = []
    page = 1
    total_fetched = 0
    
    while total_fetched < TARGET_RECORDS:
        params = {
            "country": COUNTRY_CODE,
            "parameter": PARAMETERS,
            "limit": LIMIT,
            "page": page
        }
        
        try:
            response = requests.get(API_URL, headers=headers, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            
            # Vérifier si des données sont retournées
            if not data.get("results"):
                print(f"No more data available at page {page}")
                break
                
            # Ajouter les résultats à la liste
            all_data.extend(data["results"])
            total_fetched += len(data["results"])
            print(f"Fetched {total_fetched} records so far...")
            
            # Passer à la page suivante
            page += 1
            
            # Respecter les limites de taux de l'API
            time.sleep(1)
            
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data: {e}")
            break
    
    # Convertir en DataFrame
    df = pd.DataFrame(all_data)
    print(f"Total records fetched: {len(df)}")
    return df

In [38]:
# Exécuter la récupération des données
df = fetch_openaq_data()

# Sauvegarder les données (optionnel)
df.to_csv("openaq_france_data_v2.csv", index=False)

Error fetching data: 404 Client Error: Not Found for url: https://api.openaq.org/v3/measurements?country=US&parameter=pm25&parameter=no2&parameter=o3&limit=1000&page=1
Total records fetched: 0


In [35]:
# Fonction pour récupérer toutes les stations disponibles
def fetch_locations() -> pd.DataFrame:
    url = "https://api.openaq.org/v3/locations"
    headers = {"X-API-Key": API_KEY}
    all_locations = []
    page = 1
    
    while True:
        params = {
            "country": COUNTRY_CODE,
            "limit": LIMIT,
            "page": page
        }
        try:
            response = requests.get(url, headers=headers, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            results = data.get("results", [])
            if not results:
                print(f"No more locations at page {page}")
                break
            all_locations.extend(results)
            print(f"Fetched {len(results)} locations at page {page}")
            page += 1
            time.sleep(1)
        except requests.exceptions.HTTPError as e:
            print(f"HTTP error fetching locations: {e}")
            break
    df = pd.DataFrame(all_locations)
    print(f"Total locations fetched: {len(df)}")
    return df

In [36]:
# Étape 1 : Vérifier les stations disponibles
print("Fetching available locations in the USA...")
locations_df = fetch_locations()
locations_df

Fetching available locations in the USA...
Fetched 1000 locations at page 1
Fetched 1000 locations at page 2
Fetched 1000 locations at page 3
Fetched 1000 locations at page 4
Fetched 1000 locations at page 5
Fetched 1000 locations at page 6
Fetched 1000 locations at page 7
Fetched 1000 locations at page 8
Fetched 1000 locations at page 9
Fetched 1000 locations at page 10
Fetched 1000 locations at page 11
Fetched 1000 locations at page 12
Fetched 1000 locations at page 13
Fetched 1000 locations at page 14
Fetched 1000 locations at page 15
Fetched 1000 locations at page 16
Fetched 1000 locations at page 17
Fetched 1000 locations at page 18
Fetched 1000 locations at page 19
Fetched 1000 locations at page 20
Fetched 309 locations at page 21
No more locations at page 22
Total locations fetched: 20309


Unnamed: 0,id,name,locality,timezone,country,owner,provider,isMobile,isMonitor,instruments,sensors,coordinates,licenses,bounds,distance,datetimeFirst,datetimeLast
0,3,NMA - Nima,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 6, 'name': 'pm10 µg/m³', 'parameter': ...","{'latitude': 5.58389, 'longitude': -0.19968}",,"[-0.19968, 5.58389, -0.19968, 5.58389]",,,
1,4,NMT - Nima,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 7, 'name': 'pm10 µg/m³', 'parameter': ...","{'latitude': 5.58165, 'longitude': -0.19898}",,"[-0.19898, 5.58165, -0.19898, 5.58165]",,,
2,5,JTA - Jamestown,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 10, 'name': 'pm10 µg/m³', 'parameter':...","{'latitude': 5.5401139, 'longitude': -0.2103972}",,"[-0.2103972, 5.5401139, -0.2103972, 5.5401139]",,,
3,6,ADT - Asylum Down,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 11, 'name': 'pm10 µg/m³', 'parameter':...","{'latitude': 5.570722, 'longitude': -0.2120555}",,"[-0.2120555, 5.570722, -0.2120555, 5.570722]",,,
4,7,ADEPA - Asylum Down,,Africa/Accra,"{'id': 152, 'code': 'GH', 'name': 'Ghana'}","{'id': 4, 'name': 'Unknown Governmental Organi...","{'id': 209, 'name': 'Dr. Raphael E. Arku and C...",False,True,"[{'id': 2, 'name': 'Government Monitor'}]","[{'id': 14, 'name': 'pm10 µg/m³', 'parameter':...","{'latitude': 5.567833, 'longitude': -0.2040278}",,"[-0.2040278, 5.567833, -0.2040278, 5.567833]",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20304,4565934,Manila Observatory,,Asia/Manila,"{'id': 183, 'code': 'PH', 'name': 'Philippines'}","{'id': 12, 'name': 'AirGradient'}","{'id': 66, 'name': 'AirGradient'}",False,False,"[{'id': 7, 'name': 'AirGradient Sensor'}]","[{'id': 13191002, 'name': 'pm1 µg/m³', 'parame...","{'latitude': 14.6354, 'longitude': 121.0779}","[{'id': 41, 'name': 'CC BY 4.0', 'attribution'...","[121.0779, 14.6354, 121.0779, 14.6354]",,"{'utc': '2025-05-30T07:00:00Z', 'local': '2025...","{'utc': '2025-05-30T10:00:00Z', 'local': '2025..."
20305,4566426,Punjab Food Authority,,Asia/Karachi,"{'id': 109, 'code': 'PK', 'name': 'Pakistan'}","{'id': 12, 'name': 'AirGradient'}","{'id': 66, 'name': 'AirGradient'}",False,False,"[{'id': 7, 'name': 'AirGradient Sensor'}]","[{'id': 13191309, 'name': 'pm1 µg/m³', 'parame...","{'latitude': 33.6304566, 'longitude': 73.0729694}","[{'id': 41, 'name': 'CC BY 4.0', 'attribution'...","[73.0729694, 33.6304566, 73.0729694, 33.6304566]",,"{'utc': '2025-05-30T08:00:00Z', 'local': '2025...","{'utc': '2025-05-30T10:00:00Z', 'local': '2025..."
20306,4566427,"Abdullah Khokhar, house Johar Town",,Asia/Karachi,"{'id': 109, 'code': 'PK', 'name': 'Pakistan'}","{'id': 12, 'name': 'AirGradient'}","{'id': 66, 'name': 'AirGradient'}",False,False,"[{'id': 7, 'name': 'AirGradient Sensor'}]","[{'id': 13191314, 'name': 'pm1 µg/m³', 'parame...","{'latitude': 31.460056, 'longitude': 74.280672}","[{'id': 41, 'name': 'CC BY 4.0', 'attribution'...","[74.280672, 31.460056, 74.280672, 31.460056]",,"{'utc': '2025-05-30T08:00:00Z', 'local': '2025...","{'utc': '2025-05-30T10:00:00Z', 'local': '2025..."
20307,4567363,Public Health Engineering,,Asia/Karachi,"{'id': 109, 'code': 'PK', 'name': 'Pakistan'}","{'id': 12, 'name': 'AirGradient'}","{'id': 66, 'name': 'AirGradient'}",False,False,"[{'id': 7, 'name': 'AirGradient Sensor'}]","[{'id': 13191603, 'name': 'pm1 µg/m³', 'parame...","{'latitude': 33.6335684, 'longitude': 73.0614477}","[{'id': 41, 'name': 'CC BY 4.0', 'attribution'...","[73.0614477, 33.6335684, 73.0614477, 33.6335684]",,"{'utc': '2025-05-30T09:00:00Z', 'local': '2025...","{'utc': '2025-05-30T10:00:00Z', 'local': '2025..."


In [43]:
locations_df.loc[:5,"sensors"].values

array([list([{'id': 6, 'name': 'pm10 µg/m³', 'parameter': {'id': 1, 'name': 'pm10', 'units': 'µg/m³', 'displayName': 'PM10'}}, {'id': 5, 'name': 'pm25 µg/m³', 'parameter': {'id': 2, 'name': 'pm25', 'units': 'µg/m³', 'displayName': 'PM2.5'}}]),
       list([{'id': 7, 'name': 'pm10 µg/m³', 'parameter': {'id': 1, 'name': 'pm10', 'units': 'µg/m³', 'displayName': 'PM10'}}, {'id': 8, 'name': 'pm25 µg/m³', 'parameter': {'id': 2, 'name': 'pm25', 'units': 'µg/m³', 'displayName': 'PM2.5'}}]),
       list([{'id': 10, 'name': 'pm10 µg/m³', 'parameter': {'id': 1, 'name': 'pm10', 'units': 'µg/m³', 'displayName': 'PM10'}}, {'id': 9, 'name': 'pm25 µg/m³', 'parameter': {'id': 2, 'name': 'pm25', 'units': 'µg/m³', 'displayName': 'PM2.5'}}]),
       list([{'id': 11, 'name': 'pm10 µg/m³', 'parameter': {'id': 1, 'name': 'pm10', 'units': 'µg/m³', 'displayName': 'PM10'}}, {'id': 12, 'name': 'pm25 µg/m³', 'parameter': {'id': 2, 'name': 'pm25', 'units': 'µg/m³', 'displayName': 'PM2.5'}}]),
       list([{'id': 1

In [45]:
count_pm25 = locations_df["sensors"].apply(
    lambda sensors: any(sensor.get("parameter", {}).get("name") == "pm25" for sensor in sensors)
).sum()
count_pm25

14685