In [1]:
import pandas as pd
import requests
import json
from time import sleep

# Function to fetch World Bank data
def fetch_world_bank_data(indicator, country_code='all', start_year=2000, end_year=2023):
    url = f'https://api.worldbank.org/v2/country/{country_code}/indicator/{indicator}?date={start_year}:{end_year}&format=json&per_page=1000'
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if len(data) > 1:
            return pd.json_normalize(data[1])
        else:
            print(f"No data found for indicator {indicator}")
            return pd.DataFrame()
    else:
        print(f"Failed to fetch data for indicator {indicator}: {response.status_code}")
        return pd.DataFrame()

# Fetch urban population data
urban_population = fetch_world_bank_data('SP.URB.TOTL')

# Fetch PM2.5 air pollution data
pm25_pollution = fetch_world_bank_data('EN.ATM.PM25.MC.M3')

# Function to fetch OpenAQ data with retries
def fetch_openaq_data(city, parameter, start_date='2020-01-01', end_date='2023-01-01', retries=3):
    url = f'https://api.openaq.org/v1/measurements?city={city}&parameter={parameter}&date_from={start_date}&date_to={end_date}&limit=10000'
    for attempt in range(retries):
        response = requests.get(url)
        if response.status_code == 200:
            try:
                data = response.json()
                if 'results' in data:
                    return pd.json_normalize(data['results'])
                else:
                    print(f"No data found for city {city} and parameter {parameter}")
                    return pd.DataFrame()
            except json.JSONDecodeError:
                print(f"Error decoding JSON response for city {city} and parameter {parameter}")
                return pd.DataFrame()
        else:
            print(f"Attempt {attempt + 1} failed: {response.status_code}. Retrying...")
            sleep(2)  # wait before retrying
    print(f"Failed to fetch data for city {city} and parameter {parameter} after {retries} retries.")
    return pd.DataFrame()

# Function to fetch WAQI data
def fetch_waqi_data(city, token, start_date='2020-01-01', end_date='2023-01-01'):
    url = f'https://api.waqi.info/feed/{city}/?token={token}'
    response = requests.get(url)
    if response.status_code == 200:
        try:
            data = response.json()
            if 'data' in data:
                aqi_data = pd.DataFrame(data['data']['iaqi'].items(), columns=['parameter', 'value'])
                aqi_data['city'] = city
                return aqi_data
            else:
                print(f"No data found for city {city}")
                return pd.DataFrame()
        except json.JSONDecodeError:
            print(f"Error decoding JSON response for city {city}")
            return pd.DataFrame()
    else:
        print(f"Failed to fetch WAQI data for city {city}: {response.status_code}")
        return pd.DataFrame()

# List of cities and parameters to fetch data for
cities = ['Delhi', 'Mumbai', 'Kolkata']
parameters = ['pm25', 'no2']
waqi_token = 'YOUR_WAQI_API_TOKEN'  # Replace with your WAQI API token

# Fetch air quality data for the specified cities and parameters
air_quality_data_list = []
for city in cities:
    for parameter in parameters:
        data = fetch_openaq_data(city, parameter)
        if data.empty:  # If OpenAQ fails, try WAQI
            print(f"Trying alternative data source for city {city} and parameter {parameter}")
            data = fetch_waqi_data(city, waqi_token)
        if not data.empty:
            data['city'] = city
            data['parameter'] = parameter
            air_quality_data_list.append(data)

# Combine air quality data
if air_quality_data_list:
    air_quality_data = pd.concat(air_quality_data_list, ignore_index=True)
else:
    air_quality_data = pd.DataFrame()

# Example function to aggregate air quality data to annual averages
def aggregate_air_quality_data(data):
    if not data.empty:
        data['date'] = pd.to_datetime(data['date.utc'])
        data.set_index('date', inplace=True)
        return data.resample('A').mean()
    else:
        return pd.DataFrame()

air_quality_data_aggregated = aggregate_air_quality_data(air_quality_data)

# Combine the datasets (example: merging on year)
if not urban_population.empty and not pm25_pollution.empty and not air_quality_data_aggregated.empty:
    urban_population['year'] = pd.to_datetime(urban_population['date']).dt.year
    pm25_pollution['year'] = pd.to_datetime(pm25_pollution['date']).dt.year
    air_quality_data_aggregated['year'] = air_quality_data_aggregated.index.year

    combined_data = urban_population.merge(pm25_pollution, on=['country', 'year'], suffixes=('_urban', '_pm25'))
    combined_data = combined_data.merge(air_quality_data_aggregated.reset_index(), on='year')

    # Save to CSV
    combined_data.to_csv('urbanization_air_quality_data.csv', index=False)
    print("Data collection complete. Data saved to 'urbanization_air_quality_data.csv'.")
else:
    print("Not enough data to combine into a single dataset.")

Attempt 1 failed: 500. Retrying...
Attempt 2 failed: 500. Retrying...
Attempt 3 failed: 500. Retrying...
Failed to fetch data for city Delhi and parameter pm25 after 3 retries.
Trying alternative data source for city Delhi and parameter pm25


TypeError: string indices must be integers, not 'str'