In [6]:
# Import the dependencies.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from citipy import citipy
from scipy.stats import linregress
import requests
import time
from datetime import datetime

# Import API key
from config import weather_api_key

# Turn off warning messages
import warnings
warnings.filterwarnings("ignore")

#  Import and read the charity_data.csv.
df = pd.read_csv("100_cities_for_api")
df.tail()

Unnamed: 0,ID,Country,City,Population,Lat,Lng
95,95,sy,Aleppo,2139878,36.25,37.5
96,96,fr,Paris,2110694,48.8534,2.3488
97,97,cn,Jinan,2069266,36.6683,116.9972
98,98,cn,Tangshan,2054526,39.6333,118.1833
99,99,cn,Dalian,2035307,38.9122,121.6022


In [7]:
# Define a list of cities to retrieve data for
cities = df['City'].tolist()
cities

['Tokyo',
 'Shanghai',
 'Bombay',
 'Karachi',
 'Delhi',
 'New Delhi',
 'Manila',
 'Moscow',
 'Seoul',
 'Sao Paulo',
 'Istanbul',
 'Lagos',
 'Mexico',
 'Jakarta',
 'New York',
 'Kinshasa',
 'Cairo',
 'Lima',
 'Peking',
 'London',
 'Bogota',
 'Dhaka',
 'Lahore',
 'Rio De Janeiro',
 'Baghdad',
 'Bangkok',
 'Bangalore',
 'Santiago',
 'Calcutta',
 'Toronto',
 'Rangoon',
 'Sydney',
 'Madras',
 'Wuhan',
 'Saint Petersburg',
 'Chongqing',
 'Xian',
 'Chengdu',
 'Los Angeles',
 'Alexandria',
 'Tianjin',
 'Melbourne',
 'Ahmadabad',
 'Abidjan',
 'Kano',
 'Casablanca',
 'Hyderabad',
 'Ibadan',
 'Singapore',
 'Ankara',
 'Shenyang',
 'Riyadh',
 'Ho Chi Minh City',
 'Cape Town',
 'Berlin',
 'Montreal',
 'Harbin',
 'Guangzhou',
 'Durban',
 'Madrid',
 'Nanjing',
 'Kabul',
 'Pune',
 'Surat',
 'Chicago',
 'Kanpur',
 'Omdurman',
 'Luanda',
 'Addis Abeba',
 'Nairobi',
 'Taiyuan',
 'Jaipur',
 'Salvador',
 'Dakar',
 'Dar Es Salaam',
 'Rome',
 'Mogadishu',
 'Jeddah',
 'Changchun',
 'Taipei',
 'Kiev',
 'Faisala

In [8]:
# Create an empty DataFrame to store the air pollution data
pollution_data = pd.DataFrame()

pollution_data = []

# Set up the url with unix dates(Jan. 1 - Dec. 31, 2022) for historical data
url = f'http://api.openweathermap.org/data/2.5/air_pollution/history?lat=0&lon=0&start=1641013200&end=1672527600&appid=' + weather_api_key
response = requests.get(url)

# Loop through each city and retrieve the historical air pollution data
for city in cities:


    # Parse the JSON response and print the air pollution data for the city
    data = response.json()
    pollutants = data['list']
    for p in pollutants:
        pollution_data.append({
            'city': city,
            'date': p['dt'],
            'aqi': p['main']['aqi'],
            'so2': p['components']['so2'],
            'no2': p['components']['no2'],
            'pm10': p['components']['pm10'],
            'pm2.5': p['components']['pm2_5'],
            'o3': p['components']['o3'],
            'co': p['components']['co']
        })
    
    # Create a dataframe for the city's data and append it to the list
    pollution_df = pd.DataFrame(pollution_data)

In [26]:
pollution_df

Unnamed: 0,city,date,aqi,so2,no2,pm10,pm2.5,o3,co
0,Tokyo,1606266000,1,0.16,0.04,11.42,3.53,37.55,257.02
1,Tokyo,1606269600,1,0.16,0.04,10.66,3.50,38.27,253.68
2,Tokyo,1606273200,1,0.17,0.04,10.49,3.56,39.70,243.66
3,Tokyo,1606276800,1,0.18,0.05,10.84,3.72,41.49,240.33
4,Tokyo,1606280400,1,0.18,0.05,10.94,3.80,42.92,236.99
...,...,...,...,...,...,...,...,...,...
88695,Dalian,1609441200,1,0.13,0.04,9.53,2.99,25.75,273.71
88696,Dalian,1609444800,1,0.13,0.04,10.15,3.01,26.11,277.04
88697,Dalian,1609448400,1,0.13,0.05,10.53,2.99,26.11,277.04
88698,Dalian,1609452000,1,0.13,0.05,10.11,2.87,26.11,273.71


In [27]:
# Convert unix timestamp to utc
pollution_df['date'] = pd.to_datetime(pollution_df['date'], unit = 's')
pollution_df

Unnamed: 0,city,date,aqi,so2,no2,pm10,pm2.5,o3,co
0,Tokyo,2020-11-25 01:00:00,1,0.16,0.04,11.42,3.53,37.55,257.02
1,Tokyo,2020-11-25 02:00:00,1,0.16,0.04,10.66,3.50,38.27,253.68
2,Tokyo,2020-11-25 03:00:00,1,0.17,0.04,10.49,3.56,39.70,243.66
3,Tokyo,2020-11-25 04:00:00,1,0.18,0.05,10.84,3.72,41.49,240.33
4,Tokyo,2020-11-25 05:00:00,1,0.18,0.05,10.94,3.80,42.92,236.99
...,...,...,...,...,...,...,...,...,...
88695,Dalian,2020-12-31 19:00:00,1,0.13,0.04,9.53,2.99,25.75,273.71
88696,Dalian,2020-12-31 20:00:00,1,0.13,0.04,10.15,3.01,26.11,277.04
88697,Dalian,2020-12-31 21:00:00,1,0.13,0.05,10.53,2.99,26.11,277.04
88698,Dalian,2020-12-31 22:00:00,1,0.13,0.05,10.11,2.87,26.11,273.71


In [30]:
# Save dataframe to csv
pollution_df.to_csv(f'pollution', index=False)