In [1]:
import requests
import json
from pprint import pprint

In [2]:
import pandas as pd


url = "https://en.wikipedia.org/wiki/List_of_the_100_largest_municipalities_in_Canada_by_population"

# create dataframe from table in above html
can_cities_table = pd.read_html(url)

# It will give us all the tables in a list, we only want the first table.
can_cities_table = can_cities_table[0]
print(can_cities_table)

    Rank(2016)      Municipality          Province          Municipal status  \
0            1           Toronto           Ontario                      City   
1            2          Montreal            Quebec                     Ville   
2            3           Calgary           Alberta                      City   
3            4            Ottawa           Ontario                      City   
4            5          Edmonton           Alberta                      City   
5            6       Mississauga           Ontario                      City   
6            7          Winnipeg          Manitoba                      City   
7            8         Vancouver  British Columbia                      City   
8            9          Brampton           Ontario                      City   
9           10          Hamilton           Ontario                      City   
10          11       Quebec City            Quebec                     Ville   
11          12            Surrey  Britis

In [3]:
# Dropping columns that I do not want
can_cities_table = can_cities_table.drop(columns=["Rank(2016)", "Municipal status",
                                                 "Province",
                                                 "Population(2011)",
                                                 "Population(2006)",
                                                 "Population(2001)",
                                                 "Population(1996)"])
can_cities_table.rename(columns={"Municipality": "City"}, inplace=True)
can_cities_table.head()


# # change cities column into a list
# canadian_cities = can_cities_table["Cities"].tolist()

Unnamed: 0,City,"Land area(km2, 2011)",Growth Rate 2011–2016,Population(2016)
0,Toronto,630.2,4.46%,2731571
1,Montreal,365.1,3.34%,1704694
2,Calgary,825.3,12.99%,1239220
3,Ottawa,2790.2,5.76%,934243
4,Edmonton,684.4,14.82%,932546


In [4]:
canadian_cities = can_cities_table["City"].tolist()

In [5]:
len(canadian_cities)

100

In [6]:
api_key = "53efafc18c686b9dcae32b983edb6db4f3ef23d8"
city = "Toronto"
url = f'https://api.waqi.info/feed/{city}/?token={api_key}'



In [7]:
response = requests.get(url).json()
pprint(response)

{'data': {'aqi': 17,
          'attributions': [{'name': 'Air Quality Ontario - the Ontario '
                                    'Ministry of the Environment and Climate '
                                    'Change',
                            'url': 'http://www.airqualityontario.com/'},
                           {'name': 'World Air Quality Index Project',
                            'url': 'https://waqi.info/'}],
          'city': {'geo': [43.653226, -79.3831843],
                   'name': 'Toronto',
                   'url': 'https://aqicn.org/city/toronto'},
          'debug': {'sync': '2019-06-04T16:26:43+09:00'},
          'dominentpol': 'pm25',
          'iaqi': {'co': {'v': 1.9},
                   'h': {'v': 62.6},
                   'no2': {'v': 16.6},
                   'o3': {'v': 5.6},
                   'p': {'v': 1018.2},
                   'pm25': {'v': 17},
                   'so2': {'v': 0.2},
                   't': {'v': 11.2},
                   'w': {'v': 0.3}

In [8]:
response["data"]["iaqi"]["co"]

{'v': 1.9}

In [9]:
count = 0
cities = []
aqi = []


# 5 pollutants used to calculate AQI
pollutants = {"CO":[],
              "NO2":[],
              "SO2":[],
              "pm25":[]
             }

for city in canadian_cities:
    city_name = city
    url = f'https://api.waqi.info/feed/{city}/?token={api_key}'
    response = requests.get(url).json()
    if (response["status"] == "ok"):
        # sometime aqi might not be a number, exclude them
        if (isinstance(response["data"]["aqi"], int)):
            # append aqi and city name to appropriate list
            aqi.append(response["data"]["aqi"])
            cities.append(city)
            
            # not all pollutants are always present
            for item in pollutants.keys():
                # checking if 4 pollutant is present
                if item in response["data"]["iaqi"].keys():
                    pollutants[item].append(response["data"]["iaqi"][item]["v"])
                else:
                    pollutants[item].append('na')

    

In [10]:
print(cities)

print(aqi)

['Toronto', 'Montreal', 'Calgary', 'Ottawa', 'Edmonton', 'Mississauga', 'Winnipeg', 'Brampton', 'Hamilton', 'Surrey', 'Laval', 'Halifax', 'London', 'Markham', 'Vaughan', 'Gatineau', 'Saskatoon', 'Longueuil', 'Kitchener', 'Burnaby', 'Windsor', 'Regina', 'Richmond', 'Richmond Hill', 'Oakville', 'Burlington', 'Greater Sudbury', 'Sherbrooke', 'Oshawa', 'Saguenay', 'Lévis', 'Barrie', 'Abbotsford', 'Coquitlam', 'Trois-Rivières', 'St. Catharines', 'Guelph', 'Cambridge', 'Kelowna', 'Kingston', 'Langley', 'Terrebonne', 'Milton', "St. John's", 'Thunder Bay', 'Waterloo', 'Delta', 'Red Deer', 'Brantford', 'Lethbridge', 'Nanaimo', 'Kamloops', 'North Vancouver', 'Victoria', 'Brossard', 'Repentigny', 'Newmarket', 'Chilliwack', 'Maple Ridge', 'Peterborough', 'Prince George', 'Sault Ste. Marie', 'Moncton', 'Sarnia', 'New Westminster', 'Saint John', 'St. Albert', 'Medicine Hat', 'Grande Prairie', 'Airdrie', 'Fredericton', 'Aurora', 'North Vancouver', 'Welland', 'North Bay', 'Belleville']
[17, 12, 11, 9,

In [11]:
pollutants["CO"]

['na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na',
 'na']