In [1]:
import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import pandas as pd
import re
import Wrangler as Wr

# Country-name substitutions applied before the country-code lookup.
# Each key is replaced by its value; keys are matched exactly, so their
# spelling must stay as written here (several look like typos or are
# truncated -- presumably they mirror the source data; verify before "fixing").
countryRename = {
    'Åland': 'Åland Islands',
    'Burma': 'Myanmar',
    'Cape Verde': 'Cabo Verde',
    'Iran': 'Iran (Islamic Republic of)',
    'Czech Republic': 'Czechia',
    'Bolivia': 'Bolivia (Plurinational State of)',
    'British Virgin Islands': 'Virgin Islands (British)',
    'Bonaire, Saint Eustatius and Saba': 'Bonaire, Sint Eustatius and Saba',
    'Congo (Democratic Republic of the)': 'Congo, Democratic Republic of the',
    'Falkland Islands (Islas Malvinas)': 'Falkland Islands (Malvinas)',
    'Federated States of Micronesia': 'Micronesia (Federated States of)',
    'French Southern and Antarctic Lands': 'French Southern Territories',
    'Laos': "Lao People's Democratic Republic",
    'Macau': 'Macao',
    'Macedonia': 'North Macedonia',
    'Moldova': 'Moldova, Republic of',
    'North Korea': "Korea (Democratic People's Republic of)",
    'Palestina': 'Palestine, State of',
    'Reunion': 'Réunion',
    'Russia': 'Russian Federation',
    'Saint Martin': 'Saint Martin (French part)',
    'Sint Maarten': 'Sint Maarten (Dutch part)',
    'South Georgia and the South Sandwich Isla': 'South Georgia and the South Sandwich Islands',
    'South Korea': 'Korea, Republic of',
    'Swaziland': 'Eswatini',
    'Syria': 'Syrian Arab Republic',
    'Tanzania': 'Tanzania, United Republic of',
    'Turks and Caicas Islands': 'Turks and Caicos Islands',
    'United Kingdom': 'United Kingdom of Great Britain and Northern Ireland',
    'United States': 'United States of America',
    'Venezuela': 'Venezuela (Bolivarian Republic of)',
    'Vietnam': 'Viet Nam',
    'Virgin Islands': 'Virgin Islands (U.S.)',
}

# Code substitutions: the geomap shape data uses slightly different
# three-letter codes for some countries.
reCode = {
    'ESH': 'SAH',
    'PSE': 'PSX',
}

Import dictionary of country codes

In [2]:
# URL of the Wikipedia article listing the ISO 3166-1 alpha-3 codes
list_page = 'https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3#Uses_and_applications'

# Fetch the page and parse it with Beautiful Soup. The context manager
# closes the HTTP response as soon as the HTML has been parsed.
with urllib.request.urlopen(list_page) as page:
    soup = BeautifulSoup(page, 'html.parser')

# The code list is taken from the first <div class="plainlist"> on the page
table = soup.find('div', attrs={'class': 'plainlist'})
if table is None:
    # Fail with a readable message instead of an AttributeError on None
    raise RuntimeError("No <div class='plainlist'> found at " + list_page
                       + "; the page layout may have changed")
rows = table.find_all('li')

# Build {country name: alpha-3 code} from the list items
countryCodes = {}
for li in rows:
    code_tag = li.find('span', attrs={'class': 'monospaced'})
    name_tag = li.find('a')
    if code_tag is None or name_tag is None:
        # Not a "code + country link" entry; nothing to record
        continue
    code = code_tag.text
    country = name_tag.text
    countryCodes[country] = code

Load and clean country data

In [3]:
# Load the country temperature data
filePath = Wr.makeDirFile('Temp_countries')
cTempdata = pd.read_csv(filePath)

# Remove the duplicated country entries labelled '(Europe)'.
# NOTE: this drops every row whose Country contains the substring 'Europe',
# not only names that end with '(Europe)'.
cTempdata = cTempdata[~cTempdata.Country.str.contains('Europe')]

# Reduce to annual frequency by keeping only the rows where Month == 6
cTempdata = cTempdata[cTempdata.Month.isin([6])]

# Remove unneeded columns (a list keeps the column order explicit)
cTempdata = cTempdata.drop(columns=['MonthlyAnomaly', 'Month', 'Date',
                                    'landPercent', 'TwentyYearAnomaly', 'TenYearAnomaly'])
# Also discard every column whose name ends in 'unc' (case-insensitive)
cols = [c for c in cTempdata.columns if not c.lower().endswith('unc')]
cTempdata = cTempdata[cols]

# Create the country code column: normalise the name, look up its code,
# then apply the geomap-specific code overrides. Names with no match in
# any of the dictionaries are left unchanged in this column.
cTempdata = cTempdata.assign(
    country_code=cTempdata.Country.replace(countryRename).replace(countryCodes).replace(reCode)
)

# Drop entries that are excluded from the country list
dropNames = cTempdata[cTempdata.Country.isin(['Baker Island','Gaza Strip','Kingman Reef','Palmyra Atoll'])].index
cTempdata = cTempdata.drop(index=dropNames)

# Renumber the rows 0..n-1, discarding the old index rather than
# materialising it as a column and dropping it again
cTempdata = cTempdata.reset_index(drop=True)

Export data as csv

In [4]:
# Write the cleaned country temperature table to a CSV file at the path
# returned by Wr.makeDirFile('cleanedCountries').
# index=False is not passed, so pandas also writes the row index as the
# first (unnamed) column of the file.
filePath = Wr.makeDirFile('cleanedCountries')
cTempdata.to_csv(filePath)