In [15]:
import requests
from bs4 import BeautifulSoup as BS
import json
import pandas as pd
from io import StringIO
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

In [16]:
class WorldBankClimate:
  """Small client for the country-level CSV endpoints of the World Bank climate API.

  Every query is a URL of the form
  ``<BASE_URL>/<aggregation>/<gcm>/<scenario>/<variable>/<start>/<end>/<country>.csv``
  whose response body is parsed into a pandas DataFrame.
  """

  BASE_URL = 'http://climatedataapi.worldbank.org/climateweb/rest/v1/country'
  # Only these 20-year windows are accepted; each one runs start .. start + 19.
  VALID_START_YEARS = (1920, 1940, 1960, 1980, 2020, 2040, 2060, 2080)
  # Scenario path segment used for every query issued by this class.
  SRES = 'a2'
  # Seconds to wait for the API before giving up instead of hanging the kernel.
  REQUEST_TIMEOUT = 30

  def __init__(self):
    pass

  def _query_weather_forecast(self, *args):
    """Fetch the CSV addressed by the URL path segments ``args`` as a DataFrame."""
    url = self._build_url(*args)
    csv_content = self._parse_html(url)
    return self._parse_csv(csv_content)

  def _build_url(self, *args):
    """Join ``args`` (strings) onto the base URL with '/' and append '.csv'."""
    return '/'.join((self.BASE_URL,) + args) + ".csv"

  def _parse_html(self, url):
    """Download ``url`` and return the text payload of the response.

    Raises:
      requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
    response.raise_for_status()
    html_content = response.text

    # Some Beautiful Soup parsers wrap bare text in a <p> element and others do
    # not, so fall back to the raw body rather than dereferencing a missing tag.
    paragraph = BS(html_content).p
    if paragraph is None or not paragraph.contents:
      return html_content
    return paragraph.contents[0]

  def _parse_csv(self, content):
    """Parse CSV text into a DataFrame."""
    return pd.read_csv(StringIO(content))

  def _forecast(self, index, is_monthly, start_yr, gcm, country):
    """Validate the arguments shared by the public forecast methods and run the query.

    Raises:
      ValueError: if ``start_yr`` is not one of ``VALID_START_YEARS``.
    """
    if start_yr not in self.VALID_START_YEARS:
      # Fail here: a missing year range would otherwise surface later as an
      # unrelated TypeError while the URL is being joined.
      raise ValueError(
        "Invalid start year {!r}; expected one of {}".format(start_yr, self.VALID_START_YEARS))

    first_year = int(start_yr)
    aggregation_type = 'mavg' if is_monthly else 'annualavg'
    yr_range = '{}/{}'.format(first_year, first_year + 19)
    return self._query_weather_forecast(aggregation_type, gcm, self.SRES, index, yr_range, country)

  def forecast_temp(self, is_monthly, start_yr=2020, gcm='csiro_mk3_5', country="SEN"):
    """Query the 'tas' (temperature) variable for one country and 20-year window.

    Args:
      is_monthly: truthy for monthly averages ('mavg'), falsy for annual ('annualavg').
      start_yr: first year of the window; must be in ``VALID_START_YEARS``.
      gcm: model identifier used as a URL path segment.
      country: country code used as the last URL path segment.

    Returns:
      DataFrame parsed from the CSV response.

    Raises:
      ValueError: if ``start_yr`` is not a supported window start.
    """
    return self._forecast('tas', is_monthly, start_yr, gcm, country)

  def forecast_pr(self, is_monthly, start_yr=2020, gcm='csiro_mk3_5', country="SEN"):
    """Query the 'pr' (precipitation) variable; same arguments as ``forecast_temp``.

    Raises:
      ValueError: if ``start_yr`` is not a supported window start.
    """
    return self._forecast('pr', is_monthly, start_yr, gcm, country)

In [17]:
def query_temp_pr_info(countries, is_monthly=True, start_yr=2020):
  """Query temperature and precipitation forecasts for several countries.

  Args:
    countries: iterable of country codes, passed one at a time as ``country=``
      to ``WorldBankClimate.forecast_temp`` / ``forecast_pr``.
    is_monthly: forwarded unchanged to both forecast methods.
    start_yr: forwarded unchanged to both forecast methods.

  Returns:
    Tuple ``(temperature, precipitation)`` of DataFrames. Each stacks the
    per-country results in input order and is indexed by a 'country' level
    holding the code that produced each row.

  Raises:
    ValueError: if ``countries`` is empty.
  """
  countries = list(countries)
  if not countries:
    raise ValueError("countries must contain at least one country code")

  forecast_model = WorldBankClimate()
  temp_frames = []
  pr_frames = []
  for country in countries:
    temp_forecast = forecast_model.forecast_temp(is_monthly, start_yr, country=country)
    temp_frames.append(temp_forecast.assign(country=country))
    pr_forecast = forecast_model.forecast_pr(is_monthly, start_yr, country=country)
    pr_frames.append(pr_forecast.assign(country=country))

  # Concatenate once at the end: DataFrame.append no longer exists in pandas 2.x
  # and re-copying the accumulated frame on every iteration is wasteful anyway.
  countries_temp_forecast = pd.concat(temp_frames).set_index('country')
  countries_pr_forecast = pd.concat(pr_frames).set_index('country')

  return countries_temp_forecast, countries_pr_forecast


# Example: compare Senegal's temperature and precipitation with those of the other Sahel countries for 2020-2039

In [18]:
# Country records copied from http://climatedataapi.worldbank.org/climateweb/rest/v1/country/
Sahel = [
  {"name": "Senegal", "iso3": "SEN", "iso2": "SN", "id": 559},
  {"name": "Mauritania", "iso3": "MRT", "iso2": "MR", "id": 530},
  {"name": "Mali", "iso3": "MLI", "iso2": "ML", "id": 529},
  {"name": "Burkina Faso", "iso3": "BFA", "iso2": "BF", "id": 552},
  {"name": "Niger", "iso3": "NER", "iso2": "NE", "id": 525},
  {"name": "Nigeria", "iso3": "NGA", "iso2": "NG", "id": 527},
  {"name": "Chad", "iso3": "TCD", "iso2": "TD", "id": 524},
  {"name": "Central African Republic", "iso3": "CAF", "iso2": "CF", "id": 528},
  {"name": "Sudan", "iso3": "SDN", "iso2": "SD", "id": 526},
  {"name": "Eritrea", "iso3": "ERI", "iso2": "ER", "id": 531},
  {"name": "Ethiopia", "iso3": "ETH", "iso2": "ET", "id": 532},
]

# The API is addressed by ISO3 code, so keep only that field.
Sahel_countries = [entry['iso3'] for entry in Sahel]

# Average monthly temperature and precipitation of every country for 2020 ~ 2039
# (the defaults of query_temp_pr_info).
Sahel_countries_temp_forecast, Sahel_countries_pr_forecast = query_temp_pr_info(Sahel_countries)

MONTH_COLUMNS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# One report per variable. Row 0 of each pairwise matrix belongs to the first
# country queried (Senegal), so both metrics are measured relative to it:
# cosine similarity for the shape of the yearly pattern, Euclidean distance for
# the difference in amount.
similarity_reports = (
  ("Temperature similarity", Sahel_countries_temp_forecast),
  ("Precipitation similarity", Sahel_countries_pr_forecast),
)
for report_number, (report_title, forecast) in enumerate(similarity_reports):
  if report_number > 0:
    # blank line separating consecutive reports
    print()
  monthly_values = forecast[MONTH_COLUMNS]
  pattern_difference = cosine_similarity(monthly_values)[0]
  amount_difference = euclidean_distances(monthly_values)[0]
  print(report_title)
  for country, pattern, amount in zip(forecast.index, pattern_difference, amount_difference):
    print(country, ":", pattern, '\t', amount)

Temperature similarity
SEN : 1.0 	 0.0
MRT : 0.9903038242899798 	 15.063918331674365
MLI : 0.993918371708739 	 12.962497740739154
BFA : 0.9988812734590367 	 5.6722188176566135
NER : 0.9906901922128943 	 14.753923764592292
NGA : 0.9992828778479225 	 4.971536464998338
TCD : 0.9940882003556208 	 12.251793725232535
CAF : 0.9982588987199079 	 8.67392974586845
SDN : 0.9972321580215787 	 8.567201789994323
ERI : 0.9953499861376813 	 10.739994300463682
ETH : 0.999126474374886 	 12.993115407484492

Precipitation similarity
SEN : 1.0 	 0.0
MRT : 0.9703966210611493 	 101.28803127805212
MLI : 0.9434055016866728 	 52.64406577266291
BFA : 0.91705109688767 	 277.588491020627
NER : 0.7926716775574562 	 91.93966334319916
NGA : 0.8232170045854844 	 442.45048857816414
TCD : 0.7636139524865098 	 138.36053811589056
CAF : 0.7662503915871806 	 482.66690365994936
SDN : 0.7903300904489141 	 140.8290084147056
ERI : 0.7510553544678219 	 88.89754737598983
ETH : 0.699222366284624 	 210.84439737753328
