# Download Country Codes

This code downloads the country codes aligned with the ISO codes used by the World Bank. The alignment is important: we will use the LSMS data collected by the World Bank, and with the correct codes we can automate some parts. We load the list of countries from [Wikipedia](https://en.wikipedia.org/wiki/Sub-Saharan_Africa) and get the country codes from the [World Bank API](https://microdata.worldbank.org/api-documentation/catalog/index.html#operation/listCountryCodes). Since the names are not always the same, some missing countries are added manually. So far, we have used data for African and Asian countries.

In [None]:
# Mount the drive
# Exposes Google Drive under /content/drive; google.colab is only importable
# inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Working directory
# Move into the LSMS processing folder of the mounted drive; the relative
# '../../data/...' path built later is resolved from this folder.
# NOTE(review): a bare `cd` is not plain Python; it presumably relies on
# IPython automagic being enabled - confirm.
cd drive/MyDrive/src/0_lsms_processing/

In [None]:
from typing import Any, Dict, Iterable, List, Set

import pandas as pd
import requests

In [None]:
# Continent to analyze. So far, africa and asia are the 2 alternatives.
continent: str = 'africa'
# Folder with the country metadata of the selected continent, relative to the
# working directory. The trailing slash matters: file names are appended to
# this string by plain concatenation.
path: str = f'../../data/continents/{continent}/countries_meta/'

In [None]:
# Load the country list of the selected continent; the file must provide a
# "country" column.
df: pd.DataFrame = pd.read_csv(path + "countries.csv")
# Unique country names, kept as a set for membership tests.
countries: Set[str] = set(df["country"])

In [None]:
# Url to WorldBank catalog
url: str = "https://microdata.worldbank.org/index.php/api/catalog/country_codes"

# A timeout keeps the notebook from hanging forever on an unresponsive server,
# and raise_for_status() stops here on an HTTP error instead of letting an
# error payload be parsed as if it were the country list.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parsed JSON payload; the entries are read from its "country_codes" key.
codes: Dict[str, Any] = response.json()

In [None]:
def diff(l1: Iterable[Any], l2: Iterable[Any]) -> List[Any]:
    """Return the values that appear in exactly one of the two inputs.

    Args:
        l1: first collection of hashable values (list, set, pandas Series, ...).
        l2: second collection of hashable values.

    Returns:
        The values found only in ``l1`` followed by the values found only in
        ``l2``. Each group is de-duplicated and keeps first-seen order, so the
        result is the same on every run.
    """
    # dict.fromkeys de-duplicates while preserving order; materialising both
    # inputs once also makes one-shot iterators safe to pass in.
    first = list(dict.fromkeys(l1))
    second = list(dict.fromkeys(l2))
    in_first, in_second = set(first), set(second)

    only_first = [value for value in first if value not in in_second]
    only_second = [value for value in second if value not in in_first]
    return only_first + only_second

In [None]:
# World Bank entries whose name matches one of our countries exactly.
matched_codes = [
    code for code in codes["country_codes"] if code["name"] in countries
]

# Column-oriented table: both lists are parallel (same index = same country).
# The keys are column names (str), not lists.
countries_code: Dict[str, List[str]] = {
    "name": [code["name"] for code in matched_codes],
    "iso": [code["iso"] for code in matched_codes],
}

# Separate copy of the automatically matched names, so that later manual
# additions to countries_code do not change it.
verify: List[str] = list(countries_code["name"])

In [None]:
# verify only holds names taken from the country list, so the difference is
# the set of countries that were not matched by name and need a manual entry.
diff(verify, df["country"]) # output for manual work

In [None]:
# Manual correction
# Countries whose name in our list differs from the World Bank name, keyed by
# continent. Values are ISO 3166-1 alpha-3 codes, like the codes collected
# from the API.
manual_codes: Dict[str, Dict[str, str]] = {
    'africa': {
        "Republic of the Congo": "COG",
        "Democratic Republic of the Congo": "COD",
        "Ivory Coast": "CIV",  # was the two-letter "CI"
        "Cabo Verde": "CPV",
    },
    'asia': {
        "Iran": "IRN",
        "Hong Kong": "HKG",
        "Taiwan": "TWN",
        "Kyrgyzstan": "KGZ",
        "Laos": "LAO",
        "South Korea": "KOR",  # KOR/PRK were swapped
        "North Korea": "PRK",
        "Syria": "SYR",
        "Turkey": "TUR",
        "Yemen": "YEM",
    },
}

# An unknown continent simply gets no manual entries.
for name, iso in manual_codes.get(continent, {}).items():
    # Skip names that are already present so that re-running this cell does
    # not append duplicate rows.
    if name in countries_code["name"]:
        continue
    countries_code["name"].append(name)
    countries_code["iso"].append(iso)


In [None]:
# Write the collected names and ISO codes next to the input country list,
# one row per country and without the index column.
output_file = path + "countries_code.csv"
codes_table = pd.DataFrame.from_dict(countries_code)
codes_table.to_csv(output_file, index=False)