# 1. Set-up environment

In [1]:
# Import các thư viện cần thiết
import requests
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
# import seaborn as sns
import json
import random

# 2. Collect data using Web API

In [2]:

# Base query against the GHO API (ghoapi.azureedge.net) for indicator MORT_100.
# Decoded, the filter reads: Dim1 eq 'YEARS0-4' and SpatialDimType eq 'COUNTRY'.
# The functions below append further "and ..." clauses (cause, year) to it.
BASE_URL = (
    'https://ghoapi.azureedge.net/api/MORT_100'
    '?$filter=Dim1%20eq%20%27YEARS0-4%27'
    '%20and%20SpatialDimType%20eq%20%27COUNTRY%27'
)

# Column-oriented lookup tables, filled in by collect_Countries() / collect_Causes().
COUNTRIES = dict(Country_ID=[], Country_Name=[])
CAUSES = dict(Causes_ID=[], Causes_Name=[])

# Years to query: 2000 through 2021 inclusive.
YEARS = [*range(2000, 2022)]

# Maps a cause title to its list of values; keys are created by collect_Causes().
NUMBER_OF_DEATHS = dict()


def collect_Causes():
    """Fetch the CHILDCAUSE dimension values and register them in the global tables.

    On the first call the cause codes and titles are downloaded, appended to
    ``CAUSES["Causes_ID"]`` / ``CAUSES["Causes_Name"]``, and an empty list is
    created in ``NUMBER_OF_DEATHS`` for every cause title. Later calls reuse
    the already populated ``CAUSES`` table and do not touch the network.

    Returns
    -------
    pandas.DataFrame
        A frame built from ``CAUSES`` (columns ``Causes_ID``, ``Causes_Name``).

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If the API does not answer within the timeout.
    """
    # Already populated: the result is fully determined by CAUSES, so skip the request.
    if len(CAUSES["Causes_ID"]) > 0:
        return pd.DataFrame(CAUSES)

    url = "https://ghoapi.azureedge.net/api/DIMENSION/CHILDCAUSE/DimensionValues"
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=60)
    # Fail with the HTTP error itself instead of an opaque KeyError on 'value'.
    response.raise_for_status()
    causes_df = pd.DataFrame(response.json()['value']).loc[:, ["Code", "Title"]]

    for code, title in zip(causes_df["Code"], causes_df["Title"]):
        CAUSES["Causes_ID"].append(code)
        CAUSES["Causes_Name"].append(title)
        NUMBER_OF_DEATHS[title] = []

    return pd.DataFrame(CAUSES)


def collect_Countries():
    """Build the country table: codes present in the mortality data plus their names.

    Country codes are taken from one sample query (first cause in ``CAUSES``,
    first year in ``YEARS``) and stored sorted in ``COUNTRIES["Country_ID"]``;
    this happens only while that list is still empty. Names are then looked up
    by code in the COUNTRY dimension values, so every name lines up with its
    code regardless of the order in which the API returns the dimension rows.
    Codes without a matching dimension entry get ``None`` as their name.

    ``collect_Causes()`` must have been called first, because the sample query
    needs ``CAUSES["Causes_ID"][0]``.

    Returns
    -------
    pandas.DataFrame
        A frame built from ``COUNTRIES`` (columns ``Country_ID``, ``Country_Name``).

    Raises
    ------
    IndexError
        If ``CAUSES`` is still empty when the country codes have to be fetched.
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If the API does not answer within the timeout.
    """
    if len(COUNTRIES["Country_ID"]) == 0:
        url = (
            BASE_URL
            + "%20and%20Dim2%20eq%20%27" + CAUSES["Causes_ID"][0]
            + "%27%20and%20TimeDim%20eq%20" + str(YEARS[0])
        )
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        observed_df = pd.DataFrame(response.json()['value']).loc[:, ["SpatialDim"]]
        # set() guards against a country appearing in more than one row.
        COUNTRIES["Country_ID"].extend(sorted(set(observed_df["SpatialDim"])))

    url = "https://ghoapi.azureedge.net/api/DIMENSION/COUNTRY/DimensionValues"
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    dimension_df = pd.DataFrame(response.json()['value']).loc[:, ["Code", "Title"]]

    # Join names by code rather than by position: the dimension endpoint is not
    # guaranteed to list countries in the same (sorted) order as Country_ID, and
    # it may lack some codes altogether.
    name_by_code = dict(zip(dimension_df["Code"], dimension_df["Title"]))
    COUNTRIES["Country_Name"] = [name_by_code.get(code) for code in COUNTRIES["Country_ID"]]

    return pd.DataFrame(COUNTRIES)


In [3]:
# Widen the column display so long cause titles are shown in full.
# The fully qualified option name is used because abbreviated names rely on
# pattern matching and can become ambiguous when pandas adds new options.
pd.set_option('display.max_colwidth', 100)
causes_df = collect_Causes()
causes_df

Unnamed: 0,Causes_ID,Causes_Name
0,CH10,Prematurity
1,CH11,Birth asphyxia and birth trauma
2,CH12,Sepsis and other infectious conditions of the newborn
3,CH13,Other Group 1 (postneonatal only)
4,CH15,Congenital anomalies
5,CH16,Other noncommunicable diseases (postneonatal only)
6,CH17,Injuries
7,CH18,Tuberculosis
8,CH19,Other Group 1 and Other noncommunicable (neonatal and under-5 only)
9,CH2,HIV/AIDS


In [4]:
# Build the country lookup table. This must run after collect_Causes(), because
# collect_Countries() reads CAUSES["Causes_ID"][0] to form its sample query.
countries_df = collect_Countries()
countries_df

Unnamed: 0,Country_ID,Country_Name
0,AFG,Afghanistan
1,AGO,Angola
2,ALB,Albania
3,AND,Andorra
4,ARE,United Arab Emirates
...,...,...
189,WSM,Samoa
190,YEM,Yemen
191,ZAF,South Africa
192,ZMB,Zambia


In [5]:
def collect_data():
    """Download the MORT_100 values for every (year, cause) pair and tabulate them.

    For each year in ``YEARS`` and each cause in ``CAUSES`` one request is sent.
    The returned ``NumericValue`` figures are matched to countries by their
    ``SpatialDim`` code, so a country missing from a response gets ``None``
    instead of shifting the remaining values out of alignment. The per-cause
    lists in ``NUMBER_OF_DEATHS`` are rebuilt from scratch on every call, which
    makes the function safe to run more than once.

    ``collect_Causes()`` and ``collect_Countries()`` must have been called
    first so that ``CAUSES`` and ``COUNTRIES`` are populated.

    Returns
    -------
    pandas.DataFrame
        One row per (year, country) pair, ordered by year and then by the
        order of ``COUNTRIES["Country_ID"]``. Columns are ``Country_ID``,
        ``Country_Name``, ``Year`` and then one column per cause title.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If the API does not answer within the timeout.
    """
    country_ids = list(COUNTRIES["Country_ID"])
    # Country_Name may be a list or a pandas Series depending on how it was filled.
    country_names = list(COUNTRIES["Country_Name"])

    # Reset the accumulators so a repeated call does not append a second copy.
    for cause_name in CAUSES["Causes_Name"]:
        NUMBER_OF_DEATHS[cause_name] = []

    for year in YEARS:
        for cause_id, cause_name in zip(CAUSES["Causes_ID"], CAUSES["Causes_Name"]):
            url = BASE_URL + "%20and%20Dim2%20eq%20%27" + cause_id + "%27%20and%20TimeDim%20eq%20" + str(year)

            # One request per (year, cause); the parsed response is reused below.
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            df = pd.DataFrame(response.json()['value'])

            if "SpatialDim" in df.columns and "NumericValue" in df.columns:
                value_by_country = dict(zip(df["SpatialDim"], df["NumericValue"]))
            else:
                # No usable rows for this (year, cause): every country gets None.
                value_by_country = {}

            NUMBER_OF_DEATHS[cause_name].extend(
                value_by_country.get(country_id) for country_id in country_ids
            )

        print("Loading data in", year)

    # Identifier columns follow the same year-major layout as the value lists.
    table = {
        "Country_ID": country_ids * len(YEARS),
        "Country_Name": country_names * len(YEARS),
        "Year": [year for year in YEARS for _ in country_ids],
    }
    for cause_name in CAUSES["Causes_Name"]:
        table[cause_name] = NUMBER_OF_DEATHS[cause_name]

    return pd.DataFrame(table)

In [None]:
# Run the full download. collect_data() sends at least one HTTP request per
# (year, cause) pair, so this cell takes a while to finish.
collect_data()

In [None]:
#TEST
# Shape check on the table returned by collect_data().
# 4268 rows = 194 x 22, where 22 is len(YEARS) (2000-2021); presumably 194 is
# the number of countries - TODO confirm.
# 19 columns: presumably a few identifier columns plus one column per cause -
# TODO confirm against the number of entries in CAUSES.
# NOTE(review): this cell calls collect_data() again, repeating the whole
# download already performed by the previous cell.
data_number_of_deaths = collect_data()
assert data_number_of_deaths.shape == (4268, 19)

In [None]:
# Save the collected table to number_of_deaths.csv, without the index column.
data_number_of_deaths.to_csv("number_of_deaths.csv", index=False)