# Extract Data

In [1]:
# Dependencies
import requests
import pandas as pd

In [2]:
# Configuration for the extract step.
# The indicator code is kept separate from the API base address so the value
# being requested is easy to spot and change in one place.
# NOTE(review): the host looks like the WHO Global Health Observatory API — confirm.
GHO_API_BASE = "https://ghoapi.azureedge.net/api"
INDICATOR_CODE = "MDG_0000000007"

# Full endpoint requested by the extract cell below.
url = f"{GHO_API_BASE}/{INDICATOR_CODE}"


In [4]:
# Fetch the indicator payload with a single GET request and parse it as JSON.
# A timeout (in seconds) keeps the notebook from hanging forever if the API
# is unreachable, and raise_for_status() turns an HTTP 4xx/5xx reply into an
# immediate requests.HTTPError instead of a confusing failure further down.
http_response = requests.get(url, timeout=30)
http_response.raise_for_status()

# Parsed JSON body; the next cell reads its 'value' entry.
response = http_response.json()

In [5]:
# Create the DataFrame from the entries stored under the payload's 'value' key
child_mortality_df = pd.DataFrame(data=response['value'])

# Preview the first ten rows
child_mortality_df.head(n=10)

Unnamed: 0,Id,IndicatorCode,SpatialDimType,SpatialDim,TimeDimType,TimeDim,Dim1Type,Dim1,Dim2Type,Dim2,...,DataSourceDim,Value,NumericValue,Low,High,Comments,Date,TimeDimensionValue,TimeDimensionBegin,TimeDimensionEnd
0,24294408,MDG_0000000007,COUNTRY,AFG,YEAR,1962,SEX,BTSX,,,...,,344.6,344.6295,299.42552,399.28771,,2020-09-22T14:19:46+02:00,1962,1962-01-01T00:00:00+01:00,1962-12-31T00:00:00+01:00
1,24294409,MDG_0000000007,COUNTRY,AFG,YEAR,1963,SEX,BTSX,,,...,,338.7,338.71554,296.66847,389.86617,,2020-09-22T14:19:46.033+02:00,1963,1963-01-01T00:00:00+01:00,1963-12-31T00:00:00+01:00
2,24294410,MDG_0000000007,COUNTRY,AFG,YEAR,1964,SEX,BTSX,,,...,,333.1,333.10666,293.10659,380.35079,,2020-09-22T14:19:46.097+02:00,1964,1964-01-01T00:00:00+01:00,1964-12-31T00:00:00+01:00
3,24294411,MDG_0000000007,COUNTRY,AFG,YEAR,1965,SEX,BTSX,,,...,,327.6,327.56087,289.05181,372.54933,,2020-09-22T14:19:46.127+02:00,1965,1965-01-01T00:00:00+01:00,1965-12-31T00:00:00+01:00
4,24294412,MDG_0000000007,COUNTRY,AFG,YEAR,1966,SEX,BTSX,,,...,,322.0,322.04611,284.75436,366.28313,,2020-09-22T14:19:46.157+02:00,1966,1966-01-01T00:00:00+01:00,1966-12-31T00:00:00+01:00
5,24294413,MDG_0000000007,COUNTRY,AFG,YEAR,1967,SEX,BTSX,,,...,,316.8,316.79465,280.07432,360.10587,,2020-09-22T14:19:46.19+02:00,1967,1967-01-01T00:00:00+01:00,1967-12-31T00:00:00+01:00
6,24294414,MDG_0000000007,COUNTRY,AFG,YEAR,1968,SEX,BTSX,,,...,,311.4,311.38246,275.40339,354.11656,,2020-09-22T14:19:46.22+02:00,1968,1968-01-01T00:00:00+01:00,1968-12-31T00:00:00+01:00
7,24294415,MDG_0000000007,COUNTRY,AFG,YEAR,1969,SEX,BTSX,,,...,,305.8,305.76466,270.78063,347.89892,,2020-09-22T14:19:46.267+02:00,1969,1969-01-01T00:00:00+01:00,1969-12-31T00:00:00+01:00
8,24294416,MDG_0000000007,COUNTRY,AFG,YEAR,1970,SEX,BTSX,,,...,,300.3,300.27875,266.01564,342.33624,,2020-09-22T14:19:46.3+02:00,1970,1970-01-01T00:00:00+01:00,1970-12-31T00:00:00+01:00
9,24294417,MDG_0000000007,COUNTRY,AFG,YEAR,1971,SEX,BTSX,,,...,,294.8,294.75546,260.8647,336.43932,,2020-09-22T14:19:46.347+02:00,1971,1971-01-01T00:00:00+01:00,1971-12-31T00:00:00+01:00


# Transform Data

In [6]:
# Keep only the columns needed downstream, as an independent copy of the raw frame
columns_to_keep = ['SpatialDim', 'TimeDim', 'Dim1', 'NumericValue']
child_mortality = child_mortality_df.loc[:, columns_to_keep].copy()
child_mortality.head(5)

Unnamed: 0,SpatialDim,TimeDim,Dim1,NumericValue
0,AFG,1962,BTSX,344.6295
1,AFG,1963,BTSX,338.71554
2,AFG,1964,BTSX,333.10666
3,AFG,1965,BTSX,327.56087
4,AFG,1966,BTSX,322.04611


In [8]:
# Replace the API's dimension names with descriptive column headers
column_labels = {
    "SpatialDim": "country",
    "TimeDim": "year",
    "Dim1": "gender",
    "NumericValue": "mortality_rate",
}
child_mortality = child_mortality.rename(columns=column_labels)
child_mortality.head(5)

Unnamed: 0,country,year,gender,mortality_rate
0,AFG,1962,BTSX,344.6295
1,AFG,1963,BTSX,338.71554
2,AFG,1964,BTSX,333.10666
3,AFG,1965,BTSX,327.56087
4,AFG,1966,BTSX,322.04611


In [11]:
# Keep only the rows whose year is 2017
is_2017 = child_mortality['year'].eq(2017)
mortality_totalYear = child_mortality[is_2017]
mortality_totalYear.head(5)

Unnamed: 0,country,year,gender,mortality_rate
55,AFG,2017,BTSX,64.94076
113,AFG,2017,FMLE,61.25243
171,AFG,2017,MLE,68.49211
211,AGO,2017,BTSX,80.6223
251,AGO,2017,FMLE,74.53114


In [12]:
# Keep only the rows coded "BTSX" (both sexes combined)
is_both_sexes = mortality_totalYear['gender'].eq("BTSX")
updated_sex_df = mortality_totalYear[is_both_sexes]
updated_sex_df.head(5)

Unnamed: 0,country,year,gender,mortality_rate
55,AFG,2017,BTSX,64.94076
211,AGO,2017,BTSX,80.6223
333,ALB,2017,BTSX,9.41805
452,AND,2017,BTSX,3.21892
582,ARE,2017,BTSX,7.6953


In [14]:
# Write the filtered frame to CSV with a header row and without the index column
updated_sex_df.to_csv(
    'mortalities.csv',
    header=True,
    index=False,
)