In [1]:
from matplotlib import pyplot as plt

import pandas as pd
import numpy as np
import seaborn as sns

import os

In [21]:
""" Data locations
"""

# Raw input directories, one per data provider.
unpd_root = "../../data/unpd_data"
gbd_root = "../../data/gbd_data"

# Directory that receives the subsets written out by this notebook.
prepped_data_root = "../../data/prepared_data"

# UN Data

Extract a "World" subset to work with for plotting, global forecasts, and working out the next 8BN.

In [12]:
# Values of the Variant column to keep when filtering the UN indicator table.
unpd_scenarios = [
    "High",
    "Medium",
    "Low",
]

In [4]:
# Read the two WPP2022 demographic-indicator files found under unpd_root.
# low_memory=False turns off pandas' chunked parsing, which can otherwise
# lead to mixed-dtype inference within a column.
df_unpd_medium_indicators, df_unpd_other_indicators = (
    pd.read_csv(os.path.join(unpd_root, csv_name), low_memory=False)
    for csv_name in (
        "WPP2022_Demographic_Indicators_Medium.csv",
        "WPP2022_Demographic_Indicators_OtherVariants.csv",
    )
)

In [5]:
# Stack the rows of the Medium file on top of the rows of the OtherVariants
# file. Row labels are carried over from each input, so index values may repeat.
df_unpd_all_indicators = pd.concat(
    (df_unpd_medium_indicators, df_unpd_other_indicators),
    axis=0,
)

In [13]:
# Keep only the rows whose Variant is one of the names in unpd_scenarios.
# Series.isin performs the membership test in a single vectorised call
# instead of invoking a Python lambda once per row.
df_unpd_all_indicators_scenario_filter = df_unpd_all_indicators[
    df_unpd_all_indicators["Variant"].isin(unpd_scenarios)
]

In [19]:
# Narrow the scenario-filtered table down to the rows whose Location is "World".
df_unpd_world_indicators = df_unpd_all_indicators_scenario_filter.loc[
    df_unpd_all_indicators_scenario_filter["Location"] == "World"
]

In [20]:
# Display the World subset for inspection (the rendered table is truncated).
df_unpd_world_indicators

Unnamed: 0,SortOrder,LocID,Notes,ISO3_code,ISO2_code,SDMX_code,LocTypeID,LocTypeName,ParentID,Location,...,Q0060Male,Q0060Female,Q1550,Q1550Male,Q1550Female,Q1560,Q1560Male,Q1560Female,NetMigrations,CNMR
0,1,900,,,,1.0,1,World,0,World,...,580.7496,498.0396,240.3164,271.6253,208.1916,378.6968,430.2594,324.9308,0.0,0.0
1,1,900,,,,1.0,1,World,0,World,...,566.7283,490.1993,231.1772,258.0905,203.7797,368.3186,415.8362,319.3364,0.0,0.0
2,1,900,,,,1.0,1,World,0,World,...,546.3173,477.2639,218.6745,240.0344,197.1422,353.0546,395.5327,309.9103,0.0,0.0
3,1,900,,,,1.0,1,World,0,World,...,535.8289,469.5325,212.8720,232.6019,193.0492,345.0826,385.8433,303.9053,0.0,0.0
4,1,900,,,,1.0,1,World,0,World,...,523.1236,458.4842,205.7622,224.0498,187.4439,335.4420,374.6576,295.9944,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,1,900,,,,1.0,1,World,0,World,...,122.0912,75.3816,42.0803,52.6144,31.2046,82.1630,101.6896,62.0157,0.0,0.0
156,1,900,,,,1.0,1,World,0,World,...,120.9810,74.5713,41.6958,52.1708,30.8825,81.3941,100.8012,61.3725,0.0,0.0
157,1,900,,,,1.0,1,World,0,World,...,119.8785,73.7692,41.3176,51.7328,30.5672,80.6444,99.9306,60.7502,0.0,0.0
158,1,900,,,,1.0,1,World,0,World,...,118.7821,73.0390,40.9472,51.2861,30.2770,79.9215,99.0661,60.1766,0.0,0.0


In [21]:
# List every column name available in the World subset.
df_unpd_world_indicators.columns

Index(['SortOrder', 'LocID', 'Notes', 'ISO3_code', 'ISO2_code', 'SDMX_code',
       'LocTypeID', 'LocTypeName', 'ParentID', 'Location', 'VarID', 'Variant',
       'Time', 'TPopulation1Jan', 'TPopulation1July', 'TPopulationMale1July',
       'TPopulationFemale1July', 'PopDensity', 'PopSexRatio', 'MedianAgePop',
       'NatChange', 'NatChangeRT', 'PopChange', 'PopGrowthRate',
       'DoublingTime', 'Births', 'Births1519', 'CBR', 'TFR', 'NRR', 'MAC',
       'SRB', 'Deaths', 'DeathsMale', 'DeathsFemale', 'CDR', 'LEx', 'LExMale',
       'LExFemale', 'LE15', 'LE15Male', 'LE15Female', 'LE65', 'LE65Male',
       'LE65Female', 'LE80', 'LE80Male', 'LE80Female', 'InfantDeaths', 'IMR',
       'LBsurvivingAge1', 'Under5Deaths', 'Q5', 'Q0040', 'Q0040Male',
       'Q0040Female', 'Q0060', 'Q0060Male', 'Q0060Female', 'Q1550',
       'Q1550Male', 'Q1550Female', 'Q1560', 'Q1560Male', 'Q1560Female',
       'NetMigrations', 'CNMR'],
      dtype='object')

In [22]:
# Export the scenario-filtered indicators for all locations. Writing the
# World-only frame here would make this file identical to
# unpd_world_indicators.csv; using the scenario-filtered frame mirrors the
# gbd_scenarios_pop.csv / gbd_world_pop.csv pair written for the GBD data.
# NOTE(review): confirm that readers of this file expect every location.
df_unpd_all_indicators_scenario_filter.to_csv(
    os.path.join(prepped_data_root, "unpd_combined_indicators.csv")
)

In [23]:
# World-only subset, for global stats and working out 8BN.
# The path is built from prepped_data_root, matching the GBD export cell,
# so the output directory is configured in one place.
df_unpd_world_indicators.to_csv(
    os.path.join(prepped_data_root, "unpd_world_indicators.csv")
)

# Global Burden of Disease

Do the same thing with the GBD study

In [12]:
# Values of scenario_name to keep when filtering the GBD tables.
gbd_scenarios = [
    "Reference",
    "SDG Met Need and Education",
]

# Input CSVs for the GBD data: a population file and a live-births file.
# NOTE(review): the directory part repeats the value held in gbd_root;
# consider deriving these paths from it.
gbd_pop_all_sex_all_age = (
    "../../data/gbd_data/IHME_POP_2017_2100_POP_BOTH_SEX_ALL_AGE_Y2020M05D01.CSV"
)
gbd_births = (
    "../../data/gbd_data/IHME_POP_2017_2100_LIVE_BIRTHS_Y2020M05D01.CSV"
)

In [7]:
# Load the GBD population table first, then the live-births table.
df_gbd_pop_all, df_gbd_births = map(
    pd.read_csv,
    (gbd_pop_all_sex_all_age, gbd_births),
)

In [16]:
# Keep only the rows whose scenario_name is one of the names in gbd_scenarios.
# Series.isin performs the membership test in a single vectorised call
# instead of invoking a Python lambda once per row.
df_gbd_pop_all_scenario_filtered = df_gbd_pop_all[
    df_gbd_pop_all["scenario_name"].isin(gbd_scenarios)
]

df_gbd_births_scenario_filtered = df_gbd_births[
    df_gbd_births["scenario_name"].isin(gbd_scenarios)
]

In [19]:
# From each scenario-filtered GBD table, keep the rows whose location_name
# is "Global".
df_gbd_births_global = df_gbd_births_scenario_filtered.loc[
    df_gbd_births_scenario_filtered["location_name"] == "Global"
]

df_gbd_pop_all_global = df_gbd_pop_all_scenario_filtered.loc[
    df_gbd_pop_all_scenario_filtered["location_name"] == "Global"
]

In [22]:
# Write the prepared GBD tables into prepped_data_root, one CSV per frame:
# the scenario-filtered population, then the Global population and births.
for _frame, _csv_name in (
    (df_gbd_pop_all_scenario_filtered, "gbd_scenarios_pop.csv"),
    (df_gbd_pop_all_global, "gbd_world_pop.csv"),
    (df_gbd_births_global, "gbd_world_births.csv"),
):
    _frame.to_csv(os.path.join(prepped_data_root, _csv_name))