In [None]:
from IPython.display import display, HTML

import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib as mlp
import matplotlib.pyplot as plt
import geopandas as gp
import os

%matplotlib inline
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and later
# releases no longer ship the old names, so use the new name whenever this
# Matplotlib knows it and fall back to the historical one otherwise.
plt.style.use(
    "seaborn-v0_8-whitegrid"
    if "seaborn-v0_8-whitegrid" in plt.style.available
    else "seaborn-whitegrid"
)

In [None]:
# Notebook-wide display defaults: white figure background, 120 dpi figures,
# and DataFrame output that shows up to 1000 columns.
plt.rcParams.update({"figure.facecolor": "w", "figure.dpi": 120})
pd.options.display.max_columns = 1000

In [None]:
# Remember the directory the kernel was in the first time this cell ran, and
# always resolve "../../core" from there. A bare relative chdir would climb two
# more levels every time the cell is re-run in the same kernel.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, "../../core"))
# Project helper modules, importable once the working directory is core/.
import GT_helper_functions as hf
import GT_load_data as data

In [None]:
# Folder holding the multi-country outcome data, given relative to the working directory.
# NOTE(review): the read_csv / to_csv calls in this notebook spell this prefix out
# again instead of using base_dir, so the variable is currently unused.
base_dir = "../../Outcome Measurement Data/MULTI/"

In [None]:
# Load the GBD 2017 results export. Later cells read its metric, location, age,
# cause, year, measure, val, upper and lower columns.
gbd17 = pd.read_csv("../../Outcome Measurement Data/MULTI/GBD-2017/IHME-GBD_2017_DATA-3f789f48-1.csv")

In [None]:
# Show the column names of the loaded export.
gbd17.columns

In [None]:
# (gbd17.age.value_counts() lists the age labels present in the export.)
# Age bands whose rows are summed to build all-age totals.
# NOTE(review): assumes these bands do not overlap and together cover every age —
# confirm against the export, otherwise the sums over- or under-count.
allages = ["Under 5", "5-14 years", "15-49 years", "50-69 years", "70+ years"]
# Countries kept by the count and population filters in this notebook.
countries = ["Guatemala", "El Salvador", "Mexico", "Honduras"]


In [None]:
# Keep absolute counts ("Number" metric) for the selected countries and age bands,
# then total val / upper / lower per cause, year, location and measure.
gbdCounts = (
    gbd17.loc[
        gbd17.age.isin(allages)
        & gbd17.location.isin(countries)
        & (gbd17.metric == "Number")
    ]
    .groupby(["cause", "year", "location", "measure"])
    .agg({"val": "sum", "upper": "sum", "lower": "sum"})
)

In [None]:
# Population exports, one file per block of years, read in the same order as the
# names on the left-hand side.
gbd15_17pop, gbd10_14pop, gbd05_09pop, gbd00_04pop = (
    pd.read_csv(pop_csv)
    for pop_csv in (
        "../../Outcome Measurement Data/MULTI/GBD-2017/IHME_GBD_2017_POP_2015_2017_Y2018M11D08.CSV",
        "../../Outcome Measurement Data/MULTI/GBD-2017/IHME_GBD_2017_POP_2010_2014_Y2018M11D08.CSV",
        "../../Outcome Measurement Data/MULTI/GBD-2017/IHME_GBD_2017_POP_2005_2009_Y2018M11D08.CSV",
        "../../Outcome Measurement Data/MULTI/GBD-2017/IHME_GBD_2017_POP_2000_2004_Y2018M11D08.CSV",
    )
)

In [None]:
# Show which sex_name categories exist and how many rows each has
# (aggPop keeps only the "Both" rows).
gbd15_17pop.sex_name.value_counts()

In [None]:
def aggPop(dataframe, ages=None, locations=None, sex="Both"):
    """Total a population table per year and location.

    Rows are kept when their ``age_group_name`` is in ``ages``, their
    ``location_name`` is in ``locations`` and their ``sex_name`` equals ``sex``;
    the ``val``, ``upper`` and ``lower`` columns of the remaining rows are then
    summed within each (``year_id``, ``location_name``) pair.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Population table with the columns named above.
    ages : list of str, optional
        Age-group labels to include. Defaults to the notebook-level ``allages``.
    locations : list of str, optional
        Location names to include. Defaults to the notebook-level ``countries``.
    sex : str, optional
        ``sex_name`` value to keep. Defaults to ``"Both"``.

    Returns
    -------
    pandas.DataFrame
        Indexed by (``year_id``, ``location_name``) with summed ``val``,
        ``upper`` and ``lower`` columns.
    """
    # Fall back to the notebook-wide selections only when the caller gave none,
    # so existing one-argument calls behave exactly as before.
    if ages is None:
        ages = allages
    if locations is None:
        locations = countries

    keep = (
        dataframe.age_group_name.isin(ages)
        & dataframe.location_name.isin(locations)
        & (dataframe.sex_name == sex)
    )
    return dataframe[keep].groupby(["year_id", "location_name"]).agg({
        "val": "sum",
        "upper": "sum",
        "lower": "sum"
    })

In [None]:
# Stack the per-period population totals into one table indexed by
# (year_id, location_name) and put that index in sorted order.
pops = pd.concat(
    [
        aggPop(period_frame)
        for period_frame in (gbd15_17pop, gbd10_14pop, gbd05_09pop, gbd00_04pop)
    ]
).sort_index()

In [None]:
# Display the population totals in millions.
pops/1000000

In [None]:
# Move the group keys (cause, year, location, measure) from the index into
# ordinary columns. Guarded so that re-running this cell does not reset an
# already-flat frame and add a stray "index" column.
if "cause" not in gbdCounts.columns:
    gbdCounts = gbdCounts.reset_index()

In [None]:
# Collapse detailed cause names into a disease label. Keywords are tried in the
# order listed and the first match wins, so a name containing both "Tuberculosis"
# and "HIV" is labelled "TB"; names matching none of them get "".
# Caveat: HIV/TB co-infection is therefore still not handled separately.
gbdCounts["cause2"] = gbdCounts.cause.map(
    lambda cause_name: next(
        (
            label
            for keyword, label in (
                ("Tuberculosis", "TB"),
                ("HIV", "HIV"),
                ("Malaria", "Malaria"),
            )
            if keyword in cause_name
        ),
        "",
    )
)

In [None]:
# Counts from 2000 onward, totalled per year, disease label, measure and country.
# A plain aggregation is used instead of groupby.apply: the previous lambda read
# the grouping columns inside apply, which recent pandas deprecates.
case_totals = (
    gbdCounts[gbdCounts.year >= 2000]
    .groupby(["year", "cause2", "measure", "location"])["val"]
    .sum()
)
# Population matching each total. pops is indexed by (year_id, location_name), so
# a year/country without a population row raises KeyError rather than silently
# producing a missing rate.
denominators = [
    pops.loc[(year, location), "val"]
    for year, _cause, _measure, location in case_totals.index
]
# Rate per 100,000 people, rounded to 5 decimals, with one column per measure.
summary = (100000 * case_totals / denominators).round(5).unstack(2)
summary.to_csv("../../Outcome Measurement Data/MULTI/GBD-2017/summary.csv")

In [None]:
# Default size and resolution for figures created after this cell runs.
mlp.rcParams["figure.figsize"] = (8, 4)
mlp.rcParams["figure.dpi"] = 200
def tsPlot(disease, measure, prefix, ax = None):
    """Plot one line per location for a disease/measure rate series.

    Reads the notebook-level ``summary`` table, keeps years 2000 through 2017
    for the given disease label, takes the ``measure`` column and spreads the
    locations into separate columns, each drawn as its own line.

    Parameters
    ----------
    disease : str
        Disease label to select from the second index level of ``summary``.
    measure : str
        Column of ``summary`` to plot.
    prefix : str
        Text placed in front of " rate x 100,000" on the y-axis label.
    ax : matplotlib axes, optional
        Axes to draw on; when omitted, pandas creates the axes.
    """
    temp = summary.loc[(slice(2000,2017), slice(disease, disease)),measure].unstack(2)
    # Drop the columns' index name so only the location names appear in the legend.
    temp.columns = temp.columns.values
    # Label the axes that were actually drawn on; going through plt.xlabel /
    # plt.ylabel would label whichever axes happen to be current instead.
    ax = temp.plot(ax = ax)
    ax.set_xlabel("")
    ax.set_ylabel(prefix + " rate x 100,000")


In [None]:
# 3 x 2 grid of rate time series: incidence on the left, mortality on the right,
# one row per disease. Only the TB incidence panel keeps its legend.
mlp.rcParams["figure.figsize"] = (11, 9)
panel_specs = [
    # (disease, measure, y-label prefix, keep legend?)
    ("HIV", "Incidence", "HIV Incidence", False),
    ("HIV", "Deaths", "HIV Mortality", False),
    ("Malaria", "Incidence", "Malaria Incidence", False),
    ("Malaria", "Deaths", "Malaria Mortality", False),
    ("TB", "Incidence", "TB Incidence", True),
    ("TB", "Deaths", "TB Mortality", False),
]
for panel_number, (panel_disease, panel_measure, panel_prefix, keep_legend) in enumerate(panel_specs, start=1):
    ax = plt.subplot(3, 2, panel_number)
    tsPlot(panel_disease, panel_measure, panel_prefix, ax)
    if not keep_legend:
        plt.legend().remove()
plt.tight_layout()

In [None]:
# Rates for 2015 through 2017, with the disease label moved from the index into
# the columns and values rounded to 2 decimals.
summary.loc[(slice(2015,2017), slice(None)), ].unstack(1).round(2)

In [None]:
# Spot check: total 2016 incidence count in El Salvador over every cause whose
# name contains "Tuberculosis".
gbdCounts.loc[
    (gbdCounts.measure == "Incidence")
    & (gbdCounts.location == "El Salvador")
    & (gbdCounts.year == 2016)
    & gbdCounts.cause.map(lambda cause_name: "Tuberculosis" in cause_name),
    "val",
].sum()