In [None]:
from IPython.display import display, HTML

import xlrd
import pandas as pd
import geopandas
import numpy as np
import seaborn as sb
import matplotlib as mlp
import matplotlib.pyplot as plt

import geopandas as gp

%matplotlib inline
plt.style.use('ggplot')
mlp.rcParams['figure.figsize'] = [8.0, 8.0]

In [None]:

# Plot styling: apply the seaborn "whitegrid" theme first, then layer the
# individual matplotlib overrides on top of it.
sb.set_style("whitegrid")
for rc_key, rc_value in {
    "ytick.labelsize": 14,
    "axes.labelsize": 16,
    "xtick.labelsize": 16,
    "grid.linewidth": 2.2,
    "axes.edgecolor": "0.15",
    "axes.linewidth": 1.25,
    "legend.frameon": True,
    "legend.framealpha": 0.9,
}.items():
    plt.rcParams[rc_key] = rc_value

In [None]:
# Yearly cohort workbooks, keyed by cohort year.
# NOTE: xlrd >= 2.0 no longer reads .xlsx files, so this cell needs xlrd < 2.0
# (the parsing helpers rely on the xlrd sheet API: cell_type, row_slice, cell_value).
COHORTES_DIR = "../../../../Outcome Measurement Data/TUBERCULOSIS/COHORTES/"
COHORTE_FILES = {
    2012: "cohorte 2012 consolidado y por area .xlsx",
    2013: "cohorte 2013 consolidado y por area .xlsx",
    2014: "cohorte 2014 consolidado y por area .xlsx",
    2015: "cohorte 2015 consolidado y por areas .xlsx",  # the only file spelled "areas"
    2016: "cohorte 2016 consolidado y por area .xlsx",
}
cohortes = {
    year: xlrd.open_workbook(COHORTES_DIR + filename)
    for year, filename in COHORTE_FILES.items()
}

In [None]:
# Lookup table: (department code, [sheet / DAS name spellings seen in the workbooks]).
# Names are compared after .strip().upper(), so every alias is upper case.
# Code 0 is the consolidated (national) sheet.
UglyDAS2DeptoCode = [
    (0, ["CONSOLIDADO"]),
    (9, ["XELA", "QUETZALTENANGO"]),
    (1, [
        "GUATE NOR OCC", "GUATE SUR", "GUATE SU", "GUATEM SUR", "GUATE NOR ORI",
        "GUATE NOR OR", "GUATE NOR OC", "GUATE NOR ORIENTE", "DAGC", "DASGC",
        "GUATEMALA SUR",
        "HOSPITAL SAN VICENTE",
        "GUATEMALA CENTRAL", "GUATEMALA NOR OCCIDENTE", "GUATEMALA NOR ORIENTE",
    ]),
    (7, ["SOLOLA", "SOLOL", "SOLOLÁ"]),
    (21, ["JALAPA", "JALAP"]),
    (5, ["ESCUINTLA", "ESCUINTL"]),
    (16, ["ALT VERAPAZ", "ALTA VERAPAZ", "ALTA VERAPA", "ALTA VERAP"]),
    (8, ["TOTO", "TOTONICAPAN"]),
    (13, ["HUEHUE", "HUEHUETENANGO", "HUEHUETE"]),
    (6, ["SAN ROSA", "SANTA ROSA", "ROSA"]),
    (12, ["SAN MARCOS", "SAN MARC", "SN MARCOS"]),
    (3, ["SACATEPEQU", "SACATEPEQUEZ", "SACATEP"]),
    (2, ["PROGRESO", "EL PROGRESO"]),
    (17, [
        "PET NORTE", "PETEN NOR", "PET SUR ORIENTAL", "PET SUR OR", "PET SUR OCC",
        "PETEN SUR OCCID", "PETEN SUR OCC", "PETEN NORTE", "PETE SUR ORIENTE",
        "PETEN SUR OR", "PETÉN SUR OCCIDENTE", "PETEN NORTE", "PETEN SUR ORIENTE",
    ]),
    (22, ["JUTIAPA", "JUTIAP"]),
    (14, ["QUICHE", "IXIL", "IXCAN", "QUICHÉ", "IXI"]),
    (20, ["CHIQUIMULA", "CHIQUI", "CHIQUIMULS", "CHIGUIMULA"]),
    (19, ["ZACAPA", "ZACAP"]),
    (4, ["CHIMALTENANGO", "CHIMALTENAGO"]),
    (15, ["BAJA VERAP", "BAJA VERAPA", "BAJA VER", "BAJA VERAPAZ"]),
    (18, ["IZABAL"]),
    (10, ["SUCHITEPEQUEZ", "SUCHI"]),
    (11, ["REU", "REHU", "RETALHULEU"]),
]

In [None]:
# Cell types: values compared against sheet.cell_type() / cell.ctype.
# 0, 1, 2 and 5 match xlrd's XL_CELL_EMPTY / TEXT / NUMBER / ERROR codes.
ERROR = 5
NUMBER = 2
TEXT = 1
EMPTY = 0
MERGED = -1
MERGER = -2

# Helper functions

def extract_table_A(sheet, row_i, extra_cols=None, col_row_offset=2, row_jump=2, col_n=False,
                    col_offset_pre=2, width=8):
    """
    Parse one table of a cohort sheet into long-format records.

    Starting at ``row_i``, every ``row_jump``-th row is read until a row whose first
    cell is empty, or the end of the sheet, is reached. With the default
    ``row_jump=2`` the rows in between (the percentage rows) are ignored.
    There will be many parsers because the tables are a mess.

    Parameters
    ----------
    sheet : xlrd-style sheet
        Must expose ``nrows``, ``ncols``, ``name``, ``cell_type(row, col)`` and
        ``row_slice(row, start, end)``.
    row_i : int
        0-based index of the first data row.
    extra_cols : list, optional
        Values prepended to every record. The argument itself is never modified.
    col_row_offset : int
        The column headers are read from row ``row_i - col_row_offset``.
    row_jump : int
        Step between consecutive data rows.
    col_n : bool
        When true, the 0-based position of the value column is appended to each record.
    col_offset_pre : int
        Number of cells between the table's left edge (the row label) and the first
        value cell.
    width : int or None
        Number of header/value cells to read. ``None`` auto-detects it as the run of
        non-empty cells in row ``row_i`` starting at the table's left edge.

    Returns
    -------
    list of list
        One record per (data row, value cell):
        ``extra_cols + [row label, column header, value]`` plus the column position
        when ``col_n`` is set. Empty-string values become ``np.nan``.
        ``[]`` when ``row_i`` is beyond the sheet or the table's left edge cannot be
        located.
    """
    prefix = [] if extra_cols is None else list(extra_cols)

    # Validate the start row before touching any cell. Row indices are 0-based,
    # so row_i == sheet.nrows is already past the end.
    if row_i >= sheet.nrows:
        print("Error. Row is beyond sheet size.", sheet.name, row_i)
        return []

    # Tables may be shifted right by leading empty columns: the first non-empty
    # cell of row 5 marks the table's left edge.
    col_offset = 0
    if sheet.nrows > 5:
        while col_offset < sheet.ncols and sheet.cell_type(5, col_offset) == EMPTY:
            col_offset += 1
    if sheet.nrows <= 5 or col_offset >= sheet.ncols:
        print("Error. Could not detect the column offset.", sheet.name, row_i)
        return []

    if width is None:
        width = 0
        # Stop at the sheet's right edge instead of indexing past it.
        while (col_offset + width < sheet.ncols
               and sheet.cell_type(row_i, col_offset + width) != EMPTY):
            width += 1

    cols = sheet.row_slice(row_i - col_row_offset, col_offset_pre + col_offset,
                           col_offset_pre + width + col_offset)

    data = []
    # A table that runs down to the last row of the sheet ends at the sheet
    # boundary rather than raising IndexError.
    while row_i < sheet.nrows:
        # NOTE(review): only 8 cells per row are read, so at most
        # 8 - col_offset_pre values are returned even when `width` is larger.
        # Kept as in the original; confirm whether the cap is intended.
        data_row = sheet.row_slice(row_i, col_offset, 8 + col_offset)
        if not data_row or data_row[0].ctype == EMPTY:
            break
        for i, val in enumerate(data_row[col_offset_pre: col_offset_pre + width]):
            row_data = prefix + [data_row[0].value, cols[i].value,
                                 val.value if val.value != "" else np.nan]
            if col_n:
                row_data.append(i)
            data.append(row_data)
        row_i += row_jump
    return data

def process_table_A(cohortes, descriptor):
    """
    Extract every table and single value described by ``descriptor`` from all
    sheets of that year's workbook.

    Parameters
    ----------
    cohortes : dict
        Workbooks keyed by year; ``cohortes[descriptor["year"]].sheets()`` is iterated.
    descriptor : dict
        ``{"year": int, "tables": [{"title", "row"}, ...]}`` and optionally
        ``"values": [{"title", "row", "col"}, ...]`` for single cells.

    Returns
    -------
    list of list
        Records ``[year, title, deptocode, row label, column header, value]``.
        Single values use ``""`` for the row label and column header.
        ``deptocode`` is ``np.nan`` when the sheet name does not match exactly one
        entry of ``UglyDAS2DeptoCode`` (a message is printed in that case).
    """
    year = descriptor["year"]
    cohorte = cohortes[year]
    data = []
    for sheet in cohorte.sheets():
        sheet_key = sheet.name.strip().upper()
        code_search = [item for item in UglyDAS2DeptoCode if sheet_key in item[1]]
        if len(code_search) == 1:
            code = code_search[0][0]
        else:
            code = np.nan
            print("Error when looking for code ", code_search, "in sheet",
                  sheet.name, "from year", year)
        for table in descriptor["tables"]:
            data.extend(extract_table_A(sheet, table["row"], [year, table["title"], code]))
        if "values" in descriptor:
            # Same left-edge detection as the table parser: first non-empty cell
            # of row 5. The scan is bounded so a short or blank sheet cannot raise
            # IndexError; in that case every value is reported as out of bounds.
            col_offset = 0
            while (sheet.nrows > 5 and col_offset < sheet.ncols
                   and sheet.cell_type(5, col_offset) == EMPTY):
                col_offset += 1

            for value in descriptor["values"]:
                # value["col"] is shifted by the detected offset minus one.
                col = value["col"] + col_offset - 1
                if sheet.nrows > value["row"] and sheet.ncols > col:
                    value_ = sheet.cell_value(value["row"], col)
                    data.append([year, value["title"], code, "", "", value_])
                else:
                    print("Error: value out of bounds (",sheet.nrows,", ",sheet.ncols,") - ", value)
                    # Report the same column index that the bounds check used.
                    print("\t", (sheet.nrows <= value["row"]), (sheet.ncols <= col))
    return data

In [None]:
# Layout descriptors for the treatment-outcome tables, one per cohort year.
#   "tables": (title, row)      -> start row handed to extract_table_A
#   "values": (title, row, col) -> single cells read by process_table_A
tablas_bk = [
    {
        "year": 2016,
        "tables": [{"title": title, "row": row} for title, row in [
            ("Nuevos Pulmonares BK+", 204),
            ("Nuevos Pulmonares pediatricos BK+", 243),
            ("Nuevos Pulmonares BK-", 321),
            ("Nuevos Pulmonares pediatricos BK-", 339),
            ("Nuevos Extrapulmonares BK+", 285),
            ("Nuevos Extrapulmonares pediatricos BK+", 303),
            ("Nuevos Extrapulmonares BK-", 358),
            ("Nuevos Extrapulmonares pediatricos BK-", 377),
            ("Nuevos Pulmonares TB/VIH BK+", 395),
            ("Retratamiento BK+", 435),
            ("Retratamiento Recaidas & BK+", 455),
            ("Retratamiento Abandonos Recuperados & BK+", 474),
            ("Retratamiento Fracasos & BK+", 493),
            ("Retratamiento TB/VIH & BK+", 513),
            ("Retratamiento BK-", 594),
            ("Retratamiento Recaidas & BK-", 612),
            ("Retratamiento Abandonos Recuperados & BK-", 629),
            ("Retratamiento Fracasos & BK-", 646),
            ("Retratamiento TB/VIH & BK-", 664),
            ("Retratamiento Extrapulmonar", 734),
            ("Retratamiento Extrapulmonar & TB/VIH", 804),
        ]],
        "values": [{"title": title, "row": row, "col": col} for title, row, col in [
            ("MDR sensitivity tests on new", 189, 2),
            ("MDR confirmed cases from new", 190, 2),
            ("MDR test on previously treated", 191, 2),
            ("MDR confirmed from previously treated", 192, 2),
            ("MDR 2nd line treatment begun", 193, 2),
            ("MDR TB/HIV", 194, 2),
            ("MDR less than 10 years", 195, 2),
            ("RR sensitivity tests on new", 189, 5),
            ("RR confirmed cases from new", 190, 5),
            ("RR test on previously treated", 191, 5),
            ("RR confirmed from previously treated", 192, 5),
            ("RR 2nd line treatment begun", 193, 5),
            ("RR TB/HIV", 194, 5),
            ("RR less than 10 years", 195, 5),
        ]],
    },
    {
        "year": 2015,
        "tables": [{"title": title, "row": row} for title, row in [
            ("Nuevos Pulmonares BK+", 176),
            ("Nuevos Pulmonares BK-", 196),
            ("Nuevos Extrapulmonares", 213),
            ("Nuevos Extrapulmonares TB/VIH", 268),
            ("Nuevos Pulmonares TB/VIH BK+", 232),
            # NOTE(review): same title as the entry above. The 2014 descriptor pairs
            # a BK+ and a BK- table at this spot, so this may be meant as
            # "... TB/VIH BK-". Confirm against the workbook.
            ("Nuevos Pulmonares TB/VIH BK+", 251),
            ("Retratamiento BK+", 287),
            ("Retratamiento Recaidas & BK+", 307),
            ("Retratamiento Abandonos Recuperados & BK+", 326),
            ("Retratamiento Fracasos & BK+", 345),
            ("Retratamiento TB/VIH & BK+", 365),
            ("Retratamiento BK-", 443),
            ("Retratamiento Recaidas & BK-", 460),
            ("Retratamiento Abandonos Recuperados & BK-", 477),
            ("Retratamiento Fracasos & BK-", 494),
            ("Retratamiento TB/VIH & BK-", 512),
            ("Retratamiento Extrapulmonar", 582),
            ("Retratamiento Extrapulmonar & TB/VIH", 652),
        ]],
        "values": [{"title": title, "row": row, "col": col} for title, row, col in [
            ("MDR sensitivity tests on new", 161, 2),
            ("MDR confirmed cases from new", 162, 2),
            ("MDR test on previously treated", 163, 2),
            ("MDR confirmed from previously treated", 164, 2),
            ("MDR 2nd line treatment begun", 165, 2),
            ("MDR TB/HIV", 166, 2),
            ("MDR less than 10 years", 167, 2),
        ]],
    },
    {
        "year": 2014,
        "tables": [{"title": title, "row": row} for title, row in [
            ("Nuevos Pulmonares BK+", 172),
            ("Nuevos Pediatricos BK+", 226),
            ("Nuevos Pulmonares BK-", 192),
            ("Nuevos Pediatricos BK-", 246),
            ("Nuevos Extrapulmonares", 209),
            ("Nuevos Pulmonares TB/VIH BK+", 281),
            ("Nuevos Pulmonares TB/VIH BK-", 300),
            ("Nuevos Extrapulmonares Pediatricos", 263),
            ("Nuevos Extrapulmonares TB/VIH", 317),
            ("Nuevos Pediatricos TB/VIH BK+", 335),
            ("Nuevos Pediatricos TB/VIH BK-", 354),
            # NOTE(review): row 354 is also used by the entry above, so the same
            # table is read under two titles. One of the two rows is probably
            # wrong. Confirm against the workbook.
            ("Nuevos Extrapulmonares Pediatricos TB/VIH", 354),
            ("Retratamiento BK+", 389),
            ("Retratamiento BK-", 545),
            ("Retratamiento Pediatricos BK+", 825),
            ("Retratamiento Pediatricos BK-", 981),
            ("Retratamiento Pediatricos BK+ TB/VIH", 902),
            ("Retratamiento Pediatricos BK- TB/VIH", 1052),
            ("Retratamiento BK+ TB/VIH", 467),
            ("Retratamiento BK- TB/VIH", 614),
            ("Retratamiento Extrapulmonar", 684),
            ("Retratamiento Extrapulmonar TB/VIH", 754),
            ("Retratamiento Extrapulmonar Pediatricos", 1123),
            ("Retratamiento Extrapulmonar Pediatricos TB/VIH", 1194),
        ]],
        "values": [{"title": title, "row": row, "col": col} for title, row, col in [
            ("MDR sensitivity tests on new", 161, 2),
            ("MDR confirmed cases from new", 162, 2),
            ("MDR test on previously treated", 163, 2),
            ("MDR confirmed from previously treated", 164, 2),
            ("MDR 2nd line treatment begun", 165, 2),
            ("MDR TB/HIV", 166, 2),
            ("MDR less than 10 years", 167, 2),
        ]],
    },
    {
        "year": 2013,
        "tables": [{"title": title, "row": row} for title, row in [
            ("Nuevos Pulmonares BK+", 149),
            ("Nuevos Pediatricos", 205),
            ("Nuevos Pediatricos Extrapulmonares", 257),
            ("Nuevos Pediatricos BK+", 222),
            ("Nuevos Pulmonares BK-", 169),
            ("Nuevos Pediatricos BK-", 240),
            ("Nuevos Extrapulmonares", 186),
            ("Nuevos Pulmonares TB/VIH BK+", 275),
            ("Nuevos Pulmonares TB/VIH BK-", 294),
            ("Nuevos Extrapulmonares TB/VIH", 311),
            ("Nuevos Pediatricos TB/VIH BK+", 347),
            ("Nuevos Pediatricos TB/VIH BK-", 364),
            ("Nuevos Extrapulmonares Pediatricos TB/VIH", 382),
            ("Retratamiento BK+", 399),
            ("Retratamiento TB/VIH BK+", 477),
            ("Retratamiento BK-", 555),
            ("Retratamiento TB/VIH BK-", 624),
            ("Retratamiento Extrapulmonar", 694),
            ("Retratamiento Extrapulmonar TB/VIH", 764),
            ("Retratamiento Pediatricos", 835),
            ("Retratamiento Pediatricos TB/VIH", 904),
        ]],
        "values": [{"title": title, "row": row, "col": col} for title, row, col in [
            ("MDR sensitivity tests on new", 134, 2),
            ("MDR confirmed cases from new", 135, 2),
            ("MDR test on previously treated", 136, 2),
            ("MDR confirmed from previously treated", 137, 2),
            ("MDR 2nd line treatment begun", 138, 2),
            ("MDR TB/HIV", 139, 2),
            ("MDR less than 14 years", 140, 2),
        ]],
    },
    {
        "year": 2012,
        "tables": [{"title": title, "row": row} for title, row in [
            ("Nuevos Pulmonares BK+", 150),
            ("Nuevos Pediatricos", 206),
            ("Nuevos Pediatricos Extrapulmonares", 258),
            ("Nuevos Pediatricos BK+", 223),
            ("Nuevos Pulmonares BK-", 170),
            ("Nuevos Pediatricos BK-", 241),
            ("Nuevos Extrapulmonares", 187),
            ("Nuevos Pulmonares TB/VIH BK+", 276),
            ("Nuevos Pulmonares TB/VIH BK-", 295),
            ("Nuevos Extrapulmonares TB/VIH", 312),
            ("Nuevos Pediatricos TB/VIH BK+", 348),
            ("Nuevos Pediatricos TB/VIH BK-", 365),
            ("Nuevos Extrapulmonares Pediatricos TB/VIH", 383),
            ("Retratamiento BK+", 400),
            ("Retratamiento TB/VIH BK+", 478),
            ("Retratamiento BK-", 556),
            ("Retratamiento TB/VIH BK-", 625),
            ("Retratamiento Extrapulmonar", 695),
            ("Retratamiento Extrapulmonar TB/VIH", 765),
            ("Retratamiento Pediatricos", 836),
            ("Retratamiento Pediatricos TB/VIH", 905),
        ]],
        "values": [{"title": title, "row": row, "col": col} for title, row, col in [
            ("MDR sensitivity tests on new", 135, 2),
            ("MDR confirmed cases from new", 136, 2),
            ("MDR test on previously treated", 137, 2),
            ("MDR confirmed from previously treated", 138, 2),
            ("MDR 2nd line treatment begun", 139, 2),
            ("MDR TB/HIV", 140, 2),
            ("MDR less than 14 years", 141, 2),
        ]],
    },
]

In [None]:
# Flatten the records of every yearly descriptor into a single list.
data = [
    record
    for descriptor in tablas_bk
    for record in process_table_A(cohortes, descriptor)
]

In [None]:
# Long-format frame: each record from process_table_A becomes one row
# (year, table title, department code, row label, column header, cell value).
datadf = pd.DataFrame(data, columns = ["year", "table", "deptocode", "row_name", "col_name", "value"])

In [None]:
# Sanity check: (rows, columns) of the parsed frame.
datadf.shape

In [None]:
# Harmonise the outcome labels (row_name) and the total-column label (col_name)
# across years. Labels without an entry in a lookup are left unchanged. The
# misspelt keys are kept exactly as written so they keep matching the source labels.
row_name_synonyms = {
    "ABANDONOS": "ABANDONO/PERDIDA_SEGM",
    "PERIDA EN EL SEGUIMIENTO": "ABANDONO/PERDIDA_SEGM",
    "PERIDA EN EL SEGUMIENTO": "ABANDONO/PERDIDA_SEGM",
    "FRACASOS": "FRACASO_TERAPEUTICO",
    "FRACASO TERAPEUTICO": "FRACASO_TERAPEUTICO",
    "TRANSFERIDOS": "TRANSFERIDOS",
    "TRASLADOS": "TRANSFERIDOS",
    "CURADO": "CURADOS",
    "CURADOS": "CURADOS",
}
datadf["row_name"] = datadf.row_name.map(lambda x: row_name_synonyms.get(x, x))

col_name_synonyms = {
    "TOTAL": "TOTAL",
    "TOTAL TRIMESTRES": "TOTAL",
}
datadf["col_name"] = datadf.col_name.map(lambda x: col_name_synonyms.get(x, x))

# Coarser English outcome groups, derived from the harmonised row_name.
# NOTE(review): "FRACASO_TERAPEUTICO" is grouped under "COMPLETED TREATMENT",
# exactly as in the original mapping. Confirm that this is intended.
outcome_groups = {
    "ABANDONO/PERDIDA_SEGM": "LOST TO FOLLOW-UP",
    "FRACASO_TERAPEUTICO": "COMPLETED TREATMENT",
    "CURADOS": "COMPLETED TREATMENT",
    "TRATAMIENTOS_COMPLETOS": "COMPLETED TREATMENT",
    "TRATAMIENTOS COMPLETOS": "COMPLETED TREATMENT",
    "TRANSFERIDOS": "REFERRED",
    "TRASLADOS": "REFERRED",
    "FALLECIDOS": "DEATHS",
}
datadf["row_name_B"] = datadf.row_name.map(lambda x: outcome_groups.get(x, x))

# Empty cells become NaN so the column can be cast to float.
# np.nan is used because the np.NAN alias was removed in NumPy 2.0.
datadf["value"] = datadf.value.map(lambda x: x if x != "" else np.nan).astype(float)

In [None]:
# These are the total we expect to encounter:
# columns 1 and 6 of the first sheet of each workbook, every second row up to row 24.
# The 2016 workbook is read starting one row earlier than the other years.
for year, first_row in [(2012, 3), (2013, 3), (2014, 3), (2015, 3), (2016, 2)]:
    summary_sheet = cohortes[year].sheet_by_index(0)
    expected = [
        [summary_sheet.cell_value(y, x) for y in range(first_row, 25, 2)]
        for x in [1, 6]
    ]
    print(np.array(expected).T)

In [None]:
# Build fields Nuevos Pediatricos (2016): sum of the pulmonary pediatric BK+ and BK- tables.
# Note: groupby drops rows whose deptocode is NaN (sheets whose name was not recognised).
new_data = (
    datadf[(datadf.year == 2016) &
           datadf.table.isin(["Nuevos Pulmonares pediatricos BK+",
                              "Nuevos Pulmonares pediatricos BK-"])]
    .groupby(["year", "deptocode", "row_name", "row_name_B", "col_name"]).value.sum().reset_index()
    .assign(table="Nuevos Pediatricos")
)
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
# Any 2016 "Nuevos Pediatricos" rows left by an earlier run of this cell are
# dropped first, so re-running the cell does not duplicate the aggregate.
datadf = pd.concat(
    [datadf[~((datadf.year == 2016) & datadf.table.isin(["Nuevos Pediatricos"]))], new_data],
    ignore_index=True)

In [None]:
# Build fields Nuevos Pulmonares BK+ (2016) net of pediatric cases:
# "Nuevos Pulmonares BK+" minus "Nuevos Pulmonares pediatricos BK+".
# The pediatric tables get a multiplier of -1 so a signed sum yields the difference.
# NOTE: this cell replaces the table it reads from, so it must run exactly once
# per kernel session; running it again would subtract the pediatric cases twice.
datadf["temp_multiplier"] = 1
datadf.loc[datadf.table.isin(["Nuevos Pulmonares pediatricos BK+",
                              "Nuevos Pulmonares pediatricos BK-"]), "temp_multiplier"] = -1
new_data = (
    datadf[(datadf.year == 2016) &
           datadf.table.isin(["Nuevos Pulmonares pediatricos BK+",
                              "Nuevos Pulmonares BK+"])]
    # Signed value first, then a plain grouped sum (same result as the former
    # groupby.apply, NaN values are skipped either way).
    .assign(value=lambda df: df.temp_multiplier * df.value)
    .groupby(["year", "deptocode", "row_name", "row_name_B", "col_name"]).value.sum()
    .reset_index().assign(table="Nuevos Pulmonares BK+")
)
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
datadf = pd.concat(
    [datadf[~((datadf.year == 2016) & datadf.table.isin(["Nuevos Pulmonares BK+"]))], new_data],
    ignore_index=True)

In [None]:
# Build fields Nuevos Pulmonares BK- (2016) net of pediatric cases:
# "Nuevos Pulmonares BK-" minus "Nuevos Pulmonares pediatricos BK-".
# Relies on the temp_multiplier column (-1 on the pediatric tables, 1 elsewhere).
# NOTE: like the BK+ cell, this replaces the table it reads from. Run it once per
# kernel session.
new_data = (
    datadf[(datadf.year == 2016) &
           datadf.table.isin(["Nuevos Pulmonares pediatricos BK-",
                              "Nuevos Pulmonares BK-"])]
    # Signed value first, then a plain grouped sum (same result as the former
    # groupby.apply, NaN values are skipped either way).
    .assign(value=lambda df: df.temp_multiplier * df.value)
    .groupby(["year", "deptocode", "row_name", "row_name_B", "col_name"]).value.sum()
    .reset_index().assign(table="Nuevos Pulmonares BK-")
)
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
datadf = pd.concat(
    [datadf[~((datadf.year == 2016) & datadf.table.isin(["Nuevos Pulmonares BK-"]))], new_data],
    ignore_index=True)

In [None]:
# Build fields Nuevos Extrapulmonares (2016): sum of the extrapulmonary BK+ and BK- tables.
new_data = (
    datadf[(datadf.year == 2016) &
           datadf.table.isin(["Nuevos Extrapulmonares BK+",
                              "Nuevos Extrapulmonares BK-"])]
    .groupby(["year", "deptocode", "row_name", "row_name_B", "col_name"]).value.sum().reset_index()
    .assign(table="Nuevos Extrapulmonares")
)
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
# Any 2016 "Nuevos Extrapulmonares" rows left by an earlier run of this cell are
# dropped first, so re-running the cell does not duplicate the aggregate.
datadf = pd.concat(
    [datadf[~((datadf.year == 2016) & datadf.table.isin(["Nuevos Extrapulmonares"]))], new_data],
    ignore_index=True)

In [None]:
# Build fields Nuevos Pediatricos (2014): sum of the pediatric BK+, BK- and
# extrapulmonary tables.
new_data = (
    datadf[(datadf.year == 2014) &
           datadf.table.isin(["Nuevos Pediatricos BK+",
                              "Nuevos Pediatricos BK-",
                              "Nuevos Extrapulmonares Pediatricos"])]
    .groupby(["year", "deptocode", "row_name", "row_name_B", "col_name"]).value.sum().reset_index()
    .assign(table="Nuevos Pediatricos")
)
# Replace any existing 2014 "Nuevos Pediatricos" rows with the fresh aggregate.
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
datadf = pd.concat(
    [datadf[~((datadf.year == 2014) & datadf.table.isin(["Nuevos Pediatricos"]))], new_data],
    ignore_index=True)

In [None]:
# Build fields Nuevos TB VIH (all years): sum of every new-case TB/VIH table
# that exists for the year.
# 2016 163
new_data = (
    datadf[datadf.table.isin(["Nuevos Pulmonares TB/VIH BK+",
                              "Nuevos Pulmonares TB/VIH BK-",
                              "Nuevos Extrapulmonares TB/VIH",
                              "Nuevos Pediatricos TB/VIH BK+",
                              "Nuevos Pediatricos TB/VIH BK-",
                              "Nuevos Extrapulmonares Pediatricos TB/VIH"])]
    .groupby(["year", "deptocode", "row_name", "row_name_B", "col_name"]).value.sum().reset_index()
    .assign(table="Nuevos TB/VIH")
)
# Replace any existing "Nuevos TB/VIH" rows with the fresh aggregate.
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
datadf = pd.concat(
    [datadf[~datadf.table.isin(["Nuevos TB/VIH"])], new_data],
    ignore_index=True)

In [None]:
# Build fields Retratamientos (all years): retreatment BK+, BK- and extrapulmonary.
# 2016 163
# The extrapulmonary table is titled "Retratamiento Extrapulmonar" in every yearly
# descriptor. The previous spelling "Retratamiento Extrapulmonares" matched no table,
# so extrapulmonary retreatments were silently left out of the aggregate.
new_data = (
    datadf[datadf.table.isin(["Retratamiento BK+",
                              "Retratamiento BK-",
                              "Retratamiento Extrapulmonar"])]
    .groupby(["year", "deptocode", "row_name", "row_name_B", "col_name"]).value.sum().reset_index()
    .assign(table="Retratamiento")
)
# Replace any existing "Retratamiento" rows with the fresh aggregate.
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
datadf = pd.concat(
    [datadf[~datadf.table.isin(["Retratamiento"])], new_data],
    ignore_index=True)

In [None]:
# Remove the helper column used for the signed sums. The guard keeps the cell
# re-runnable: a second run no longer raises KeyError.
if "temp_multiplier" in datadf.columns:
    del datadf["temp_multiplier"]

In [None]:
# Export the harmonised cohort data.
# NOTE(review): no index=False here, so the DataFrame index is written as an unnamed
# first column, unlike the extra-tables export at the end of the notebook. Confirm
# whether downstream readers rely on that column before changing it.
datadf.to_csv("../../../../Outcome Measurement Data/TUBERCULOSIS/GTM - Tx cohort data 2012-2016.csv")

In [None]:
# Show the rows whose table title starts with "MDR".
datadf[[title.startswith("MDR") for title in datadf.table]]

In [None]:
# Lets start with the missing basic data for pediatric and extrapulmonares cases:
# yearly sum of the TOTAL column over the main case tables, leaving out the
# consolidated sheet (deptocode 0).
main_tables = ["Nuevos Pulmonares BK+", "Nuevos Pulmonares BK-", "Nuevos Pediatricos",
               "Nuevos Extrapulmonares", "Nuevos TB/VIH", "Retratamiento"]
is_total_column = datadf.col_name.isin(["TOTAL"])
is_main_table = datadf.table.isin(main_tables)
is_department = datadf.deptocode != 0
temp = datadf[is_total_column & is_main_table & is_department].groupby(["year"]).value.sum()
temp

# Otros cuadros

In [None]:
# Layout of the additional tables on the first sheet of each workbook.
#   "row"            -> first data row of the table
#   "col_row_offset" -> how many rows above "row" the column headers sit
#                       (the extraction loop falls back to 1 when the key is absent)
cuadros = [
    # ---- 2012 ----
    {"year": 2012, "row": 974, "table": "SR", "col_row_offset": 1},
    {"year": 2012, "row": 1016, "table": "Nuevos BK+", "col_row_offset": 1},
    {"year": 2012, "row": 1057, "table": "Nuevos BK-", "col_row_offset": 1},
    {"year": 2012, "row": 1098, "table": "Nuevos Extrapulmonares", "col_row_offset": 1},
    {"year": 2012, "row": 1139, "table": "Pediatricos", "col_row_offset": 1},
    {"year": 2012, "row": 1180, "table": "TB/VIH Iniciaron TARV", "col_row_offset": 1},
    {"year": 2012, "row": 1221, "table": "TB/VIH Tx Preventivo", "col_row_offset": 1},
    {"year": 2012, "row": 1266, "table": "SR Extension Cobertura", "col_row_offset": 2},
    {"year": 2012, "row": 1304, "table": "Nuevos BK+ Extension Cobertura", "col_row_offset": 2},
    {"year": 2012, "row": 1342, "table": "Tamizaje Contactos", "col_row_offset": 2},
    {"year": 2012, "row": 1377, "table": "Contactos con TB", "col_row_offset": 2},
    {"year": 2012, "row": 1413, "table": "Contactos Tx 0-10a", "col_row_offset": 2},
    {"year": 2012, "row": 1451, "table": "Centros de salud EATB", "col_row_offset": 2},
    {"year": 2012, "row": 1489, "table": "Puestos de salud EATB", "col_row_offset": 2},
    {"year": 2012, "row": 1525, "table": "Hospitales EATB", "col_row_offset": 2},
    {"year": 2012, "row": 1558, "table": "Jurisdicciones EATB", "col_row_offset": 2},
    {"year": 2012, "row": 1595, "table": "Otros EATB", "col_row_offset": 2},
    {"year": 2012, "row": 1633, "table": "Laboratorios", "col_row_offset": 2},
    {"year": 2012, "row": 1672, "table": "Laboratorios baciloscopias", "col_row_offset": 2},
    {"year": 2012, "row": 1710, "table": "Laboratorios cultivo", "col_row_offset": 2},
    {"year": 2012, "row": 1747, "table": "Laboratorios sensibilidad drogas", "col_row_offset": 2},

    # ---- 2013 ----
    {"year": 2013, "row": 973, "table": "SR", "col_row_offset": 1},
    {"year": 2013, "row": 1015, "table": "Nuevos BK+", "col_row_offset": 1},
    {"year": 2013, "row": 1056, "table": "Nuevos BK-", "col_row_offset": 1},
    {"year": 2013, "row": 1097, "table": "Nuevos Extrapulmonares", "col_row_offset": 1},
    {"year": 2013, "row": 1138, "table": "Pediatricos", "col_row_offset": 1},
    {"year": 2013, "row": 1179, "table": "TB/VIH Iniciaron TARV", "col_row_offset": 1},
    {"year": 2013, "row": 1265, "table": "SR Extension Cobertura", "col_row_offset": 2},
    {"year": 2013, "row": 1303, "table": "Nuevos BK+ Extension Cobertura", "col_row_offset": 2},
    {"year": 2013, "row": 1341, "table": "Tamizaje Contactos", "col_row_offset": 2},
    {"year": 2013, "row": 1376, "table": "Contactos con TB", "col_row_offset": 2},
    # NOTE(review): row 142 is far out of sequence (neighbours are 1376 and 1450, and
    # the 2012 counterpart is 1413). This looks like a typo, possibly for 1412.
    # Confirm against the workbook.
    {"year": 2013, "row": 142, "table": "Contactos Tx 0-5a", "col_row_offset": 2},
    {"year": 2013, "row": 1450, "table": "Centros de salud EATB", "col_row_offset": 2},
    {"year": 2013, "row": 1488, "table": "Puestos de salud EATB", "col_row_offset": 2},
    {"year": 2013, "row": 1524, "table": "Hospitales EATB", "col_row_offset": 2},
    {"year": 2013, "row": 1561, "table": "Jurisdicciones EATB", "col_row_offset": 2},
    {"year": 2013, "row": 1598, "table": "Otros EATB", "col_row_offset": 2},

    # ---- 2014 ----
    {"year": 2014, "row": 1263, "table": "SR"},
    {"year": 2014, "row": 1305, "table": "Nuevos BK+"},
    {"year": 2014, "row": 1346, "table": "Nuevos BK-"},
    {"year": 2014, "row": 1387, "table": "Nuevos Extrapulmonares"},
    {"year": 2014, "row": 1428, "table": "Pediatricos"},
    {"year": 2014, "row": 1469, "table": "TB/VIH Iniciaron TARV"},
    {"year": 2014, "row": 1553, "table": "SR Extension Cobertura", "col_row_offset": 2},
    {"year": 2014, "row": 1593, "table": "Nuevos BK+ Extension Cobertura", "col_row_offset": 2},
    {"year": 2014, "row": 1631, "table": "Tamizaje Contactos", "col_row_offset": 2},
    {"year": 2014, "row": 1666, "table": "Contactos con TB", "col_row_offset": 2},
    {"year": 2014, "row": 1702, "table": "Contactos Tx 0-5a", "col_row_offset": 2},
    {"year": 2014, "row": 1740, "table": "Centros de salud EATB", "col_row_offset": 2},
    {"year": 2014, "row": 1778, "table": "Puestos de salud EATB", "col_row_offset": 2},
    {"year": 2014, "row": 1814, "table": "Hospitales EATB", "col_row_offset": 2},
    {"year": 2014, "row": 1851, "table": "Jurisdicciones EATB", "col_row_offset": 2},
    {"year": 2014, "row": 1888, "table": "Otros EATB", "col_row_offset": 2},
    {"year": 2014, "row": 1926, "table": "Laboratorios", "col_row_offset": 2},
    {"year": 2014, "row": 1965, "table": "Laboratorios baciloscopias", "col_row_offset": 2},
    {"year": 2014, "row": 2003, "table": "Laboratorios cultivo", "col_row_offset": 2},
    {"year": 2014, "row": 2040, "table": "Laboratorios sensibilidad drogas", "col_row_offset": 2},

    # ---- 2015 ----
    {"year": 2015, "row": 1161, "table": "SR"},
    {"year": 2015, "row": 1199, "table": "Nuevos BK+"},
    {"year": 2015, "row": 1236, "table": "Nuevos BK-"},
    {"year": 2015, "row": 1273, "table": "Nuevos Extrapulmonares"},
    {"year": 2015, "row": 1311, "table": "TB/VIH Iniciaron TARV"},
    {"year": 2015, "row": 1350, "table": "VIH Tx Preventivo"},
    {"year": 2015, "row": 1429, "table": "SR Extension Cobertura", "col_row_offset": 2},
    {"year": 2015, "row": 1466, "table": "Nuevos BK+ Extension Cobertura", "col_row_offset": 2},
    {"year": 2015, "row": 1504, "table": "Tamizaje Contactos", "col_row_offset": 2},
    {"year": 2015, "row": 1539, "table": "Contactos con TB", "col_row_offset": 2},
    {"year": 2015, "row": 1575, "table": "Contactos Tx 0-5a", "col_row_offset": 2},
    {"year": 2015, "row": 1802, "table": "Laboratorios", "col_row_offset": 2},
    {"year": 2015, "row": 1841, "table": "Laboratorios baciloscopias", "col_row_offset": 2},
    {"year": 2015, "row": 1879, "table": "Laboratorios cultivo", "col_row_offset": 2},
    {"year": 2015, "row": 1916, "table": "Laboratorios sensibilidad drogas", "col_row_offset": 2},

    # ---- 2016 ----
    {"year": 2016, "row": 1313, "table": "SR"},
    {"year": 2016, "row": 1352, "table": "Nuevos BK+"},
    {"year": 2016, "row": 1391, "table": "TB/VIH Iniciaron TARV"},
    {"year": 2016, "row": 1433, "table": "SR Extension Cobertura", "col_row_offset": 2},
    {"year": 2016, "row": 1508, "table": "Tamizaje Contactos", "col_row_offset": 2},
    {"year": 2016, "row": 1543, "table": "Contactos con TB", "col_row_offset": 2},
    {"year": 2016, "row": 1579, "table": "Contactos Tx 0-5a", "col_row_offset": 2},
    {"year": 2016, "row": 1617, "table": "Centros de salud EATB", "col_row_offset": 2},
    {"year": 2016, "row": 1655, "table": "Puestos de salud EATB", "col_row_offset": 2},
    {"year": 2016, "row": 1691, "table": "ExtCob EATB", "col_row_offset": 2},
    {"year": 2016, "row": 1728, "table": "Otros EATB", "col_row_offset": 2},
    {"year": 2016, "row": 1765, "table": "Hospitales EATB", "col_row_offset": 2},
]

In [None]:
# Parse every extra table from the first sheet of its year's workbook.
# Widths are auto-detected (width=None) and the column position is recorded (col_n=True).
extra_data = []
for cuadro in cuadros:
    first_sheet = cohortes[cuadro["year"]].sheet_by_index(0)
    records = extract_table_A(
        first_sheet,
        cuadro["row"],
        extra_cols=[cuadro["year"], cuadro["table"]],
        col_row_offset=cuadro.get("col_row_offset", 1),
        row_jump=cuadro.get("row_jump", 1),
        col_n=True,
        col_offset_pre=1,
        width=None,
    )
    extra_data.extend(records)

In [None]:
# Sanity check: number of records parsed from the extra tables.
len(extra_data)

In [None]:
# Long-format frame of the extra tables, without total/percentage rows and columns,
# unlabeled cells, and cells with no value.
extradf = pd.DataFrame(extra_data, columns=["year", "table", "das", "column", "value", "col_index"])
is_summary_column = extradf.column.isin(["Total", "%"])
is_summary_row = extradf.das.isin(["Total", "%", ""])
has_header = extradf.column != ""
has_value = ~extradf.value.isna()
extradf = extradf[~is_summary_column & ~is_summary_row & (has_header & has_value)]

In [None]:
def getDeptoCode(inputStr, lookup=None):
    """
    Translate a DAS / sheet name into its department code.

    Parameters
    ----------
    inputStr : object
        Name to look up. It is stripped and upper-cased before matching.
        Non-string values (e.g. numeric or missing cells) are treated as unknown.
    lookup : list of (code, [names]) tuples, optional
        Table to search. Defaults to the module-level ``UglyDAS2DeptoCode``.

    Returns
    -------
    The code of the single entry whose name list contains the cleaned name, or
    ``""`` when there is no match or more than one match.
    """
    if lookup is None:
        lookup = UglyDAS2DeptoCode
    # Excel cells can hold numbers or be missing; those cannot name a DAS.
    if not isinstance(inputStr, str):
        return ""
    key = inputStr.strip().upper()
    matches = [code for code, names in lookup if key in names]
    if len(matches) == 1:
        return matches[0]
    return ""
# Attach the department code of each DAS name ("" when the name is not recognised).
extradf["deptocode"] = extradf.das.apply(getDeptoCode) 
# There are some rows with DAS names set as "Hospital". We don't know the location of those
# hospitals, so they keep the "" fallback code.

In [None]:
# Sanity check: (rows, columns) of the extra-tables frame.
extradf.shape

In [None]:
# Export the extra tables; index=False keeps the DataFrame index out of the file.
extradf.to_csv("../../../../Outcome Measurement Data/TUBERCULOSIS/COHORTES/GTM - Tx Cohort Data - Extra tables.csv", index = False)

In [None]:
# Number of distinct department codes found per extra table
# (the "" fallback for unrecognised DAS names counts as one code).
extradf.groupby(["table"]).deptocode.unique().map(len)