In [1]:
import sys
!{sys.executable} -m pip install lxml
import lxml
import pandas as pd
import random
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from tqdm.notebook import tqdm
tqdm.pandas()





In [2]:
from patsy import dmatrices
import statsmodels.api as sm
import math

def train_model(expr, sub_df,  verbose = False):
    y_train, X_train = dmatrices(expr, sub_df, return_type='dataframe')
    logit_model = sm.Logit(endog=y_train, exog=X_train)
    logit_results = logit_model.fit()
    if verbose: print(logit_results.summary())
    coefs = dict(zip(["Intercept"] + parametros,logit_results.params.values))
    conf_int = logit_results.conf_int(0.05).loc[out]
    odds =  math.exp(coefs[out]*1)
    lower_odds = math.exp(conf_int[1])
    upper_odds = math.exp(conf_int[0])
    
    results_as_html = logit_results.summary().tables[0].as_html()
    model_results_as_df = pd.read_html(results_as_html, header=0, index_col=0)[0]
    results_as_html = logit_results.summary().tables[1].as_html()
    model_results_as_df_2 = pd.read_html(results_as_html, header=0, index_col=0)[0]   
    
    return 1- odds, 1-lower_odds, 1-upper_odds, model_results_as_df, model_results_as_df_2

In [3]:
def get_matching_pairs(treated_df, non_treated_df, scaler=True):
    """
    Función de matcheo robada de https://stats.stackexchange.com/questions/206832/matched-pairs-in-python-propensity-score-matching 
    """
    treated_x = treated_df.values #Hay que sacar el ID?
    non_treated_x = non_treated_df.values

    if scaler == True:
        scaler = StandardScaler()

    if scaler:
        scaler.fit(treated_x)
        treated_x = scaler.transform(treated_x)
        non_treated_x = scaler.transform(non_treated_x)

    nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(non_treated_x)
    distances, indices = nbrs.kneighbors(treated_x)
    indices = indices.reshape(indices.shape[0])
    matched = non_treated_df.iloc[indices]
    return matched

In [4]:
def run_test_negative(df, out = "VACUNADO_AL_MENOS_UNA"):

    casos = df[df.CASO].copy()
    controles = df[~df.CASO].copy()
    controles_match = get_matching_pairs(casos,controles).copy()
    casos["match"] = range(len(casos))
    controles_match["match"] = range(len(controles_match))
    casos["ID"] = casos.index
    controles_match["ID"] = controles_match.index

    # HORIZONTAL, PARA COMPARAR
    df_sampleado_horizontal = casos.merge(controles_match, on = "match", suffixes= ["_caso","_control"])
    df_sampleado_horizontal
    mismatch = list(df_sampleado_horizontal[df_sampleado_horizontal.DEPTO_CODE_caso != df_sampleado_horizontal.DEPTO_CODE_control].ID_caso)
    mismatch = mismatch + list(df_sampleado_horizontal[df_sampleado_horizontal.DEPTO_CODE_caso != df_sampleado_horizontal.DEPTO_CODE_control].ID_control)

    # VERTICAL
    df_sampleado = casos.append(controles_match)
    df_sampleado.drop(columns = ["ID"], inplace = True)

    # UNA DOSIS
    df_merged = df_sampleado.merge(vacunados_una_dosis[["ID","FECHA_APLICACION","LABORATORIO"]], on = "ID", how = "left")
    # DOS DOSIS
    df_merged_dos = df_merged.merge(vacunados_dos_dosis[["ID","FECHA_APLICACION_2"]], on = "ID", how = "left")

    df_merged_dos["FECHA_APLICACION"] = pd.to_numeric(pd.to_datetime(df_merged_dos["FECHA_APLICACION"]).dt.strftime("%Y%m%d"), errors='coerce')
    df_merged_dos["FECHA_APLICACION_2"] = pd.to_numeric(pd.to_datetime(df_merged_dos["FECHA_APLICACION_2"]).dt.strftime("%Y%m%d"), errors='coerce')

    STATUS = []
    VACUNADO_COMPLETO = []
    VACUNADO_UNA_DOSIS = []
    VACUNADO_AL_MENOS_UNA = []
    RECIEN_VACUNADO = []

    for index, row in tqdm(df_merged_dos.iterrows(), total = len(df_merged)):
        dif_a_primera_dosis = row.MIN_FTM  - row.FECHA_APLICACION
        dif_a_segunda_dosis = row.MIN_FTM - row.FECHA_APLICACION_2

        if dif_a_segunda_dosis > 14: #MÁS DE 14 DÍAS DE LA SEGUNDA DOSIS
            STATUS.append("DOS DOSIS")
            VACUNADO_UNA_DOSIS.append(False)
            VACUNADO_COMPLETO.append(True)
            VACUNADO_AL_MENOS_UNA.append(True)
            RECIEN_VACUNADO.append(False)  

        elif dif_a_primera_dosis > 21: #MÁS DE 21 DÍAS DE LA PRIMERA Y MENOS O SIN 14 DÍAS DE LA SEGUNDA.
            STATUS.append("UNA DOSIS")
            VACUNADO_UNA_DOSIS.append(True)
            VACUNADO_COMPLETO.append(False)
            VACUNADO_AL_MENOS_UNA.append(True)
            RECIEN_VACUNADO.append(False)

        elif dif_a_primera_dosis > 0:
            STATUS.append("RECIEN VACUNADO")
            VACUNADO_UNA_DOSIS.append(False)
            VACUNADO_COMPLETO.append(False)
            VACUNADO_AL_MENOS_UNA.append(False)        
            RECIEN_VACUNADO.append(True)

        else:
            STATUS.append("NO VACUNADO")
            VACUNADO_UNA_DOSIS.append(False)
            VACUNADO_COMPLETO.append(False)
            VACUNADO_AL_MENOS_UNA.append(False)
            RECIEN_VACUNADO.append(False)


    df_merged_dos["STATUS"] = STATUS
    df_merged_dos["VACUNADO_COMPLETO"] = VACUNADO_COMPLETO
    df_merged_dos["VACUNADO_UNA_DOSIS"] = VACUNADO_UNA_DOSIS
    df_merged_dos["VACUNADO_AL_MENOS_UNA"] = VACUNADO_AL_MENOS_UNA
    df_merged_dos["RECIEN_VACUNADO"] = RECIEN_VACUNADO

    df_input = df_merged_dos[["MUJER","DEPTO_CODE","EDAD","CASO","SIN_COMORB",
                              "VACUNADO_COMPLETO","VACUNADO_UNA_DOSIS",
                              "VACUNADO_AL_MENOS_UNA","RECIEN_VACUNADO"]].copy()


    for col in df_input.columns:
        if df_input[col].dtype == bool:
            df_input[col] = df_input[col].map({True:1, False:0})



    variable = "CASO"

    out = out

    parametros = [out]

    parametros_str = parametros[0]
    for i in parametros[1:]: parametros_str += " + " + i
    expr = variable + " ~ " + parametros_str 



    df_input_filtrado = df_input[df_input.RECIEN_VACUNADO == 0]

    controles_a_excluir = {"VACUNADO_COMPLETO":"VACUNADO_UNA_DOSIS", 
                           "VACUNADOS_UNA_DOSIS":"VACUNADO_COMPLETO"}

    if out in controles_a_excluir:
        df_input_filtrado = df_input_filtrado[df_input_filtrado[controles_a_excluir[out]]==0]

    ef, lower, upper, model_results_as_df,model_results_as_df_2 = train_model(expr, df_input_filtrado[["CASO",parametros[0]]])
    return ef, lower, upper, model_results_as_df,model_results_as_df_2

In [5]:
df = pd.read_csv("snvs_casos.csv", index_col = "ID")

In [6]:
df = df[df.AMBA]

In [7]:
print(len(df))
df = df[~df.index.duplicated(keep='first')]
print(len(df))

2245299
1956089


In [8]:
df["DEPARTAMENTO_RESIDENCIA"] = pd.Categorical(df.DEPARTAMENTO_RESIDENCIA)
df['DEPTO_CODE'] = df.DEPARTAMENTO_RESIDENCIA.cat.codes

In [9]:
df.drop("DEPARTAMENTO_RESIDENCIA", axis = 1, inplace = True)

In [10]:
df.drop("FALLECIDO", axis = 1, inplace = True)

In [11]:
vacunados_una_dosis = pd.read_csv("nro_dosis_1.csv", header = None)
vacunados_una_dosis.columns = ["DNI","SEXO","FECHA_APLICACION","LABORATORIO"]
vacunados_una_dosis["ID"] = vacunados_una_dosis.DNI.astype(str) + "_" + vacunados_una_dosis.SEXO

  exec(code_obj, self.user_global_ns, self.user_ns)


In [12]:
vacunados_dos_dosis = pd.read_csv("nro_dosis_2.csv", header = None)
vacunados_dos_dosis.columns = ["DNI","SEXO","FECHA_APLICACION_2"]
vacunados_dos_dosis["ID"] = vacunados_dos_dosis.DNI.astype(str) + "_" + vacunados_dos_dosis.SEXO

In [13]:
df = df[~df.EDAD.isnull()]

In [14]:
df["SEMANA"] = pd.to_datetime(df['MIN_FTM'], format = "%Y%m%d").dt.week
df.loc[df.SEMANA == 53, "SEMANA"] = 1

  """Entry point for launching an IPython kernel.


In [15]:
#df = df[df.MIN_FTM > 20210705]

In [16]:
res = pd.DataFrame()

for week in tqdm(set(df.SEMANA)):
    sub_df = df[df.SEMANA == week]
    out = "VACUNADO_AL_MENOS_UNA"
    parametros = [out]

    ef, lower, upper, model_results_as_df,model_results_as_df_2 = run_test_negative(df = sub_df, out = out)
    res = res.append({"SEMANA":week, "ef":ef,"lower":lower, "upper":upper}, ignore_index = True)
    print("Semana: {} || Ef: {:.2f} ({:.2f}-{:.2f})".format(week,ef,lower,upper))

  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/39615 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.693146
         Iterations 3
Semana: 1 || Ef: 0.11 (-1.30-0.66)


  0%|          | 0/29657 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.693128
         Iterations 4
Semana: 2 || Ef: 0.44 (-0.66-0.81)


  0%|          | 0/27502 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.693104
         Iterations 5
Semana: 3 || Ef: 0.63 (-0.41-0.90)


  0%|          | 0/26235 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.693146
         Iterations 3
Semana: 4 || Ef: 0.07 (-0.93-0.55)


  0%|          | 0/22650 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692958
         Iterations 4
Semana: 5 || Ef: 0.38 (0.14-0.55)


  0%|          | 0/20776 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692922
         Iterations 4
Semana: 6 || Ef: 0.41 (0.17-0.58)


  0%|          | 0/19515 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692573
         Iterations 5
Semana: 7 || Ef: 0.60 (0.41-0.74)


  0%|          | 0/20746 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692217
         Iterations 5
Semana: 8 || Ef: 0.73 (0.57-0.83)


  0%|          | 0/23098 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692971
         Iterations 4
Semana: 9 || Ef: 0.26 (0.09-0.40)


  0%|          | 0/25769 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692715
         Iterations 4
Semana: 10 || Ef: 0.37 (0.23-0.48)


  0%|          | 0/29610 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.691903
         Iterations 4
Semana: 11 || Ef: 0.54 (0.45-0.62)


  0%|          | 0/46645 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.691894
         Iterations 4
Semana: 12 || Ef: 0.48 (0.41-0.54)


  0%|          | 0/73242 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.691849
         Iterations 4
Semana: 13 || Ef: 0.35 (0.31-0.39)


  0%|          | 0/112896 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.691727
         Iterations 4
Semana: 14 || Ef: 0.33 (0.30-0.36)


  0%|          | 0/110337 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.691298
         Iterations 4
Semana: 15 || Ef: 0.36 (0.33-0.39)


  0%|          | 0/103248 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.691066
         Iterations 4
Semana: 16 || Ef: 0.38 (0.35-0.41)


  0%|          | 0/80572 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.691795
         Iterations 4
Semana: 17 || Ef: 0.30 (0.26-0.33)


  0%|          | 0/74909 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692552
         Iterations 3
Semana: 18 || Ef: 0.18 (0.15-0.21)


  0%|          | 0/80778 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692625
         Iterations 3
Semana: 19 || Ef: 0.17 (0.13-0.20)


  0%|          | 0/97233 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692367
         Iterations 4
Semana: 20 || Ef: 0.20 (0.17-0.23)


  0%|          | 0/85121 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.691541
         Iterations 4
Semana: 21 || Ef: 0.27 (0.24-0.30)


  0%|          | 0/73155 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692322
         Iterations 3
Semana: 22 || Ef: 0.19 (0.16-0.22)


  0%|          | 0/62236 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692247
         Iterations 3
Semana: 23 || Ef: 0.19 (0.16-0.22)


  0%|          | 0/48894 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692123
         Iterations 4
Semana: 24 || Ef: 0.19 (0.16-0.23)


  0%|          | 0/48114 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.691044
         Iterations 4
Semana: 25 || Ef: 0.26 (0.23-0.30)


  0%|          | 0/40418 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.691566
         Iterations 3
Semana: 26 || Ef: 0.21 (0.18-0.25)


  0%|          | 0/26489 [00:00<?, ?it/s]

Optimization terminated successfully.
         Current function value: 0.692239
         Iterations 3
Semana: 27 || Ef: 0.16 (0.12-0.20)


In [18]:
res.to_csv("indicador_efectividad.csv", index = None)