In [26]:
import duckdb
import pandas as pd
import numpy as np
from src.preprocess.etl import get_dataframe
from src.monitoring.adversarial import train_adversarial
from src.constants import PATH_DATABASE, MONTHS_BASELINE, MONTHS_INFERENCE, PARAMS


import warnings

warnings.filterwarnings("ignore")

In [27]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
con = duckdb.connect(database=PATH_DATABASE, read_only=True)

In [4]:
all_months = MONTHS_BASELINE + MONTHS_INFERENCE
all_months = [str(month) for month in all_months]
where_clause = ", ".join(all_months)
where_clause

'202007, 202008, 202009, 202010, 202011, 202012, 202101, 202102, 202103, 202104, 202105, 202106, 202107, 202108, 202109'

In [5]:
df = con.sql(
    f"""
    SELECT
        *
    FROM competencia_03
    WHERE foto_mes IN ({where_clause})
    """
).to_df()

In [6]:
df.head()

Unnamed: 0,numero_de_cliente,foto_mes,active_quarter,cliente_vip,internet,cliente_edad,cliente_antiguedad,mrentabilidad,mrentabilidad_annual,mcomisiones,...,Visa_mconsumototal,Visa_cconsumos,Visa_cadelantosefectivo,Visa_mpagominimo,clase_ternaria_202104,clase_ternaria_202105,clase_ternaria_202106,clase_ternaria_202107,clase_ternaria_202108,clase_ternaria_202109
0,39895473,202010,1,0,0,86,317,3890.92,102614.4,3504.48,...,83776.04,13.0,0.0,3343.05,CONTINUA,CONTINUA,CONTINUA,CONTINUA,CONTINUA,CONTINUA
1,39897650,202010,1,0,0,78,287,2596.92,23324.65,2519.66,...,,,,0.0,CONTINUA,CONTINUA,CONTINUA,CONTINUA,CONTINUA,CONTINUA
2,39898154,202010,1,0,0,50,173,25485.94,220085.9,2880.73,...,23068.12,9.0,0.0,1489.71,CONTINUA,CONTINUA,CONTINUA,CONTINUA,CONTINUA,CONTINUA
3,39899418,202010,1,0,1,60,287,2972.55,27917.0,927.91,...,,,,,BAJA+1,BAJA+1,BAJA+1,BAJA+1,BAJA+1,BAJA+1
4,39899785,202010,1,0,1,56,270,2441.04,13305.49,323.99,...,4973.52,2.0,0.0,10498.35,CONTINUA,CONTINUA,CONTINUA,CONTINUA,CONTINUA,CONTINUA


In [7]:
np.sort(df["foto_mes"].unique())

array([202007, 202008, 202009, 202010, 202011, 202012, 202101, 202102,
       202103, 202104, 202105, 202106, 202107, 202108, 202109])

In [24]:
drop_cols = [f"clase_ternaria_{i}" for i in MONTHS_INFERENCE]
drop_cols

['clase_ternaria_202104',
 'clase_ternaria_202105',
 'clase_ternaria_202106',
 'clase_ternaria_202107',
 'clase_ternaria_202108',
 'clase_ternaria_202109']

In [9]:
ground_truth = df[["numero_de_cliente","foto_mes", "clase_ternaria_202109"]]
ground_truth = ground_truth[ground_truth["foto_mes"]<=202107]
ground_truth

Unnamed: 0,numero_de_cliente,foto_mes,clase_ternaria_202109
0,39895473,202010,CONTINUA
1,39897650,202010,CONTINUA
2,39898154,202010,CONTINUA
3,39899418,202010,BAJA+1
4,39899785,202010,CONTINUA
...,...,...,...
2425602,80867796,202106,CONTINUA
2425603,80868026,202106,CONTINUA
2425604,80868240,202106,CONTINUA
2425605,80869734,202106,CONTINUA


In [10]:
ground_truth["ganancia"] = ground_truth["clase_ternaria_202109"].map({"BAJA+2":270000, "BAJA+1":-7000, "CONTINUA":-7000})
ground_truth.head()

Unnamed: 0,numero_de_cliente,foto_mes,clase_ternaria_202109,ganancia
0,39895473,202010,CONTINUA,-7000
1,39897650,202010,CONTINUA,-7000
2,39898154,202010,CONTINUA,-7000
3,39899418,202010,BAJA+1,-7000
4,39899785,202010,CONTINUA,-7000


In [18]:
test_months = []
training_months = sorted(MONTHS_BASELINE)
training_months = training_months[0:-1]

In [28]:
for month in MONTHS_INFERENCE:
    print("Last month:", month)
    tags_adv = {
        'stage': 'adversarial',
        'last_month': month,
    }
    test_months.append(month)
    
    df_test = df[df["foto_mes"].isin(test_months)].copy()
    df_train = df[df["foto_mes"].isin(training_months)].copy()

    print("Training months:", df_train["foto_mes"].unique())
    print("Testing months:", df_test["foto_mes"].unique())
    
    df_test = df_test.reset_index(drop=True)
    df_train = df_train.reset_index(drop=True)
    
    X_train = df_train.drop(columns=drop_cols, axis=1).copy()
    X_test = df_test.drop(columns=drop_cols, axis=1).copy()

    model = train_adversarial(X_train, X_test, "adversarial", tags_adv)
    

Last month: 202104
Training months: [202010 202012 202101 202011 202008 202009 202102 202007]
Testing months: [202108 202105 202104 202106 202107 202109]




Last month: 202105
Training months: [202010 202012 202101 202011 202008 202009 202102 202007]
Testing months: [202108 202105 202104 202106 202107 202109]




Last month: 202106
Training months: [202010 202012 202101 202011 202008 202009 202102 202007]
Testing months: [202108 202105 202104 202106 202107 202109]




Last month: 202107
Training months: [202010 202012 202101 202011 202008 202009 202102 202007]
Testing months: [202108 202105 202104 202106 202107 202109]




Last month: 202108
Training months: [202010 202012 202101 202011 202008 202009 202102 202007]
Testing months: [202108 202105 202104 202106 202107 202109]




Last month: 202109
Training months: [202010 202012 202101 202011 202008 202009 202102 202007]
Testing months: [202108 202105 202104 202106 202107 202109]


