In [None]:
import os
import re

import duckdb
import mlflow
import pandas as pd
from sklearn.model_selection import train_test_split

from src.constants import (
    PATH_CLASE_BINARIA,
    QUERY_DF_TEST,
    QUERY_DF_TRAIN,
    RANDOM_STATE,
)
from src.model.inference import predictions_per_seed
from src.preprocess.etl import extract, get_dataframe
# Enable IPython's autoreload extension in mode 2 so edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Re-load the autoreload extension. The first cell already loads it with
# %load_ext, so on a top-to-bottom run this only repeats that step.
# NOTE(review): looks redundant - confirm whether this cell is still needed.
%reload_ext autoreload

In [None]:
# Configure MLflow through environment variables: where artifacts live and
# which tracking database to use.
# NOTE(review): the SQLite URI is a relative path, so it presumably resolves
# against the kernel's working directory - confirm the notebook is launched
# from the expected folder.
os.environ.update(
    {
        "MLFLOW_ARTIFACT_ROOT": "gs://mlflow-artifacts-uribe/mlruns",
        "MLFLOW_TRACKING_URI": "sqlite:///database/mlruns.db",
    }
)

In [None]:
# Identify the MLflow run to score with: its display name and the URI of the
# model artifact it logged.
# NOTE(review): the run name and the run ID inside the URI are maintained by
# hand - confirm they refer to the same run.
run_name = "thoughtful-robin-862"
logged_model = "runs:/f9805d45b33e493aae16051fb33ad24f/model"

# Fetch the model through MLflow's LightGBM flavor.
loaded_model = mlflow.lightgbm.load_model(logged_model)

In [None]:
# Open a writable, in-memory DuckDB database used as the staging area for
# the data-loading cells that follow.
con = duckdb.connect(
    database=":memory:",
    read_only=False,
)

In [None]:
# Run the project's `extract` step on the DuckDB connection, pointing it at
# PATH_CLASE_BINARIA.
# NOTE(review): presumably this loads the source dataset into `con` so the
# queries in the next cell can read it - confirm in src.preprocess.etl.
extract(con, PATH_CLASE_BINARIA)

In [None]:
# Run the project-defined train and test queries through the shared
# connection. The results are used as pandas DataFrames in the cells below.
df_full = get_dataframe(con, QUERY_DF_TRAIN)
df_test = get_dataframe(con, QUERY_DF_TEST)

In [None]:
# Close the DuckDB connection; both frames were fetched in the previous cell
# and `con` is not used again in the cells shown below.
con.close()

In [None]:
# Preview the renaming applied in the next cell: print each original column
# name next to the same name with every character outside [A-Za-z0-9_] removed.
for column_name in df_full.columns:
    sanitized_name = re.sub("[^A-Za-z0-9_]+", "", column_name)
    print(column_name, ":", sanitized_name)

In [None]:
# Strip every character outside [A-Za-z0-9_] from the column names of both
# frames.
# NOTE(review): presumably needed because LightGBM rejects feature names that
# contain special characters - confirm.
df_full = df_full.rename(columns=lambda name: re.sub("[^A-Za-z0-9_]+", "", name))
df_test = df_test.rename(columns=lambda name: re.sub("[^A-Za-z0-9_]+", "", name))

# Removing characters can make two different names collapse into one
# (e.g. "a-b" and "ab"). Fail here instead of carrying ambiguous duplicate
# columns into training and inference.
assert df_full.columns.is_unique, "sanitized column names collide in df_full"
assert df_test.columns.is_unique, "sanitized column names collide in df_test"

In [None]:
# Hold out 5% of the training frame for validation.
# `train_test_split` and `RANDOM_STATE` are imported in the notebook's first
# cell together with the other dependencies.
#
# The split is stratified on a helper key built by concatenating the
# `clase_ternaria` and `foto_mes` values as strings, so every combination of
# the two keeps its proportion in both partitions. The helper column travels
# into `df_train` / `df_valid` and is removed in the next cell.
df_full["stratify"] = df_full["clase_ternaria"].astype(str) + df_full["foto_mes"].astype(str)
df_train, df_valid = train_test_split(
    df_full, test_size=0.05, random_state=RANDOM_STATE, stratify=df_full["stratify"]
)

In [None]:
# The "stratify" helper column was only used as the stratification key in the
# previous cell; remove it from both partitions.
df_train = df_train.drop(columns="stratify")
df_valid = df_valid.drop(columns="stratify")

In [None]:
# Show the loaded model's parameters before n_jobs is changed in the next cell.
loaded_model.get_params()

In [None]:
# Change the model's parallelism setting through the estimator's public
# set_params API instead of assigning the attribute directly, so the change
# goes through the same path the library uses for every other parameter.
# NOTE(review): n_jobs=-1 conventionally requests all available CPU threads -
# confirm against the installed LightGBM version.
# The trailing semicolon suppresses the estimator repr returned by set_params.
loaded_model.set_params(n_jobs=-1);

In [None]:
# Show the parameters again to confirm the n_jobs change made in the previous cell.
loaded_model.get_params()

In [None]:
# Run the project's per-seed prediction routine with the train, validation
# and test frames, the loaded model and the run name.
# NOTE(review): presumably this writes predictions.csv under
# predictions/{run_name}/, which the next cell reads - confirm in
# src.model.inference.
predictions_per_seed(df_train, df_valid, df_test, loaded_model, run_name)

In [None]:
# Read the predictions CSV stored under this run's folder.
# NOTE(review): the path is relative, so it depends on the kernel's working
# directory - confirm the notebook is run from the expected folder.
predictions_csv = f"../buckets/b1/datasets/processed/predictions/{run_name}/predictions.csv"
df_preds = pd.read_csv(predictions_csv)

In [None]:
# Preview the first rows of the predictions file.
df_preds.head()

In [None]:
# Absolute count of each distinct value in the "Predicted" column.
df_preds["Predicted"].value_counts()

In [None]:
# The same distribution expressed as relative frequencies (normalize=True).
df_preds["Predicted"].value_counts(normalize=True)