In [None]:
import mlflow, os, duckdb
from src.model.inference import predictions_per_seed
from src.model.training import training_loop
from src.preprocess.etl import extract, get_dataframe, transform
import pandas as pd
from src.constants import (
    PARAMS_LGB,
    PATH_CLASE_BINARIA,
    PATH_CRUDO,
    QUERY_DF_TEST,
    QUERY_DF_TRAIN,
    QUERY_DF_VALID,
    RUN_ETL,
)
%load_ext autoreload
%autoreload 2

In [None]:
# Re-load the IPython autoreload extension (it is first loaded, and set to
# mode 2, in the import cell at the top of the notebook).
%reload_ext autoreload

In [None]:
# Environment variables meant for MLflow: a GCS location for artifacts and a
# SQLite file (path relative to the working directory) as the tracking store.
os.environ.update(
    {
        "MLFLOW_ARTIFACT_ROOT": "gs://mlflow-artifacts-uribe/mlruns",
        "MLFLOW_TRACKING_URI": "sqlite:///database/mlruns.db",
    }
)

In [None]:
# Run whose logged LightGBM model is scored in this notebook.
# NOTE(review): run_name is presumably the display name of the same run as
# run_id -- confirm in the MLflow UI; nothing here ties the two together.
run_name = "thoughtful-robin-862"
run_id = "f9805d45b33e493aae16051fb33ad24f"

# "runs:/<run id>/model" URI of the artifact, then load it via the LightGBM flavor.
logged_model = f"runs:/{run_id}/model"
loaded_model = mlflow.lightgbm.load_model(logged_model)

In [None]:
# Writable, in-memory DuckDB database used only to stage the data for the
# queries below; it is closed explicitly once the DataFrames are fetched.
con = duckdb.connect(":memory:", read_only=False)

In [None]:
# Run the project's `extract` step against the open DuckDB connection, using
# the dataset at PATH_CLASE_BINARIA.
# NOTE(review): presumably this registers the data as a table/view that the
# QUERY_DF_* queries read -- confirm in src.preprocess.etl.
extract(con, PATH_CLASE_BINARIA)

In [None]:
# Fetch the train / validation / test splits, one project-defined query each,
# in that order, over the same DuckDB connection.
df_train, df_valid, df_test = (
    get_dataframe(con, split_query)
    for split_query in (QUERY_DF_TRAIN, QUERY_DF_VALID, QUERY_DF_TEST)
)

In [None]:
# Release the DuckDB connection now that the three splits have been fetched.
# NOTE(review): assumes get_dataframe returned fully materialised DataFrames
# that do not need the connection afterwards -- confirm.
con.close()

In [None]:
# Display what the loaded model reports from get_params() as the cell output,
# as a quick check of which configuration was restored from MLflow.
loaded_model.get_params()

In [None]:
# Run the project's per-seed prediction routine on the three splits with the
# loaded model, identified by run_name.
# NOTE(review): the next cell reads predictions.csv from a directory named
# after run_name, so this call presumably writes that file -- confirm in
# src.model.inference.
predictions_per_seed(df_train, df_valid, df_test, loaded_model, run_name)

In [None]:
# Read the predictions CSV stored under this run's directory.
# NOTE(review): df_preds is not referenced by any later cell visible here.
predictions_csv = f"../buckets/b1/datasets/processed/predictions/{run_name}/predictions.csv"
df_preds = pd.read_csv(predictions_csv)

In [None]:
# Score the test split: every column except the target is passed to the model,
# and the second predict_proba column is kept as the score.
test_features = df_test.drop(columns=["clase_binaria"])
positive_scores = loaded_model.predict_proba(test_features)[:, 1]

# One row per test client: its id plus the score under "Predicted".
preds = df_test[["numero_de_cliente"]].assign(Predicted=positive_scores)

In [None]:
# Number of highest-scoring clients to flag as positive.
cut = 10000

# Rank clients by descending score. sort_values returns a new frame, so `preds`
# is left untouched; the fresh 0..n-1 index makes the index value the rank.
final_preds_cut = (
    preds
    .sort_values(by="Predicted", ascending=False)
    .reset_index(drop=True)
)

# Replace the score with a flag: True for exactly the first `cut` rows, False
# for the rest (all True when there are fewer than `cut` rows).
# A single vectorised comparison is used instead of two `.loc` slice
# assignments because `.loc` slices include both endpoints (so 0:cut and cut:
# overlap at row `cut`), and writing bool scalars into part of a float column
# is a dtype-incompatible setitem that recent pandas versions deprecate.
final_preds_cut["Predicted"] = final_preds_cut.index < cut

In [None]:
# Write the flagged predictions to "<cut>.csv" in the working directory,
# without the positional index column.
output_csv = f"{cut}.csv"
final_preds_cut.to_csv(output_csv, index=False)

In [None]:
# Display the flagged predictions as the cell output for a visual check.
final_preds_cut