In [0]:
%pip install xgboost pandas mlflow

In [0]:
import mlflow.xgboost
import pandas as pd
import numpy as np
from pyspark.sql.functions import col
import json

In [0]:

# Fully qualified name of the registered churn model; the version to load is
# selected through the "@n3" suffix of the models:/ URI.
model_name = "workspace.churn_zero.churn_risk_model_xgboost"
model_uri = f"models:/{model_name}@n3"

# Load the model in its native XGBoost flavour through MLflow.
loaded_model = mlflow.xgboost.load_model(model_uri)

print("Modelo carregado do Model Registry/Unity Catalog.")

In [0]:
# JSON file holding the feature names; its content is printed below so the
# reader can check what was loaded.
feature_names_path = "/Workspace/Users/joaovab@al.insper.edu.br/Hackathon_Terra_Signal_grupo_1/model/feature_names.json"

with open(feature_names_path, "r") as features_file:
    feature_names = json.load(features_file)

print("Features carregadas com sucesso!")
print(feature_names)

In [0]:
# Pull the inference table to the driver as a pandas frame.
df_inferencia_spark = spark.table("workspace.churn_zero.inference_gold")
df_inferencia_pandas = df_inferencia_spark.toPandas()

# Keep the ids aside before the frame is reduced to the feature columns:
# they are needed later to label each prediction.
customer_ids = df_inferencia_pandas['customerID']

# Align the frame with the feature list in a single pass instead of dropping
# and inserting one column at a time (each step copies or fragments the frame).
colunas_disponiveis = set(df_inferencia_pandas.columns)
features_esperadas = set(feature_names)

# Columns of the table that are known features, in their original order.
colunas_mantidas = [
    coluna for coluna in df_inferencia_pandas.columns
    if coluna in features_esperadas
]
# Features absent from the table, de-duplicated while preserving order.
features_ausentes = list(dict.fromkeys(
    feature for feature in feature_names
    if feature not in colunas_disponiveis
))

# Zero-filling is kept as the fallback, but it is no longer silent: a missing
# feature usually means the upstream table changed and deserves a look.
if features_ausentes:
    print(f"Aviso: features ausentes em inference_gold preenchidas com 0: {features_ausentes}")

# `fill_value` only applies to the newly created columns; existing values
# (including NaN) are left untouched.
df_inferencia_pandas = df_inferencia_pandas.reindex(
    columns=colunas_mantidas + features_ausentes,
    fill_value=0,
)

# Final model input, ordered as in `feature_names`.
# NOTE(review): presumably the model was trained without 'customerID' and
# 'MonthlyCharges' — confirm against the training notebook.
# The loop variable is deliberately not called `col`, to avoid confusion with
# the imported `pyspark.sql.functions.col`.
selected_features = [
    feature for feature in feature_names
    if feature not in ['customerID', 'MonthlyCharges']
]
X_inferencia = df_inferencia_pandas[selected_features]

In [0]:
# Second column of the predict_proba output
# (presumably P(churn) — TODO confirm the model's class order).
churn_probabilities = loaded_model.predict_proba(X_inferencia)[:, 1]

# Hard label obtained by rounding the probability to the nearest integer.
churn_flags = np.round(churn_probabilities, 0)

# One row per customer: id, probability, rounded label and its Yes/No form.
results_df_pandas = pd.DataFrame({
    'customerID': customer_ids,
    'churn_probability': churn_probabilities,
    'churn': churn_flags,
    'prediction': np.where(churn_flags == 1, 'Yes', 'No'),
})

# Convert to a Spark DataFrame so it can be joined with the source table.
results_df_spark = spark.createDataFrame(results_df_pandas)

In [0]:

# Attach the prediction columns to every row of the gold table that has a
# matching customerID (inner join: rows without a match are dropped).
tabela_original = spark.table("workspace.churn_zero.inference_gold")

df_com_probabilidade = tabela_original.join(
    results_df_spark,
    on="customerID",
    how="inner",
)

# The message previously contained a mis-encoded "é" ("Pr√©via").
print("\nPrévia da Tabela Original com a Coluna de Probabilidade Adicionada:")
# Use the column's actual name: the "Churn_Probability" spelling only resolved
# while Spark's case-insensitive column matching was enabled.
df_com_probabilidade.select("customerID", "Partner", "churn_probability").show(5)


CATALOGO_FINAL = "workspace"
SCHEMA_FINAL = "churn_zero"
TABELA_INFERENCIA_FINAL = "inference_churn"
# Build the qualified name once so the write and the log message cannot diverge.
NOME_COMPLETO_TABELA_FINAL = f"{CATALOGO_FINAL}.{SCHEMA_FINAL}.{TABELA_INFERENCIA_FINAL}"

# NOTE(review): with mode("overwrite"), "mergeSchema" only adds new columns to
# an existing table; if the schema may change incompatibly, "overwriteSchema"
# is presumably what is wanted — confirm.
(
    df_com_probabilidade.write
    .mode("overwrite")
    .option("mergeSchema", "true")
    .saveAsTable(NOME_COMPLETO_TABELA_FINAL)
)
print(f"\nTabela final salva em: {NOME_COMPLETO_TABELA_FINAL}")

In [0]:
# Read back the persisted predictions table and export it as a CSV file.
# The relative path is resolved against the current working directory.
df_inference_churn = spark.table("workspace.churn_zero.inference_churn")
predictions_pdf = df_inference_churn.toPandas()
predictions_pdf.to_csv("../prediction.csv", index=False)