In [1]:
# ===============================
# 1️⃣ Librerías
# ===============================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ks_2samp
import requests
from io import StringIO

sns.set(style="whitegrid")


In [2]:
# ===============================
# 2️⃣ Carga de datos
# ===============================
file_path = "../tests/data_tests/prueba_mlops.csv"
data = pd.read_csv(file_path, header=None)

# Asignar nombres genéricos si no existen
data.columns = [f"col_{i}" for i in range(data.shape[1])]

# Dividir en referencia y "current" (simulación drift)
X_reference = data.sample(frac=0.7, random_state=42)
X_current = data.drop(X_reference.index)

# Simular drift: agregar ruido a algunas columnas
X_current_drifted = X_current.copy()
for col in X_current_drifted.columns[:5]:
    X_current_drifted[col] += np.random.normal(0, 1.5, size=len(X_current_drifted))


In [None]:
# ===============================
# 3️⃣ Función para consultar la API
# ===============================
predict_url = "http://localhost:8080/api/caravan-prediction/predict"

def get_predictions_from_api(df_input):
    try:
        csv_buffer = df_input.to_csv(index=False)
        files = {"file": ("input.csv", csv_buffer, "text/csv")}
        response = requests.post(predict_url, files=files)
        
        if response.status_code == 200:
            csv_text = response.text
            df_result = pd.read_csv(StringIO(csv_text))
            print ("✅ Predicciones obtenidas con éxito desde la API.")
            print("Respuesta de la API:\n", df_result.head())
            return df_result
        else:
            print("Error en el endpoint:", response.text)
            return None
    except Exception as e:
        print("❌ Error:", str(e))
        return None


In [None]:
# ===============================
# 4️⃣ Obtener predicciones de referencia y drifted
# ===============================
# Convertir todas las columnas a float
X_reference_str = X_reference.astype(str)
X_current_drifted_str = X_current_drifted.astype(str)

df_pred_reference = get_predictions_from_api(X_reference_str)
df_pred_current = get_predictions_from_api(X_current_drifted_str)

print("Predicciones referencia:")
print(df_pred_reference.head())

print("\nPredicciones drifted:")
print(df_pred_current.head())


Error en el endpoint: {"detail":"You are trying to merge on object and int64 columns. If you wish to proceed you should use pd.concat"}
Error en el endpoint: {"detail":"You are trying to merge on object and int64 columns. If you wish to proceed you should use pd.concat"}

Predicciones drifted:


AttributeError: 'NoneType' object has no attribute 'head'

In [None]:
# ===============================
# 5️⃣ Función de detección de drift
# ===============================
def detect_drift(train_df, test_df, alpha=0.05):
    drift_scores = {}
    drifted_cols = []
    for col in train_df.columns:
        stat, p_value = ks_2samp(train_df[col], test_df[col])
        drift_scores[col] = p_value
        if p_value < alpha:
            drifted_cols.append(col)
    return drift_scores, drifted_cols

drift_scores, drifted_cols = detect_drift(X_reference, X_current_drifted)
print("Columnas con drift significativo:", drifted_cols)


In [None]:
# ===============================
# 6️⃣ Dashboard visual de drift
# ===============================
def plot_drift(train_df, test_df, drifted_cols, max_cols=10):
    cols_to_plot = drifted_cols[:max_cols] if drifted_cols else train_df.columns[:max_cols]
    for col in cols_to_plot:
        plt.figure(figsize=(8,4))
        sns.kdeplot(train_df[col], label='Referencia', fill=True)
        sns.kdeplot(test_df[col], label='Actual Drifted', fill=True)
        plt.title(f'Distribution Drift: {col}')
        plt.legend()
        plt.show()

plot_drift(X_reference, X_current_drifted, drifted_cols)


In [None]:
# ===============================
# 7️⃣ Comparación de predicciones del modelo
# ===============================
if df_pred_reference is not None and df_pred_current is not None:
    pred_change_rate = (df_pred_reference.values != df_pred_current.values).mean()
    print(f"Proporción de predicciones que cambiaron debido al drift: {pred_change_rate:.2%}")
