In [None]:
# ===========================================
# NOTEBOOK 05 – Evaluation & Export
# ===========================================

import pandas as pd
import joblib
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from src.data.preprocess import preprocess_series
from src.features.vectorizer import load_vectorizer

# Paths and split parameters, collected in one place so they are easy to audit.
DATA_PATH = "../data/processed/cleaned_dataset.csv"
MODEL_PATH = "../models/best_model.pkl"
TEST_SIZE = 0.2
SEED = 42

df = pd.read_csv(DATA_PATH)

# read_csv parses empty fields as NaN, so a row whose text was cleaned down to
# an empty string comes back as a float NaN instead of "". Restore those to
# empty strings before vectorizing. The row count is unchanged, so the seeded
# split below still selects exactly the same rows.
X = df["clean_text"].fillna("").astype(str)
y = df["label"]

# NOTE(review): this assumes the training notebook built its split from the
# same file with the same test_size / random_state; otherwise X_test may
# contain rows the model was fitted on and the metrics would be optimistic.
# Confirm against the training notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

vectorizer = load_vectorizer()
# joblib.load unpickles the file, which can execute arbitrary code; only load
# model artifacts that come from a trusted source.
model = joblib.load(MODEL_PATH)

# Vectorize the held-out texts with the already-fitted vectorizer and predict.
X_test_vec = vectorizer.transform(X_test)
preds = model.predict(X_test_vec)


In [None]:
# Final metrics: per-class report on the held-out split.
final_report = classification_report(y_test, preds)
print("=== Reporte Final ===", final_report, sep="\n")


In [None]:
# Confusion matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.utils.multiclass import unique_labels

# Pin the class order explicitly and reuse it for both the matrix and the
# tick labels, so the axes show the class labels instead of positional
# indices (0, 1, ...) and are guaranteed to line up with the matrix rows and
# columns.
class_labels = unique_labels(y_test, preds)
cm = confusion_matrix(y_test, preds, labels=class_labels)

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",  # cells are integer counts
    cmap="Purples",
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title("Matriz de Confusión")
ax.set_xlabel("Predicción")  # columns are predicted labels
ax.set_ylabel("Real")        # rows are true labels
plt.show()


In [None]:
# Try the model on a few hand-written examples.
# NOTE(review): these raw strings go straight into the vectorizer, while the
# evaluation data above comes from the "clean_text" column. `preprocess_series`
# is imported but never called; presumably the examples should pass through it
# first. Confirm its signature and wire it in.
examples = [
    "I feel extremely anxious right now",
    "Today I feel relaxed and calm",
    "Everything seems overwhelming lately"
]


def predict_label(raw_text):
    """Vectorize one text string and return the model's predicted label for it."""
    features = vectorizer.transform([raw_text])
    return model.predict(features)[0]


for sample in examples:
    print(f"Texto: {sample}\n → Predicción: {predict_label(sample)}\n")
