In [1]:
# Training Pipeline
import pickle
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import mlflow
import mlflow.sklearn
import pandas as pd

# 1. Cargar las características y etiquetas del Feature Pipeline
X_path = "C:/Users/megag/Documents/Curso IA/Curso Avanzado/Machine Learning Clasification/X_features.pkl"
y_path = "C:/Users/megag/Documents/Curso IA/Curso Avanzado/Machine Learning Clasification/y_labels.pkl"
vectorizer_path = "C:/Users/megag/Documents/Curso IA/Curso Avanzado/Machine Learning Clasification/tfidf_vectorizer.pkl"

with open(X_path, "rb") as f:
    X = pickle.load(f)

with open(y_path, "rb") as f:
    y = pickle.load(f)

# 2. Dividir los datos en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. Definir el modelo LGBMClassifier
model = LGBMClassifier(n_estimators=100, random_state=42)

# 4. Iniciar un experimento en MLflow
mlflow.set_experiment("Sentiment Analysis Training")
with mlflow.start_run():
    # 5. Entrenar el modelo
    model.fit(X_train, y_train)

    # 6. Hacer predicciones en el conjunto de prueba
    y_pred = model.predict(X_test)

    # 7. Calcular métricas
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")

    # 8. Registrar las métricas y el modelo en MLflow
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1_score", f1)
    mlflow.sklearn.log_model(model, "sentiment_analysis_model")

    print(f"Accuracy: {accuracy}, F1 Score: {f1}")

# 9. Guardar el modelo entrenado
model_path = "C:/Users/megag/Documents/Curso IA/Curso Avanzado/Machine Learning Clasification/sentiment_analysis_model.pkl"
with open(model_path, "wb") as f:
    pickle.dump(model, f)

print("Modelo entrenado y guardado exitosamente.")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003016 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11025
[LightGBM] [Info] Number of data points in the train set: 796, number of used features: 397
[LightGBM] [Info] Start training from score -2.654247
[LightGBM] [Info] Start training from score -3.096080
[LightGBM] [Info] Start training from score -1.158138
[LightGBM] [Info] Start training from score -1.198960
[LightGBM] [Info] Start training from score -3.789227
[LightGBM] [Info] Start training from score -3.213863
[LightGBM] [Info] Start training from score -1.579733




Accuracy: 0.6432160804020101, F1 Score: 0.6099948363370737
Modelo entrenado y guardado exitosamente.
