# Práctica Final: Clasificación con Scikit-learn y MLflow

En esta práctica, utilizaréis un conjunto de datos de Scikit-learn (podéis usar el mismo que en el notebook de Intro MLFlow) para entrenar un modelo de clasificación.

Pasos a seguir: 

    Exploración de Datos: Analiza el conjunto de datos proporcionado para comprender su estructura y contenido.

    Preprocesamiento de Texto: Realiza tareas de preprocesamiento de texto, como tokenización y vectorización, para preparar los datos para el modelado.

    Entrenamiento del Modelo: Utiliza algoritmos de clasificación de Scikit-learn para entrenar un modelo con los datos preprocesados.

    Evaluación del Modelo: Evalúa el rendimiento del modelo utilizando métricas de evaluación estándar como precisión y recall.

    Registro de Métricas con MLflow: Utiliza MLflow para registrar métricas y hiperparámetros durante el entrenamiento, facilitando la gestión y comparación de experimentos.


Nota: Dado que no voy a poder tener acceso a vuestros logs de MLFlow, añadid las imágenes de la interfaz de MLFlow en el notebook.

![](./img/Comparation.PNG)

![](./img/Comparation2.PNG)

![](./img/Metrics.PNG)

## Generar .py de funciones y main con al menos dos argumentos de entrada.

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import mlflow
import argparse
import subprocess
import time
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

## Params
def params():
    """Parse the command-line arguments of the training script.

    Returns:
        argparse.Namespace: Namespace with the attributes ``job_name`` (str),
        ``n_estimator_list`` (list of int) and ``model`` (str). Any option
        that is not supplied on the command line is ``None``.
    """
    cli = argparse.ArgumentParser(description='__main__')
    # Each entry is (flag, keyword arguments forwarded to add_argument).
    option_specs = (
        ('--job_name', {'type': str, 'help': 'Project name'}),
        ('--n_estimator_list', {'nargs': '+', 'type': int, 'help': 'List of n_estimators'}),
        ('--model', {'type': str, 'help': 'Model name'}),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()

## Data loading
def data_loading():
    """Load the scikit-learn breast cancer dataset into a DataFrame.

    Returns:
        pandas.DataFrame: One column per dataset feature (named after
        ``feature_names``) plus a ``target`` column holding the labels.
    """
    dataset = load_breast_cancer()
    frame = pd.DataFrame(dataset['data'], columns=dataset['feature_names'])
    frame['target'] = dataset['target']
    return frame

## Data preprocess
def data_preprocess(df):
    """Persist a hold-out set to CSV and split the rest into train/validation.

    First, 20% of ``df`` is set aside as a hold-out set and written to the
    current working directory: labels to ``test-target.csv`` and features to
    ``test.csv``. The remaining 80% is split again (80/20) into the
    train/validation sets that are returned.

    Args:
        df: DataFrame containing the feature columns and a ``target`` column.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)`` from the second split.
    """
    # Seed and stratify the hold-out split as well, so the CSV files (and
    # therefore the remaining training data) are identical between runs and
    # experiments logged on different runs stay comparable.
    train, holdout = train_test_split(df,
                                      test_size=0.2,
                                      random_state=123,
                                      stratify=df['target'])
    holdout[['target']].to_csv('test-target.csv', index=False)
    # drop() returns a new frame, so the split result is not mutated in place.
    holdout.drop(columns='target').to_csv('test.csv', index=False)

    x_raw = train.drop(columns='target')
    y_raw = train['target']
    x_train, x_test, y_train, y_test = train_test_split(x_raw, y_raw,
                                                        test_size=.20,
                                                        random_state=123,
                                                        stratify=y_raw)
    return x_train, x_test, y_train, y_test

## Model training
def mlflow_tracking(nombre_job, x_train, x_test, y_train, y_test, modelos):
    """Train each candidate model and record it as an MLflow run.

    Starts ``mlflow ui`` on port 5000 as a child process, selects the
    experiment named ``nombre_job`` and then, for every ``(name, estimator)``
    pair, fits a StandardScaler + estimator pipeline on the training data,
    evaluates it on the test data and logs the model name, the metrics and
    the fitted pipeline to MLflow.

    Args:
        nombre_job: Name of the MLflow experiment to log into.
        x_train: Training features.
        x_test: Evaluation features.
        y_train: Training labels.
        y_test: Evaluation labels.
        modelos: Iterable of ``(model_name, estimator)`` pairs. Each
            estimator must provide ``predict_proba`` (used for ROC AUC).
    """
    print('Ejecutando mlflow_tracking')

    # NOTE: the UI child process is started here and is never stopped by
    # this function.
    ui_process = subprocess.Popen(['mlflow', 'ui', '--port', '5000'])
    print(ui_process)
    # Presumably gives the UI server time to come up — TODO confirm.
    time.sleep(5)

    mlflow.set_experiment(nombre_job)

    for model_name, model_instance in modelos:
        with mlflow.start_run():
            print(f"Entrenando modelo: {model_name}")

            scaling = Pipeline(steps=[('scaler', StandardScaler())])
            fitted = Pipeline(steps=[('preprocessor', scaling),
                                     ('classifier', model_instance)])
            fitted.fit(x_train, y_train)

            # Metrics are collected in logging order; dicts keep insertion order.
            scores = {
                'accuracy_train': fitted.score(x_train, y_train),
                'accuracy_test': fitted.score(x_test, y_test),
            }
            predictions = fitted.predict(x_test)
            scores['precision'] = precision_score(y_test, predictions)
            scores['recall'] = recall_score(y_test, predictions)
            scores['f1_score'] = f1_score(y_test, predictions)
            # ROC AUC is computed from the probability of class index 1.
            scores['roc_auc'] = roc_auc_score(y_test, fitted.predict_proba(x_test)[:, 1])

            mlflow.set_tag("mlflow.runName", f"{model_name}-experiment")
            mlflow.log_param('modelo', model_name)
            for metric_name, metric_value in scores.items():
                mlflow.log_metric(metric_name, metric_value)

            mlflow.sklearn.log_model(fitted, f'{model_name}-model')

            print(f"{model_name} successfully registered.")

    print("All models were successfully trained.")




In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC


from functions import params, data_loading, data_preprocess, mlflow_tracking

def main():
    """Run the experiment: parse arguments, prepare data, train and track models.

    When ``--n_estimator_list`` is supplied, one RandomForest candidate is
    trained per value (named ``RandomForest_<n>``). Without it a single
    RandomForest with 100 trees is used. The ``--model`` argument is parsed
    by ``params()`` but is not consumed here.
    """
    print("Ejecutando main")
    args_values = params()
    df = data_loading()
    x_train, x_test, y_train, y_test = data_preprocess(df)

    # Honour the --n_estimator_list CLI argument instead of ignoring it.
    n_estimator_list = getattr(args_values, 'n_estimator_list', None)
    if n_estimator_list:
        forests = [
            (f'RandomForest_{n}', RandomForestClassifier(n_estimators=n, random_state=123))
            for n in n_estimator_list
        ]
    else:
        forests = [('RandomForest', RandomForestClassifier(n_estimators=100, random_state=123))]

    # Candidate models to compare.
    models = forests + [
        ('LogisticRegression', LogisticRegression(max_iter=1000, random_state=123)),
        ('DecisionTree', DecisionTreeClassifier(random_state=123)),
        # probability=True enables predict_proba, which mlflow_tracking calls.
        ('SVC', SVC(probability=True, random_state=123)),
    ]

    # Train, evaluate and log every candidate under the requested experiment.
    mlflow_tracking(args_values.job_name, x_train, x_test, y_train, y_test, models)


if __name__ == "__main__":
    main()

## Práctica parte FastAPI

### Para esta parte de la práctica tenéis que generar un script con al menos 5 módulos app.get, y dos de ellos tienen que ser pipelines de HF.

### Parte de la práctica se tendrá que entregar en capturas de pantalla. Las capturas de pantalla a adjuntar son las siguientes.

### 1. Captura de la pantalla docs con al menos 5 módulos.
### 2. Captura de cada uno de los módulos con la respuesta dentro de docs.
### 3. Captura de cada uno de los módulos en la llamada https.
### 4. Todo el código usado durante el proceso. Notebooks y scripts.

### Opcional

### 5. Despliegue del script en GCP Cloud Run

![](./img/1.PNG)

![](./img/2.PNG)

![](./img/3.PNG)

![](./img/4.PNG)

![](./img/5.PNG)

![](./img/5.PNG)

![](./img/6.PNG)

![](./img/7.PNG)

In [None]:
import os
from typing import Optional

import pandas as pd
import requests
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

# Application instance; the route decorators in this script register their
# handlers on it.
app = FastAPI()

@app.get("/")
def read_root():
    """Return a fixed welcome message showing that the service is running."""
    welcome = {"message": "Running correctly. Welcome."}
    return welcome

# Built on first use and then reused, so the model is not reloaded per request.
_sentiment_pipeline = None


def _get_sentiment_pipeline():
    """Return the shared sentiment-analysis pipeline, creating it on first use."""
    global _sentiment_pipeline
    if _sentiment_pipeline is None:
        _sentiment_pipeline = pipeline("sentiment-analysis")
    return _sentiment_pipeline


@app.get("/sentiment")
def sentiment_classifier(query: str):
    """Classify the sentiment of ``query``.

    Args:
        query: Text to classify.

    Returns:
        dict: ``{"Sentiment": <label>, "Confidence": <score>}`` taken from the
        first result produced by the pipeline.
    """
    top = _get_sentiment_pipeline()(query)[0]
    return {"Sentiment": top['label'], "Confidence": top['score']}

# Built on first use and then reused, so the model is not reloaded per request.
_summarization_pipeline = None


def _get_summarization_pipeline():
    """Return the shared summarization pipeline, creating it on first use."""
    global _summarization_pipeline
    if _summarization_pipeline is None:
        _summarization_pipeline = pipeline("summarization")
    return _summarization_pipeline


@app.get("/summary")
def summarize_text(query: str):
    """Summarize ``query``.

    Args:
        query: Text to summarize.

    Returns:
        dict: ``{"Summary": <summary_text>}`` taken from the first result
        produced by the pipeline.
    """
    summary = _get_summarization_pipeline()(query)
    return {"Summary": summary[0]['summary_text']}



# NER pipeline, created once at import time and shared by all requests.
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
# Request model with a single string field, `text`.
class TextRequest(BaseModel):
    text: str

def convert_to_serializable(result):
    """Coerce every entity's ``score`` to a built-in ``float``, in place.

    Args:
        result: List of entity mappings, each with a ``score`` entry.

    Returns:
        The same list object that was passed in, with its entries updated.
    """
    for item in result:
        raw_score = item['score']
        item['score'] = float(raw_score)
    return result

@app.post("/ner")
def ner(request: TextRequest):
    """
    Run named-entity recognition (NER) on the submitted text.
    """
    # Scores are converted to plain floats before the entities are returned.
    entities = convert_to_serializable(ner_pipeline(request.text))
    return {"entities": entities}


# Riot API key, read from the RIOT_API_KEY environment variable so the secret
# is not stored in source control (empty string when unset). The key that was
# previously hard-coded here should be treated as leaked and revoked.
RIOT_API_KEY = os.environ.get("RIOT_API_KEY", "")
# Data Dragon champion index (patch 12.20.1, en_US locale).
RIOT_API_URL = "https://ddragon.leagueoflegends.com/cdn/12.20.1/data/en_US/champion.json"

@app.get("/LoL/champion/{champion_name}")
def get_champion_habilities(champion_name: str):
    """
    Return information about one champion, including its abilities.

    Args:
        champion_name: Champion identifier, used both in the Data Dragon URL
            and as the key into the returned ``data`` object.

    Returns:
        dict: ``{"champion": <name>, "skills": [{"name", "description"}, ...]}``
        on success, or ``{"error": <message>}`` when the data cannot be
        fetched or the upstream response is not HTTP 200.
    """
    url = f"https://ddragon.leagueoflegends.com/cdn/12.20.1/data/en_US/champion/{champion_name}.json"

    try:
        # requests waits indefinitely without an explicit timeout.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return {"error": "Could not retrieve champion data"}

    if response.status_code != 200:
        return {"error": f"Champion {champion_name} not found"}

    spells = response.json().get("data", {}).get(champion_name, {}).get("spells", [])
    skills = [
        {"name": spell.get("name"), "description": spell.get("description")}
        for spell in spells
    ]
    return {"champion": champion_name, "skills": skills}
    
@app.get("/LoL/champions")
def get_champions():
    """
    Return the list of available champions.

    Returns:
        dict: ``{"champions": [<name>, ...]}`` built from the keys of the
        ``data`` object in the champion index, or ``{"error": <message>}``
        when the index cannot be fetched or the response is not HTTP 200.
    """
    try:
        # requests waits indefinitely without an explicit timeout.
        response = requests.get(RIOT_API_URL, timeout=10)
    except requests.RequestException:
        return {"error": "Could not retrieve champion data"}

    if response.status_code != 200:
        return {"error": "Could not retrieve champion data"}

    champions = response.json().get("data", {})
    return {"champions": list(champions)}
    


