# LLMs - LangGraph MLFlow Validation


In [1]:
import os

# Make the directory that holds `src` the working directory (presumably the
# project root), so that the `src.*` imports and the relative `artifacts/...`
# paths used by the following cells resolve.
print("Current Directory:", os.getcwd())

# Only move up when `src` is not reachable from here. This keeps the cell
# idempotent: re-running it does not climb one more level on every execution.
if not os.path.isdir('src'):
    os.chdir('..')

print("Moved Back to:", os.getcwd())

Current Directory: c:\Users\0020441\Desktop\PROYECTOS_GIT\langchain\poc_ayto_madrid_zonas_verdes\research
Moved Back to: c:\Users\0020441\Desktop\PROYECTOS_GIT\langchain\poc_ayto_madrid_zonas_verdes


In [2]:
import mlflow
import time
import pandas as pd
import numpy as np
import uuid
from langserve import RemoteRunnable
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from src.greenzones.config.config_prompt import system_template_dict
from src.greenzones.config.config_fewshot import fewshot_dict
from src.greenzones.utils.format_img import user_local_img_input, obtain_img_path, parse_ai_message_output

# Random identifier for this evaluation session
session_id = f"{uuid.uuid4()}"

# Nothing is passed as config: no thread_id, hence no memory
config = None

# Client for the `greenzones` runnable exposed on localhost:8000
greenzones = RemoteRunnable(
    "http://localhost:8000/greenzones"
)

In [None]:
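# The MLflow tracking server must be running before the logging cell is executed.
# Start it once from a terminal (or uncomment the line below):
# !mlflow server --host 127.0.0.1 --port 5000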

In [3]:
# Mapping of the configuration deployed behind the LangServe endpoint.
# NOTE: This is only a record used to compare challengers; it is not connected to the app.
azure_deployment = "gpt-4o-mini"
# Plain string on purpose: a trailing comma after the literal would turn the
# value into a 1-tuple and log it as "('2024-02-01',)".
openai_api_version = "2024-02-01"
prompt_version = 'v02'
fewshot_version = 'v02'
# Look up the prompt and few-shot entries that correspond to the versions above
system_prompt = system_template_dict[prompt_version]
fewshot = fewshot_dict[fewshot_version]

In [4]:
# Parameters recorded with the run (logged to MLflow further below)
params = dict(
    azure_deployment=azure_deployment,
    openai_api_version=openai_api_version,
    prompt_version=prompt_version,
    fewshot_version=fewshot_version,
    fewshot=fewshot,
    system_prompt=system_prompt,
)

In [5]:
# Accumulators for the per-image results; every key gets its own empty list
data_dict = {
    column: []
    for column in (
        'img_list',
        'label_pred',
        'confidence_list',
        'response_t_list',
        'input_tokens_list',
        'output_tokens_list',
        'total_tokens_list',
    )
}

In [14]:
### RECURRENT CALL
# Sends every image returned by `obtain_img_path` to the deployed runnable and
# appends the parsed prediction to `data_dict`. Results are appended, never
# reset, so re-running the cell adds to what has already been collected.
img_dir = 'artifacts/evaluations/pilotos/6_25'  # not named `dir`, which would shadow the builtin
img_path_list = obtain_img_path(img_dir)
print(f'Analyzing {len(img_path_list)} img')
for (img, path) in img_path_list:

    try:
        print(img)
        user_input = user_local_img_input(path)

        # perf_counter is monotonic, so the measured latency is not affected
        # by system clock adjustments
        start_time = time.perf_counter()
        response = greenzones.invoke({"messages": [("human", user_input)]}, config)
        response_t = round(time.perf_counter() - start_time, 2)

        # The last message of the returned conversation is the one that gets parsed
        ai_message = response["messages"][-1]

        label, confidence, input_tokens, output_tokens, total_tokens = parse_ai_message_output(ai_message)

        # Anything that is not a binary label is reported and left out of the evaluation
        if label not in (0, 1, "0", "1"):
            print(f'KO: label <{label}>, response <{ai_message.content}>')
            continue

        print(f'OK: {ai_message.content}')
        data_dict['img_list'].append(img)
        # Always store the label as int: "0"/"1" strings mixed with ints would
        # not compare equal to numeric ground-truth labels in the metrics
        data_dict['label_pred'].append(int(label))
        data_dict['confidence_list'].append(confidence)
        data_dict['response_t_list'].append(response_t)
        data_dict['input_tokens_list'].append(input_tokens)
        data_dict['output_tokens_list'].append(output_tokens)
        data_dict['total_tokens_list'].append(total_tokens)
    except Exception as e:
        # Best effort: report which image failed and why, then go on with the next one
        print(f'ERROR: {img}: {type(e).__name__}: {e}')


Analyzing 25 img
1689242233469_fffffffffaf0e60c0000000075b319f8.jpg
OK: [("etiquetado", 0), ("confianza", 0.85), ("analisis": "El escardado no es adecuado debido a la presencia abundante de hierbas en el alcorque, lo que indica falta de mantenimiento. Aunque el suelo parece aireado, la compactación y la presencia de malas hierbas hacen que el entrecavado no sea aceptable.")]
     
1689325246275_0000000062992ad00000000075b319f8.jpg
OK: [("etiquetado", 0), ("confianza", 0.95), ("analisis": "El escardado es inadecuado debido a la abundante vegetación no deseada en el alcorque. Además, el entrecavado no es suficiente, ya que el suelo parece compacto y no se observa movimiento de tierras ni terrones de tierra en el alcorque.")]
     
1689325516977_0000000062992ad00000000075b319f8.jpg
OK: [("etiquetado", 0), ("confianza", 1.00), ("analisis": "El escardado es inadecuado debido a la abundante vegetación no deseada que rodea el tronco del árbol. Además, no se puede evaluar el entrecavado por la

In [None]:
## Calculate the metrics

# Predictions collected so far, with the image column renamed to match the
# 'IMAGE' key used for the merge
label_pred_df = pd.DataFrame(data_dict).rename(columns={'img_list': 'IMAGE'})

# Reference data; the code below relies on its 'IMAGE' and 'label' columns
val_eval_df = pd.read_csv('artifacts/evaluations/val_eval_data_cleaned.csv')
df = pd.merge(val_eval_df, label_pred_df, on='IMAGE')

# Fail early with a clear message instead of an opaque error inside the metric functions
assert not df.empty, "No predicted image matches the 'IMAGE' column of the evaluation CSV"

# Keep the merged data for later analysis
df.to_csv(f'auditoria_test3_{prompt_version}_{fewshot_version}.csv', index=False)

# Drop rows without a prediction and make the predictions integers, so that
# labels collected as "0"/"1" strings compare correctly with the reference labels.
# A new frame is built instead of mutating in place.
df = (
    df
    .dropna(subset=['label_pred'])
    .assign(label_pred=lambda frame: frame['label_pred'].astype(int))
)

# Calculate the accuracy
accuracy = accuracy_score(df['label'], df['label_pred'])
# Calculate other metrics
precision = precision_score(df['label'], df['label_pred'])
recall = recall_score(df['label'], df['label_pred'])
f1 = f1_score(df['label'], df['label_pred'])

In [16]:
# Metrics logged with the run: classification scores, number of evaluated
# images, and mean / standard deviation of every per-image measurement.
metrics = {
    'accuracy': accuracy,
    'precision': precision,
    'recall': recall,
    'f1_score': f1,
    'eval_img': len(label_pred_df),
}

# One avg_/std_ pair per measurement, generated from a single list so the key
# names stay consistent. NOTE: the response-time deviation is now
# 'std_response_t'; it used to be written with the typo 'sdt_response_t'.
metrics.update({
    f'{prefix}_{measure}': stat(data_dict[f'{measure}_list'])
    for measure in ('confidence', 'response_t', 'input_tokens', 'output_tokens', 'total_tokens')
    for prefix, stat in (('avg', np.mean), ('std', np.std))
})

In [17]:
# Point the client at the local tracking server and select the experiment
mlflow.set_tracking_uri("http://localhost:5000/")
mlflow.set_experiment("LLMs ZONAS VERDES")

# Register the evaluation as a single run: parameters, metrics and a tag
with mlflow.start_run():
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    mlflow.set_tag("tests info", "piloto auditoria")

2024/10/24 13:21:11 INFO mlflow.tracking._tracking_service.client: 🏃 View run unequaled-colt-602 at: http://localhost:5000/#/experiments/390414438658344028/runs/7aa48e087b9c4405a54344904a6a4e7a.
2024/10/24 13:21:11 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/390414438658344028.
