# Check Google Colab

In [1]:
try:
  import google.colab
  from google.colab import drive
  drive.mount('/content/drive')
  IN_COLAB = True
except:
  IN_COLAB = False
print(f"am I in Colab? {IN_COLAB}")

am I in Colab? False


# Import

In [2]:
import os
import sys
import re
import random
from datetime import datetime
from tqdm.auto import tqdm 
import pandas as pd
from dotenv import load_dotenv

import uuid

import pandas as pd

# Setting

In [3]:
if IN_COLAB:
  # Colab
  BASE_DIR = "."
  BACKUPS_DATA_DIR   = "/content/drive/MyDrive/Colab Notebooks/Dataclub/llm/data"
else:
  # Local
  BASE_DIR = ".."
  BACKUPS_DATA_DIR   = "../backups"

# Raw directory
RAW_DATA_DIR               = f"{BASE_DIR}/data/raw"
RAW_DOCS_DATA_DIR          = f"{BASE_DIR}/data/raw/documents"
RAW_INFO_DATA_DIR          = f"{BASE_DIR}/data/raw/info"
# Preprocessing
PROCESSED_DATA_DIR         = f"{BASE_DIR}/data/processed"
PROCESSED_DOCS_DATA_DIR    = f"{BASE_DIR}/data/processed/documents"
# Indexing
INDEXING_DATA_DIR          = f"{BASE_DIR}/data/indexing"
INDEXING_DOCS_DATA_DIR     = f"{BASE_DIR}/data/indexing/documents"
# Test directory
TEST_DATA_DIR               = f"{BASE_DIR}/data/test"
GROUND_TRUTH_DATA_DIR       = f"{BASE_DIR}/data/test/ground_truth"
GROUND_TRUTH_DOCS_DATA_DIR  = f"{BASE_DIR}/data/test/ground_truth/documents"
GROUND_TRUTH_GEN_DATA_DIR   = f"{BASE_DIR}/data/test/ground_truth/generated"
# Test directory
EVAL_DATA_DIR            = f"{BASE_DIR}/data/evaluation"
EVAL_RETRIEVER_DATA_DIR  = f"{BASE_DIR}/data/evaluation/retriever"
EVAL_RAG_DATA_DIR        = f"{BASE_DIR}/data/evaluation/rag"
# Config Prompts Dir
PROMPTS_CONFIG_DIR = f"{BASE_DIR}/cooking_recipe_assistant/config/prompts"

# Raw Info
PLAYLIST_INFO_PATH = f"{RAW_INFO_DATA_DIR}/playlist_info.pkl"
VIDEO_PLAYLIST_MAP_PATH = f"{RAW_INFO_DATA_DIR}/video_playlist_map.pkl"

# Ground-truth
GROUND_TRUTH_PATH = f"{GROUND_TRUTH_DATA_DIR}/ground-truth-retrieval.csv"

# Optimization
REST_OPT_ES_BM25_PATH       = f"{EVAL_RETRIEVER_DATA_DIR}/res-opt-es-bm25.json"
REST_OPT_ES_HYBRID_PATH     = f"{EVAL_RETRIEVER_DATA_DIR}/res-opt-es-hybrid.json"
REST_OPT_ES_HYBRID_RRF_PATH = f"{EVAL_RETRIEVER_DATA_DIR}/res-opt-es-hybrid-rrf.json"

# Make dirs if not exists
if not os.path.exists(RAW_DATA_DIR):
  print("Not exists dir: ", RAW_DATA_DIR)
os.makedirs(RAW_DOCS_DATA_DIR, exist_ok=True)
os.makedirs(RAW_INFO_DATA_DIR, exist_ok=True)
os.makedirs(PROCESSED_DOCS_DATA_DIR, exist_ok=True)
os.makedirs(TEST_DATA_DIR, exist_ok=True)
os.makedirs(EVAL_DATA_DIR, exist_ok=True)
os.makedirs(BACKUPS_DATA_DIR, exist_ok=True)

In [4]:
%load_ext autoreload
%autoreload 2
import os
import sys

# Agregar solo si no está ya en sys.path
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)

# Ahora puedes importar tu módulo
import cooking_recipe_assistant
from cooking_recipe_assistant.commons.utils import (
    read_pickle, 
    read_document, 
    read_text,
    save_pickle, 
    save_document,
    save_text
)

from cooking_recipe_assistant.evaluation.retrievers import evaluate
from cooking_recipe_assistant.evaluation.optimization import run_hyperopt
from cooking_recipe_assistant.rags.retrievers.es_bm25 import es_bm25_query
from cooking_recipe_assistant.rags.retrievers.es_hybrid import es_hybrid_query
from cooking_recipe_assistant.rags.retrievers.es_hybrid_rrf import es_hybrid_rrf_query



In [5]:
# Listar todas las variables de entorno
#for key, value in os.environ.items():
#    print(f"{key}: {value}")

In [6]:
def add_grafana_data(df):
    options = ["+1 (Positive)", "-1 (Negative)", "Pass (Skip feedback)"]
    random.seed(42)
    for index, answer_data in df.iterrows():
        conversation_id = str(uuid.uuid4())
        question = answer_data['question']
        answer_data.pop('question')
        #print(row)
        db.save_conversation(
            conversation_id=conversation_id,
            question=question,
            answer_data=answer_data,
        )
        feedback = random.choice(options)
        #print(feedback) 
        if feedback != "Pass (Skip feedback)":
            feedback_value = 1 if feedback == "+1 (Positive)" else -1
            #print(feedback_value) 
            db.save_feedback(
                conversation_id=conversation_id,
                feedback=feedback_value
            )

# Load data

In [7]:
os.environ["POSTGRES_HOST"] = "localhost"

In [8]:
load_dotenv()

True

In [9]:
from cooking_recipe_assistant.database import db

Database timezone: Etc/UTC
Database current time (UTC): 2024-10-28 21:10:16.344565+00:00
Database current time (Europe/Berlin): 2024-10-28 22:10:16.344565+01:00
Python current time: 2024-10-28 22:10:16.345986+01:00
Inserted time (UTC): 2024-10-28 21:10:16.345986+00:00
Inserted time (Europe/Berlin): 2024-10-28 22:10:16.345986+01:00
Selected time (UTC): 2024-10-28 21:10:16.345986+00:00
Selected time (Europe/Berlin): 2024-10-28 22:10:16.345986+01:00


## gpt-4o-mini

In [10]:
GPT_4O_MINI_MODEL_NAME = 'gpt-4o-mini'
GPT_4O_MINI_MODEL_NAME

'gpt-4o-mini'

In [11]:
EVAL_RAG_DATA_DIR

'../data/evaluation/rag'

In [12]:
EVAL_RAG_GPT_4O_MINI_HYBRID_DATA_DIR = f"{EVAL_RAG_DATA_DIR}/{GPT_4O_MINI_MODEL_NAME}_hybrid"

In [13]:
df_eval_rag_hybrid_gpt_4o_mini_stats = pd.read_csv(
    f"{EVAL_RAG_GPT_4O_MINI_HYBRID_DATA_DIR}/rag-evaluation-stats.csv"
)

In [14]:
df_eval_rag_hybrid_gpt_4o_mini_stats.head(5)

Unnamed: 0,question,answer,model_used,response_time,relevance,relevance_explanation,prompt_tokens,completion_tokens,total_tokens,eval_prompt_tokens,eval_completion_tokens,eval_total_tokens,openai_cost
0,What is the recommended temperature and time t...,The recommended temperature to bake the rolled...,gpt-4o-mini,1.349952,RELEVANT,The generated answer provides the exact temper...,2254,43,2297,258,47,305,0.000431
1,What's the best way to achieve a slightly liqu...,To achieve a slightly liquid consistency for t...,gpt-4o-mini,1.939975,RELEVANT,The generated answer directly addresses the qu...,2267,78,2345,288,58,346,0.000465
2,Can I adjust the cooking time of the millefeui...,"The cooking time for the Eggplant, Tomato, and...",gpt-4o-mini,2.05544,RELEVANT,The generated answer directly addresses the qu...,2178,79,2257,294,49,343,0.000448
3,How do I grease the mold for the Creamy Fried ...,To grease the mold for the Creamy Fried Milk d...,gpt-4o-mini,1.479998,RELEVANT,The generated answer directly addresses the qu...,1972,59,2031,270,52,322,0.000403
4,What is the ideal temperature and timing to ch...,The filled chicken in the Chicken Villaray rec...,gpt-4o-mini,1.629993,RELEVANT,The generated answer provides both the timing ...,1971,56,2027,262,60,322,0.000405


In [15]:
add_grafana_data(df_eval_rag_hybrid_gpt_4o_mini_stats)

## GPT-40

In [16]:
GPT_4O_MODEL_NAME = 'gpt-4o'
GPT_4O_MODEL_NAME

'gpt-4o'

In [17]:
EVAL_RAG_GPT_4O_HYBRID_DATA_DIR = f"{EVAL_RAG_DATA_DIR}/{GPT_4O_MODEL_NAME}_hybrid"
EVAL_RAG_GPT_4O_HYBRID_DATA_DIR

'../data/evaluation/rag/gpt-4o_hybrid'

In [18]:
df_eval_rag_hybrid_gpt_4o_stats = pd.read_csv(
    f"{EVAL_RAG_GPT_4O_HYBRID_DATA_DIR}/rag-evaluation-stats.csv"
)

In [19]:
df_eval_rag_hybrid_gpt_4o_stats

Unnamed: 0,question,answer,model_used,response_time,relevance,relevance_explanation,prompt_tokens,completion_tokens,total_tokens,eval_prompt_tokens,eval_completion_tokens,eval_total_tokens,openai_cost
0,Is the Shrimp Cream Soup a hot dish or can it ...,The Shrimp Cream Soup is a hot dish. It takes ...,gpt-4o,3.536518,RELEVANT,The generated answer directly addresses both a...,1453,21,1474,230,64,294,0.005058
1,Is it necessary to add the nutmeg mentioned in...,The nutmeg is mentioned as part of the instruc...,gpt-4o,6.142909,PARTLY_RELEVANT,The generated answer addresses the question ab...,2253,85,2338,294,67,361,0.007888
2,How do I prevent the shortcrust pastry from pu...,To prevent the shortcrust pastry from puffing ...,gpt-4o,3.306008,RELEVANT,The generated answer directly addresses the qu...,2145,43,2188,248,68,316,0.007092
3,"In the Octopus Galician Style recipe, what's t...","In the Octopus Galician Style recipe, the best...",gpt-4o,6.080312,RELEVANT,The generated answer directly addresses both p...,2131,63,2194,272,63,335,0.007267
4,What are some tips for serving the Chickpeas w...,"To serve the Chickpeas with Prawns dish, you c...",gpt-4o,5.282053,RELEVANT,The generated answer provides specific tips fo...,2044,143,2187,347,58,405,0.007987
...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,How can I prevent the filling from oozing out ...,To prevent the filling from oozing out while f...,gpt-4o,4.468939,RELEVANT,The generated answer directly addresses the qu...,2110,76,2186,289,63,352,0.007387
696,What is the recommended cooking time for the R...,The recommended cooking time for the Roasted P...,gpt-4o,4.964379,NON_RELEVANT,The generated answer suggests a cooking time o...,1773,29,1802,235,81,316,0.006120
697,Should I add the choricero pepper paste before...,"For the ""Mixed Paella,"" you should add the cho...",gpt-4o,5.200654,RELEVANT,The generated answer directly addresses the qu...,2276,54,2330,259,67,326,0.007547
698,Can I serve the Hake in Green Sauce dish cold ...,The Hake in Green Sauce dish is designed to be...,gpt-4o,4.375126,RELEVANT,The generated answer directly addresses the qu...,1791,33,1824,236,50,286,0.005897


In [20]:
add_grafana_data(df_eval_rag_hybrid_gpt_4o_stats)