In [56]:
from google.cloud import bigquery
import pandas as pd

In [None]:

time = '2025-06-24'
SQL_QUERY =  f"""
SELECT
  sso_id,additional_info,campaign_details,campaign_name,platform,event_time
FROM
  `htz-common.raw_data.requests` 
WHERE
  TIMESTAMP_TRUNC(event_time, DAY) >= TIMESTAMP('{time}')
  AND page_type = 'Chatbot'
  AND campaign_name in ('response answer','open question')
  -- AND sso_id IS NOT NULL
ORDER BY
  event_time DESC;
"""
client = bigquery.Client()
query_job = client.query(SQL_QUERY)

results = query_job.result() 
df = results.to_dataframe()
df['event_time'] = df['event_time'].dt.strftime('%Y-%m-%d %H:%M:%S')
df = df.drop_duplicates(subset=['sso_id', 'campaign_name'], keep='last')
print(df.shape[0])
df.head(2)



2


Unnamed: 0,sso_id,additional_info,campaign_details,campaign_name,platform,event_time
34,8010962861,"[{'key': 'total_time', 'value': '4.02018070220...",<strong>אורון שמיר</strong> ממליץ על הסרט <str...,response answer,Desktop,2025-06-25 04:48:21
36,8010962861,"[{'key': 'session_id', 'value': '117'}]",בוקר טוב,open question,Desktop,2025-06-25 04:47:50


In [58]:
# preprocess the data

df['parsed_data'] = df['additional_info'].apply(lambda x: {item['key']: item['value'] for item in x})
expanded_df = pd.json_normalize(df['parsed_data'].tolist())
expanded_df = expanded_df.drop(columns=['sso_id'])
df_cleaned = df.drop(columns=['additional_info', 'parsed_data'])
df = pd.concat([df_cleaned, expanded_df], axis=1)
df['id'] = df['sso_id'].astype(str) + '_' + df['session_id'].astype(str)

df.head(2)

Unnamed: 0,sso_id,campaign_details,campaign_name,platform,event_time,total_time,rag_speed,session_id,timestamp,input_tokens,...,article_ids_1,error,remaining_user_messages,troll_triggered,conversation_key,genres,regenerate,media_type,streaming_platforms,id
34,8010962861,<strong>אורון שמיר</strong> ממליץ על הסרט <str...,response answer,Desktop,2025-06-25 04:48:21,,,,,,...,,,,,,,,,,8010962861_nan
36,8010962861,בוקר טוב,open question,Desktop,2025-06-25 04:47:50,,,,,,...,,,,,,,,,,8010962861_nan


In [59]:
df[df['campaign_details'].fillna('').str.contains('closing_question')]

Unnamed: 0,sso_id,campaign_details,campaign_name,platform,event_time,total_time,rag_speed,session_id,timestamp,input_tokens,...,article_ids_1,error,remaining_user_messages,troll_triggered,conversation_key,genres,regenerate,media_type,streaming_platforms,id


In [60]:
df['id'].value_counts().sort_values(ascending=False).head(10)

id
8010962861_nan    2
<NA>_117          2
Name: count, dtype: int64

In [61]:
def parse_conversation(ids:str) -> str:
    """
    Parse the conversation from the DataFrame based on the provided id.
    """
    conversation = df[df['id'] == ids]
    conversation = conversation.sort_values(by='event_time', ascending=True)
    if conversation.empty:
        return "No conversation found for this ID."
    
    conversation_text = []
    for _, row in conversation.iterrows():
        if row['campaign_name'] == 'open question':
            text = f"{row['event_time']} - USER: {row['campaign_details']}"
        elif row['campaign_name'] == 'response answer':
            text = f"{row['event_time']} - LLM: {row['campaign_details']}"
        conversation_text.append(text)
    
    return "\n".join(conversation_text)

txt = parse_conversation('8010962861_117')
print(txt)

No conversation found for this ID.


In [62]:
df[df['id'] == '8010962861_117']

Unnamed: 0,sso_id,campaign_details,campaign_name,platform,event_time,total_time,rag_speed,session_id,timestamp,input_tokens,...,article_ids_1,error,remaining_user_messages,troll_triggered,conversation_key,genres,regenerate,media_type,streaming_platforms,id


In [63]:
from pydantic import BaseModel, Field

class ReviewChat(BaseModel):
    quality: int = Field(..., description="Quality rating of the chat conversation, from 1 to 5, where 1 is poor and 5 is excellent.")
    feedback: str = Field(..., description="Feedback on the chat conversation.")
    


In [64]:
from google import genai
import os

model_name = "gemini-2.5-pro"
system_instruction = """
You are an LLM as a judge designed to test the quality of the output of a RAG-based chatbot for movie and series recommendations. 
You will receive parameters for examining the conversation between the user and the language model, 
and you must return an answer as to whether the correspondence meets the criteria you received.
"""

client = genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))
temp = client.models.generate_content(
    model=model_name,
    contents=txt,
                config={
            'response_mime_type': 'application/json',
            'response_schema': ReviewChat,
            'system_instruction': system_instruction,
        },
)



In [65]:
temp.parsed

ReviewChat(quality=1, feedback='No conversation was provided to evaluate.')

In [66]:
txt

'No conversation found for this ID.'