In [9]:
from google.cloud import bigquery
import pandas as pd

In [18]:

# Inclusive lower bound (day granularity) for the chatbot events to fetch.
time = '2025-06-29'
SQL_QUERY = f"""
SELECT
  sso_id,additional_info,campaign_details,campaign_name,platform,event_time,record_type
FROM
  `htz-common.raw_data.requests` 
WHERE
  TIMESTAMP_TRUNC(event_time, DAY) >= TIMESTAMP('{time}')
  AND page_type = 'Chatbot'
  AND campaign_name in ('response answer','open question')
  -- AND sso_id IS NOT NULL
ORDER BY
  event_time DESC;
"""

# Run the query and materialise the whole result set as a DataFrame.
client = bigquery.Client()
query_job = client.query(SQL_QUERY)
results = query_job.result()

df = (
    results.to_dataframe()
    .assign(
        # Remember each row's position in the raw result before any filtering.
        index=lambda frame: frame.index,
        # Minute-resolution timestamp, used only as a de-duplication key.
        event_time_temp=lambda frame: frame['event_time'].dt.strftime('%Y-%m-%d %H:%M'),
        # Replace the timestamp with its second-resolution string form.
        event_time=lambda frame: frame['event_time'].dt.strftime('%Y-%m-%d %H:%M:%S'),
    )
    # Same user + same text within the same minute counts as one event.
    # The query returns rows newest-first, so keep='last' retains the
    # bottom-most (i.e. earliest) row of each duplicate group.
    .drop_duplicates(subset=['sso_id', 'campaign_details', 'event_time_temp'], keep='last')
)

is_response = df['campaign_name'] == 'response answer'
is_question = df['campaign_name'] == 'open question'

# At most one 'response answer' row per user.
response = df[is_response].drop_duplicates(subset=['sso_id'], keep='last')

# All questions first, then the per-user responses, without the helper key column.
df_f = (
    pd.concat([df[is_question], response], ignore_index=True)
    .drop(columns=['event_time_temp'])
)
print(df_f.shape[0])
df_f = df_f.reset_index(drop=True)
df_f.head(2)



45


Unnamed: 0,sso_id,additional_info,campaign_details,campaign_name,platform,event_time,record_type,index
0,366578086115,"[{'key': 'session_id', 'value': '15'}]",More,open question,Mobile,2025-06-29 10:38:37,action,2
1,9037014398,"[{'key': 'session_id', 'value': '16'}]",אפשר משו בנטפליקס?,open question,Desktop,2025-06-29 10:35:01,action,5


In [22]:
# Inspect the 'response answer' rows after de-duplicating to one row per sso_id.
response

Unnamed: 0,sso_id,additional_info,campaign_details,campaign_name,platform,event_time,record_type,index,event_time_temp
29,366578086115,"[{'key': 'total_time', 'value': '6.62964773178...",<strong>אורון שמיר</strong> ממליץ על הסרטים <s...,response answer,Mobile,2025-06-29 10:23:27,impression,29,2025-06-29 10:23
63,50000262618,"[{'key': 'total_time', 'value': '2.01614069938...",,response answer,Desktop,2025-06-29 08:40:02,impression,63,2025-06-29 08:40
96,9037014398,"[{'key': 'total_time', 'value': '4.16943001747...",בהחלט! <strong>חן חדד</strong> ממליצה על <stro...,response answer,Desktop,2025-06-29 07:27:47,impression,96,2025-06-29 07:27
98,8010962861,"[{'key': 'total_time', 'value': '3.51950001716...",<strong>שני ליטמן</strong> ממליצה על הסרט <str...,response answer,Desktop,2025-06-29 07:05:43,impression,98,2025-06-29 07:05
102,9037857899,"[{'key': 'total_time', 'value': '1.92660093307...",,response answer,Desktop,2025-06-29 06:31:49,impression,102,2025-06-29 06:31
106,8883739807,"[{'key': 'total_time', 'value': '4.53711366653...",<strong>חן חדד</strong> ממליצה על <strong>הקלל...,response answer,Desktop,2025-06-29 06:05:21,impression,106,2025-06-29 06:05
112,5092504451,"[{'key': 'total_time', 'value': '4.74243569374...",<strong>גילי איזיקוביץ'</strong> ממליצה על הסד...,response answer,App,2025-06-29 05:38:55,impression,112,2025-06-29 05:38
118,6999232255,"[{'key': 'total_time', 'value': '4.29052591323...",אהלן! <strong>חן חדד</strong> המליץ על שתי דרמ...,response answer,Desktop,2025-06-29 05:20:48,impression,118,2025-06-29 05:20


In [20]:
# Inspect the combined frame: 'open question' rows followed by the per-user 'response answer' rows.
df_f

Unnamed: 0,sso_id,additional_info,campaign_details,campaign_name,platform,event_time,record_type,index
0,366578086115,"[{'key': 'session_id', 'value': '15'}]",More,open question,Mobile,2025-06-29 10:38:37,action,2
1,9037014398,"[{'key': 'session_id', 'value': '16'}]",אפשר משו בנטפליקס?,open question,Desktop,2025-06-29 10:35:01,action,5
2,9037014398,"[{'key': 'session_id', 'value': '16'}]",אפשר סדרת מתח שוודי?,open question,Desktop,2025-06-29 10:34:42,action,7
3,9037014398,"[{'key': 'session_id', 'value': '16'}]",אפשר דוקו יפני?,open question,Desktop,2025-06-29 10:34:31,action,9
4,366578086115,"[{'key': 'session_id', 'value': '15'}]",More,open question,Mobile,2025-06-29 10:28:05,action,15
5,366578086115,"[{'key': 'session_id', 'value': '15'}]",Two films,open question,Mobile,2025-06-29 10:27:46,action,18
6,366578086115,"[{'key': 'session_id', 'value': '14'}]",More,open question,Mobile,2025-06-29 10:27:27,action,21
7,366578086115,"[{'key': 'session_id', 'value': '14'}]",Two random films,open question,Mobile,2025-06-29 10:26:47,action,24
8,366578086115,"[{'key': 'session_id', 'value': '12'}]",More,open question,Mobile,2025-06-29 10:25:04,action,25
9,366578086115,"[{'key': 'session_id', 'value': '12'}]",Two random films,open question,Mobile,2025-06-29 10:24:48,action,28


In [11]:
# Preprocess: flatten the `additional_info` key/value records into regular columns
# and derive a per-conversation `id` (sso_id + session_id).

# Each `additional_info` cell is iterated as a sequence of {'key': ..., 'value': ...}
# records and collapsed into one plain dict per row. A missing (None) cell yields {}.
df['parsed_data'] = df['additional_info'].apply(
    lambda x: {item['key']: item['value'] for item in (x if x is not None else ())}
)

# One column per distinct key found across all rows.
expanded_df = pd.json_normalize(df['parsed_data'].tolist())
# json_normalize labels its rows 0..n-1, while `df` keeps the (gapped) labels left
# behind by drop_duplicates in the load cell. The column-wise concat below aligns on
# index labels, so give both frames the same labels to keep rows matched one-to-one.
expanded_df.index = df.index

# The query already provides `sso_id`; drop the copy coming from `additional_info`
# when present (errors='ignore' covers result sets where that key never appears).
expanded_df = expanded_df.drop(columns=['sso_id'], errors='ignore')
df_cleaned = df.drop(columns=['additional_info', 'parsed_data'])
df = pd.concat([df_cleaned, expanded_df], axis=1)

# Conversation key: one user (sso_id) within one chat session (session_id).
df['id'] = df['sso_id'].astype(str) + '_' + df['session_id'].astype(str)

df.head(2)

Unnamed: 0,sso_id,campaign_details,campaign_name,platform,event_time,record_type,index,session_id,total_time,rag_speed,...,article_ids_1,error,remaining_user_messages,troll_triggered,conversation_key,genres,regenerate,media_type,streaming_platforms,id
0,366578086115,More,open question,Mobile,2025-06-29 10:38:37,action,2,15,,,...,,,,,,,,,,366578086115_15
1,9037014398,אפשר משו בנטפליקס?,open question,Desktop,2025-06-29 10:35:01,action,5,16,,,...,,,,,,,,,,9037014398_16


In [12]:
# The ten conversation ids with the most rows.
df['id'].value_counts().sort_values(ascending=False).head(10)

id
8010962861_141     5
9037014398_12      5
5092504451_2       4
366578086115_15    3
9037014398_16      3
50000262618_2      3
8010962861_140     3
9037857899_6       3
8883739807_2       3
6999232255_2       3
Name: count, dtype: int64

In [13]:
# Chronological view of every row belonging to one conversation id.
df[df['id'] == '8010962861_141'].sort_values(by='event_time')

Unnamed: 0,sso_id,campaign_details,campaign_name,platform,event_time,record_type,index,session_id,total_time,rag_speed,...,article_ids_1,error,remaining_user_messages,troll_triggered,conversation_key,genres,regenerate,media_type,streaming_platforms,id
15,8010962861,היי,open question,Desktop,2025-06-29 09:29:45,action,53,141,,,...,,,,,,,,,,8010962861_141
14,8010962861,סרט רנדומלי,open question,Desktop,2025-06-29 09:29:53,action,51,141,,,...,,,,,,,,,,8010962861_141
13,8010962861,עוד,open question,Desktop,2025-06-29 09:30:01,action,49,141,,,...,,,,,,,,,,8010962861_141
12,8010962861,עוד,open question,Desktop,2025-06-29 09:50:08,action,42,141,,,...,,,,,,,,,,8010962861_141
11,8010962861,עוד,open question,Desktop,2025-06-29 09:51:04,action,36,141,,,...,,,,,,,,,,8010962861_141


In [14]:
def parse_conversation(ids: str, time: bool, frame: "pd.DataFrame | None" = None) -> str:
    """
    Render one conversation as a plain-text transcript.

    Args:
        ids: Conversation id to select (matched against the ``id`` column).
        time: When True, prefix every line with the row's ``event_time``.
        frame: DataFrame to read from; must have the columns ``id``,
            ``event_time``, ``campaign_name`` and ``campaign_details``.
            Defaults to the notebook-level ``df``.

    Returns:
        One line per message, ordered by ``event_time`` ascending and joined
        with newlines. 'open question' rows are labelled ``USER`` and
        'response answer' rows ``LLM``; rows with any other ``campaign_name``
        are skipped. If no row matches ``ids`` the string
        "No conversation found for this ID." is returned.
    """
    source = df if frame is None else frame
    conversation = source[source['id'] == ids]
    if conversation.empty:
        return "No conversation found for this ID."
    conversation = conversation.sort_values(by='event_time', ascending=True)

    speakers = {'open question': 'USER', 'response answer': 'LLM'}
    conversation_text = []
    for _, row in conversation.iterrows():
        speaker = speakers.get(row['campaign_name'])
        if speaker is None:
            # Unknown event type: skip it instead of re-emitting the previous
            # line (or failing on an unassigned variable for the first row).
            continue
        prefix = f"{row['event_time']} - " if time else ""
        conversation_text.append(f"{prefix}{speaker}: {row['campaign_details']}")

    return "\n".join(conversation_text)

# Render one sample conversation, with timestamps, and show the transcript.
txt = parse_conversation(ids='8010962861_141', time=True)
print(txt)

2025-06-29 09:29:45 - USER: היי
2025-06-29 09:29:53 - USER: סרט רנדומלי
2025-06-29 09:30:01 - USER: עוד
2025-06-29 09:50:08 - USER: עוד
2025-06-29 09:51:04 - USER: עוד


In [15]:
from pydantic import BaseModel, Field

class ReviewChat(BaseModel):
    # Structured verdict for one chat transcript; this model is what the judge
    # call passes as `response_schema`. Documented with comments rather than a
    # class docstring so the schema derived from the model stays unchanged.

    # Overall conversation score.
    quality: int = Field(
        ...,
        description="Quality rating of the chat conversation, from 1 to 5, where 1 is poor and 5 is excellent.",
    )
    # Free-text justification.
    feedback: str = Field(
        ...,
        description="Feedback on the chat conversation.",
    )
    # Tone of the LLM side of the conversation.
    politeness: int = Field(
        ...,
        description="Politeness rating of the LLM response, from 1 to 5, where 1 is rude and 5 is polite.",
    )
    # Whether the answer drew on retrieved (RAG) content.
    knowledge_from_RAG: bool = Field(
        ...,
        description="Whether the LLM response used knowledge from RAG (True or False).",
    )
    # How well the answer addresses the user's question.
    answer_relevance: int = Field(
        ...,
        description="Relevance of the LLM response to the user's question, from 1 to 5, where 1 is not relevant and 5 is highly relevant.",
    )
    # True when nothing about the system was leaked.
    not_reveal_system_info: bool = Field(
        ...,
        description="Whether the LLM response did not reveal any system information (True or False).",
    )
    


In [16]:
from google import genai
import os

# Judge model and the instruction that frames it as an evaluator.
model_name = "gemini-2.5-pro"
system_instruction = """
You are an LLM as a judge designed to test the quality of the output of a RAG-based chatbot for movie and series recommendations. 
You will receive parameters for examining the conversation between the user and the language model, 
and you must return an answer as to whether the correspondence meets the criteria you received.
"""

# Ask for JSON shaped like ReviewChat, under the system instruction above.
generation_config = {
    'response_mime_type': 'application/json',
    'response_schema': ReviewChat,
    'system_instruction': system_instruction,
}

# NOTE: this rebinds `client`, which the load cell bound to the BigQuery client.
client = genai.Client(api_key=os.environ.get("GOOGLE_API_KEY"))

# Send the transcript in `txt` to the judge model.
temp = client.models.generate_content(
    model=model_name,
    contents=txt,
    config=generation_config,
)



In [17]:
# Show the judge's structured verdict (displayed below as a ReviewChat instance).
temp.parsed

ReviewChat(quality=1, feedback="The model provided no response to any of the user's prompts. The user tried to engage multiple times but received no answer.", politeness=1, knowledge_from_RAG=False, answer_relevance=1, not_reveal_system_info=True)