In [None]:
from ai.conversational.orchestrator import process_chat_history
from ai.retrieval.orchestrator import retrieve_releveant_context

import time
import numpy as np
import pandas as pd


In [15]:
positive_questions = [
    "I ordered a laptop, but it arrived with a broken screen. What should I do?",
    "I'm having trouble logging in",
    ["I need help resetting my password.", "I didn’t receive the reset link."]
]

negative_questions = [
    "My cat chewed my phone charger. Is this covered under warranty?",
     "Why did you suggest contacting support?"
]

# Retrieval

In [7]:
user_query = "My cat chewed my phone charger. Is this covered under warranty?"

relevant_docs = retrieve_releveant_context(query = user_query, top_k=25)
len(relevant_docs[0]) , relevant_docs

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

(22,
 ([{'score': 0.67399895,
    'document': {'question': 'does my charger have warranty??',
     'answer': 'we would be happy to look into options for your Lightning charger. message us your current country of residence and we will continue together there'}},
   {'score': 0.618856,
    'document': {'question': 'Does this charger come with a warranty? Thank you and have a terrific day.',
     'answer': 'We would like to help you look into all of your service options. Please meet us in message and we can take a closer look.'}},
   {'score': 0.6054926,
    'document': {'question': 'is this covered in the warranty',
     'answer': 'We totally understand wanting your new iPhone X to be aesthetically perfect. Have you tried cleaning it to make sure it is not a mark on the glass? If you have not tried yet, this page elains the best way to clean it'}},
   {'score': 0.60257417,
    'document': {'question': 'hi, I’d like to know if this faulty is covered under warranty',
     'answer': 'we wou

## Evaluate

In [21]:
def evaluate_questions(questions, label, top_k=25):
    inter_doc_sim_means = []
    inter_doc_sim_medians = []
    doc_query_sim_means = []
    doc_query_sim_medians = []
    times = []

    for question in questions:
        if isinstance(question, list):
            qs = question
        else:
            qs = [question]
        for q in qs:
            start = time.time()
            _, mean_inter_doc_sim, median_inter_doc_sim, mean_doc_query_sim, median_doc_query_sim = retrieve_releveant_context(query=q, top_k=top_k)
            end = time.time()
            times.append(end - start)
            inter_doc_sim_means.append(mean_inter_doc_sim)
            inter_doc_sim_medians.append(median_inter_doc_sim)
            doc_query_sim_means.append(mean_doc_query_sim)
            doc_query_sim_medians.append(median_doc_query_sim)

    print(f"\n--- {label} Questions Evaluation ---")
    print(f"Average time taken (mean):   {np.mean(times):.4f} seconds")
    print(f"Average time taken (median): {np.median(times):.4f} seconds\n")

    print(f"Inter-document similarity (mean of means):   {np.mean(inter_doc_sim_means):.4f}")
    print(f"Inter-document similarity (median of means): {np.median(inter_doc_sim_means):.4f}")
    print(f"Inter-document similarity (mean of medians): {np.mean(inter_doc_sim_medians):.4f}")
    print(f"Inter-document similarity (median of medians): {np.median(inter_doc_sim_medians):.4f}\n")

    print(f"Document-query similarity (mean):   {np.mean(doc_query_sim_means):.4f}")
    print(f"Document-query similarity (median): {np.median(doc_query_sim_means):.4f}")
    print(f"Document-query similarity (mean of medians): {np.mean(doc_query_sim_medians):.4f}")
    print(f"Document-query similarity (median of medians): {np.median(doc_query_sim_medians):.4f}")
    print("\n-------------------------------------\n")

# Evaluate positive questions
evaluate_questions(positive_questions, label="Positive")

# Evaluate negative questions
evaluate_questions(negative_questions, label="Negative")


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


--- Positive Questions Evaluation ---
Average time taken (mean):   0.0899 seconds
Average time taken (median): 0.0789 seconds

Inter-document similarity (mean of means):   0.6207
Inter-document similarity (median of means): 0.6107
Inter-document similarity (mean of medians): 0.6194
Inter-document similarity (median of medians): 0.6106

Document-query similarity (mean):   0.7905
Document-query similarity (median): 0.7963
Document-query similarity (mean of medians): 0.7792
Document-query similarity (median of medians): 0.7806

-------------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


--- Negative Questions Evaluation ---
Average time taken (mean):   0.1319 seconds
Average time taken (median): 0.1319 seconds

Inter-document similarity (mean of means):   0.5339
Inter-document similarity (median of means): 0.5339
Inter-document similarity (mean of medians): 0.5319
Inter-document similarity (median of medians): 0.5319

Document-query similarity (mean):   0.6459
Document-query similarity (median): 0.6459
Document-query similarity (mean of medians): 0.6396
Document-query similarity (median of medians): 0.6396

-------------------------------------



# Generation

In [None]:
from ai.conversational.evaluate import evaluate_with_process_chat_history
results = []
for i, chat_history in enumerate(positive_questions):
    if isinstance(chat_history, list):
        print(f"Processing {chat_history}")
        chat_history_list = []
        for q in chat_history:
            chat_history_list.append( 
                {"role": "user", "content": q},
            )
            df_response = evaluate_with_process_chat_history(chat_history_list)
            results.append(df_response)
            chat_history_list.append(
                {"role": "assistant", "content": df_response.iloc[0]['response']}
            )
    else:
        print(f"Processing {chat_history}")
        chat_history = [
            {"role": "user", "content": chat_history},
        ]
        results.append(df_response)
        df_response = evaluate_with_process_chat_history(chat_history)


In [41]:

df = pd.concat(results)
df

Unnamed: 0,user_input,retrieved_contexts,response,faithfulness,answer_relevancy
0,I need help resetting my password.,[Question: i need help reseting my password pl...,Hi there. We would be happy to help. Please fo...,1.0,0.928176
0,"I ordered a laptop, but it arrived with a brok...","[Question: help me out, I have bought a MacBoo...",1. Please report the issue to the support team...,0.75,0.866477
0,I need help resetting my password.,[Question: i need help reseting my password pl...,Hi there. We would be happy to help. Please fo...,1.0,0.928195
0,I didn’t receive the reset link.,[Question: i have not Receive The Reset Passwo...,1. Please check your Spam or Junk email folder...,1.0,0.868018
