In [1]:
from openai import OpenAI
import os
import pandas as pd
import requests
import time

In [2]:
# os.path.dirname('.') evaluates to '', so the dataset path below is resolved
# relative to the current working directory.
script_dir = os.path.dirname('.')
testset_path = os.path.join(script_dir, 'datasets', 'Meezan-HR-synthetic-testset.csv')

# Load the synthetic test set and extract its questions as a plain list.
df = pd.read_csv(testset_path)
queries = df['question'].tolist()
print(f"{len(queries)} queries loaded")

10 queries loaded


In [46]:
# Dump the full list of loaded questions for a quick visual check.
print(queries)

['What is the dress code for female employees in the workplace?', 'What is the purpose and process of a domestic inquiry?', 'What is the role of a management representative in the inquiry proceedings?', 'What are the vehicle arrangement options for employees in EVP grade and above?', 'What are the incentives provided for passing the Junior Associateship of IBP exams?', "What are the consequences for employees who don't follow office discipline, especially regarding punctuality and reading newspapers during office hours?", "What support does Meezan Bank provide to an employee's family in case of death, and where does it come from?", 'What is the purpose of the HR Guidelines & Procedure Document at Meezan Bank Limited?', 'What are the guidelines for recruitment, selection, and placement in the organization, including dress code and office timings?', "What are the steps and requirements for an employee to receive a cash incentive in the company's sales commission structure?"]


In [3]:
def rag_completion(chat_id: int, query: str, eval_approach: str, timeout: float = 300.0) -> "requests.Response":
    '''
    Send one query to the RAG app and return the raw HTTP response.

    Makes a POST request to the local RAG endpoint for the given chat, passing
    ``query`` and ``eval_approach`` as form fields in the request body.

    Args:
        chat_id: id of the chat; interpolated into the endpoint URL.
        query: the question to send.
        eval_approach: forwarded unchanged to the backend. This notebook uses
            'openai', 'litellm_proxy' and 'litellm_proxy_nongpt'.
        timeout: seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on a stalled server.

    Returns:
        The ``requests.Response`` object. The status code is not checked here;
        callers decide how to handle non-2xx responses.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        requests.exceptions.ConnectionError: if the endpoint is unreachable.
    '''
    url = f'http://localhost:8000/api/chat/{chat_id}/get_ai_response/'
    data = {
        'query': query,
        'eval_approach': eval_approach
    }
    response = requests.post(url=url, data=data, timeout=timeout)
    return response
    
    

In [12]:
CHAT_ID = 3  # existing Meezan HR chat id

# Smoke test: send only the first query through the 'litellm_proxy' approach
# and show the raw response object as the cell output.
response = rag_completion(CHAT_ID, queries[0], 'litellm_proxy')
response

<Response [200]>

In [13]:
# Pull the generated answer text out of the JSON body of the last response.
response_body = response.json()
response_body.get('ai_message').get('content')


"The dress code for female employees in the workplace includes the following guidelines:\n\n1. **Elegant Dressing**: Female employees should dress elegantly with a Headscarf and Abaya, without being ostentatious.\n\n2. **Modest Attire**: An appropriate dress code includes Shalwar Kameez or any modest dressing, along with an Abaya.\n\n3. **Headscarf and Abaya**: Female staff are required to wear Hijabs (a scarf covering the entire head and hair) and a gown covering sleeves while on duty, training, and on clients' visits, as required by Islamic injunctions.\n\n4. **Discretion with Makeup and Jewelry**: Female staff are expected to exercise discretion in their choice of makeup and jewelry. Nails should not be longer than medium and may only be polished with neutral colors.\n\n5. **Body Hygiene**: Proper care of body hygiene is a must for all female employees.\n\nThese guidelines aim to maintain a professional appearance while also respecting cultural and religious considerations in the wo

## Evaluate latency for the direct OpenAI RAG approach

In [14]:
CHAT_ID = 3  # existing Meezan HR chat id

responses = []
# perf_counter() is a monotonic, high-resolution clock, so the measured span
# cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
for query in queries:
    response = rag_completion(chat_id=CHAT_ID, query=query, eval_approach='openai')
    # Surface HTTP failures as a clear error instead of an AttributeError
    # when 'ai_message' is missing from an error body.
    response.raise_for_status()
    responses.append(response.json().get('ai_message').get('content'))

end_time = time.perf_counter()
time_taken = end_time - start_time

# Store the answers next to the questions; this rewrites the test set CSV in place.
df['openai_rag_response'] = responses
df.to_csv(os.path.join(script_dir, 'datasets', 'Meezan-HR-synthetic-testset.csv'), index=False)

print(f"Total time taken for {len(queries)} queries with direct OpenAI RAG approach: {time_taken} seconds")

Total time taken for 10 queries with direct OpenAI RAG approach: 45.16123628616333 seconds


## Evaluate latency for litellm proxy RAG approach

In [16]:
CHAT_ID = 3  # existing Meezan HR chat id

responses = []
# perf_counter() is a monotonic, high-resolution clock, so the measured span
# cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
for query in queries:
    response = rag_completion(chat_id=CHAT_ID, query=query, eval_approach='litellm_proxy')
    # Surface HTTP failures as a clear error instead of an AttributeError
    # when 'ai_message' is missing from an error body.
    response.raise_for_status()
    responses.append(response.json().get('ai_message').get('content'))

end_time = time.perf_counter()
time_taken = end_time - start_time

# Store the answers next to the questions; this rewrites the test set CSV in place.
df['litellm_proxy_rag_response'] = responses
df.to_csv(os.path.join(script_dir, 'datasets', 'Meezan-HR-synthetic-testset.csv'), index=False)

print(f"Total time taken for {len(queries)} queries with litellm proxy approach: {time_taken} seconds")

Total time taken for 10 queries with litellm proxy approach: 56.30036687850952 seconds


## Evaluate latency for the litellm proxy RAG approach when the fallback is used (the proxy is deliberately set up with the wrong API configuration for gpt-4-omni)

In [18]:
CHAT_ID = 3  # existing Meezan HR chat id

# The request is the same as for the plain proxy run ('litellm_proxy'); per the
# section heading, the fallback is triggered by the proxy's own configuration.
responses = []
# perf_counter() is a monotonic, high-resolution clock, so the measured span
# cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
for query in queries:
    response = rag_completion(chat_id=CHAT_ID, query=query, eval_approach='litellm_proxy')
    # Surface HTTP failures as a clear error instead of an AttributeError
    # when 'ai_message' is missing from an error body.
    response.raise_for_status()
    responses.append(response.json().get('ai_message').get('content'))

end_time = time.perf_counter()
time_taken = end_time - start_time

print(f"Total time taken for {len(queries)} queries with litellm proxy with fallback approach: {time_taken} seconds")

Total time taken for 10 queries with litellm proxy with fallback approach: 206.12182474136353 seconds


## Evaluate latency for the litellm proxy RAG approach with a non-GPT model (OpenRouter Mistral 7B)

In [19]:
# Try a single call first to confirm the route responds before timing the
# full run. Uses 'litellm_proxy_nongpt', the approach this section evaluates,
# so the smoke test exercises the same path as the timed loop.
# Relies on CHAT_ID defined in an earlier cell.
response = rag_completion(chat_id=CHAT_ID, query=queries[0], eval_approach='litellm_proxy_nongpt')
response.json().get('ai_message').get('content')

' According to the provided context, the dress code for female employees in Meezan Bank Limited is as follows:\n\n1. Female employees should dress elegantly with a headscarf and Abaya, without being ostentatious.\n2. An appropriate dress code includes Shalwar Kameez/any modest dressing, with Abaya.\n3. Female staff are required to wear Hijabs (a scarf covering the entire head and hair and a gown covering sleeves) while on duty, training, and on clients’ visit, as required by Islamic injunction.\n4. Female staff is expected to exercise discretion in their choice of make-up & jewelry. Nails should not be longer than medium & may be polished only with neutral colors.\n5. Proper care of body hygiene is a must.'

In [23]:
# litellm_proxy_nongpt approach
CHAT_ID = 3  # existing Meezan HR chat id

responses = []
# perf_counter() is a monotonic, high-resolution clock, so the measured span
# cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
for query in queries:
    response = rag_completion(chat_id=CHAT_ID, query=query, eval_approach='litellm_proxy_nongpt')
    # Surface HTTP failures as a clear error instead of an AttributeError
    # when 'ai_message' is missing from an error body.
    response.raise_for_status()
    responses.append(response.json().get('ai_message').get('content'))

end_time = time.perf_counter()
time_taken = end_time - start_time

print(f"Total time taken for {len(queries)} queries with litellm proxy with nongpt approach: {time_taken} seconds")

Total time taken for 10 queries with litellm proxy with nongpt approach: 71.99534797668457 seconds
