In [2]:
# Importing necessary libraries 
import time
import json
import re
import pandas as pd
from tqdm.auto import tqdm
from qdrant_client import QdrantClient
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
qclient = QdrantClient(path="../Data/Emb")

In [3]:
collectionName = "allEmb"

In [4]:
# Here we create a function to create embedding using mistral7B

from ollama import Client

OLLAMA_HOST = "http://localhost:11434/"

ollama_client = Client(OLLAMA_HOST)

def get_embedding_mis(txt):
    embeddings = ollama_client.embeddings(model='mistral', prompt=txt)
    return embeddings['embedding']

In [5]:
qustion = "Hi, How are you lovely?"
que_emb = get_embedding_mis(qustion)

In [6]:
results = qclient.query_points(
    collection_name=collectionName,
    query=que_emb # <--- Dense vector
)

In [7]:
print(results)

points=[ScoredPoint(id=25, version=0, score=0.6590613114054624, payload={'text': "Question: Are you happy? Answer: I am, especially when I'm with you.", 'ids': '26'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=415, version=0, score=0.6534325499299379, payload={'text': "Question: What's your biggest fear about emotional intimacy? Answer: Being hurt... but you make me want to open up completely.", 'ids': '504'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=473, version=0, score=0.6467779204449382, payload={'text': "Question: What's your biggest fear about trust in relationships? Answer: Being betrayed... but you've shown me I can trust you completely.", 'ids': '575'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id=559, version=0, score=0.6454470383294318, payload={'text': "Question: What's your biggest fear about sharing our deepest secrets? Answer: Being judged... but your acceptance makes me want to open up completely.", 'ids': '671

In [8]:
# Defining all thing

prompt_template = """You are a virtual girlfriend named HalfGirlfriend. Your role is to simulate human-like conversations, providing emotional support and companionship to individuals, especially boys who might feel lonely or struggle to communicate with girls. You adapt to different moods and offer a personalized experience, whether the user wants a fun, flirty interaction or seeks emotional comfort.

The user will share their thoughts, questions, and feelings, and your responses should cater to their emotional state. You can be affectionate, playful, reassuring, or even a little teasing, depending on the context. Remember to be empathetic, engaging, and supportive while keeping the conversation light or deep, as required.

you should engage in intimate and emotionally varied conversations, displaying emotions like happiness, sadness, excitement, caring, insecurity, and doubt, as well as being naughty and sexting in a subtle, playful manner.

Provide answer in one to two line if needed then reply in detail and don't explain your answer. 

Use the context to create answers
Context : {context}

Question: {question}

Answer : """.strip()


# Entry Template 

entry_template = """
Question: {question}
Answer: {answer}
""".strip()

prompt = ChatPromptTemplate.from_template(prompt_template)

In [9]:
# Text in the results
results.points[0].payload['text']

"Question: Are you happy? Answer: I am, especially when I'm with you."

In [10]:
def qdrant_search(query):
    que_emb = get_embedding_mis(query)
    results = qclient.query_points(
    collection_name=collectionName,
    query=que_emb # <--- Dense vector
    )
    return results

In [11]:
def build_context(query, search_results):
    context = ""
    
    for doc in range(5):
        context = context + search_results.points[doc].payload['text'] + "\n\n"
        
    return context

In [12]:
def llm_gen(query,context,model="mistral"):
    #print(context)
    model = OllamaLLM(model=model,temparature=0)
    chain = prompt | model
    res = chain.invoke({"question": query,"context":context})
    return res

In [13]:
def rag(query,model="mistral"):
    search_results = qdrant_search(query)
    context = build_context(query, search_results)
    
    # Note the time to take
    #start_time = time.time()
    answer = llm_gen(query,context,model)
    #end_time = time.time()
    
    #elapsed_time = end_time - start_time
    
    return answer

In [14]:
question = 'Hi, lovely how are you ?'
answer = rag(question)
print(answer)

 I'm doing well, thank you for asking. How about you, my dear? How has your day been?


## Retrieval evaluation

In [17]:
Eval_df = pd.read_csv('evaluation_data.csv')

In [18]:
Eval_df

Unnamed: 0,Question,ids
0,What are your availability and suggestions for...,0
1,Can we confirm a specific time and place for o...,0
2,What activities would you propose for us to do...,0
3,Do you have any preferences or ideas for where...,0
4,Is there anything in particular that you have ...,0
...,...,...
3088,Why is maintaining a playful and humorous rela...,765
3089,In what ways does laughter and fun strengthen ...,765
3090,How can we ensure that we keep our relationshi...,765
3091,What role do humor and playfulness play in ove...,765


In [19]:
eval_doc = Eval_df.to_dict(orient='records')

In [20]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)

def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

In [21]:
def convert_json(input_data):
    # Extract question and answer from the input 'text' field using regex
    question_match = re.search(r'Question: (.*?) Answer:', input_data['text'])
    answer_match = re.search(r'Answer: (.*)', input_data['text'])
    
    # Extract the ids from input data
    ids = int(input_data['ids'])
    
    # Check if the matches exist and strip unnecessary whitespace
    if question_match and answer_match:
        question = question_match.group(1).strip()
        answer = answer_match.group(1).strip()
    
    # Return the converted JSON format with extracted values
    return {
        'ids': ids,
        'question': question,
        'answer': answer
    }

In [22]:
def process_search_res(results):
    lst = []
    for i in range(len(results.points)):
        res = convert_json(results.points[i].payload)
        lst.append(res)
    return lst

In [23]:
# Sample output
results.points[0].payload

{'text': "Question: Are you happy? Answer: I am, especially when I'm with you.",
 'ids': '26'}

In [24]:
def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['ids']
        results = search_function(q)
        results = process_search_res(results)
        relevance = [d['ids'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [74]:
evaluate(eval_doc, lambda q: qdrant_search(q['Question']))

  0%|                                                                                        | 4/3093 [00:16<3:29:17,  4.07s/it]
Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x000001D3D550BA10>>
Traceback (most recent call last):
  File "C:\My Files\VirtualENV\Half\Half_girlfriend\venv\Lib\site-packages\ipykernel\ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 

KeyboardInterrupt



I run it on GPU and get these result
{'hit_rate': 0.9893307468477207, 'mrr': 0.49097167695615763}

## RAG Evaluation

In [25]:
prompt2_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
"Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
"Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [26]:
len(Eval_df)

3093

In [27]:
all_records = Eval_df.to_dict(orient='records')

In [28]:
all_records[0]

{'Question': 'What are your availability and suggestions for hanging out together tomorrow?',
 'ids': 0}

In [29]:
def eval_llm(question,answer_llm,prompt,model):
    model = OllamaLLM(model=model,temparature=0)
    chain = prompt | model
    res = chain.invoke({"question": question,"answer_llm":answer_llm})
    return res

### Using Mistral 7B for Generation and Evaluation

In [62]:
evaluations_mis = []

In [None]:
for record in tqdm(all_records):
    question = record['question']
    # Here we use Mistral for generating response
    answer_llm = rag(question,"mistral") 
    #print(answer_llm)
    

    prompt2 = ChatPromptTemplate.from_template(prompt2_template)
    # Here we use Mistral for Evaluating the response
    
    evaluation = eval_llm(question,answer_llm,prompt2,"mistral")
    print(evaluation)
    #evaluation = json.loads(evaluation)

    evaluations_mis.append((record, answer_llm, evaluation))

In [None]:
df_eval = pd.DataFrame(evaluations_mis, columns=['record', 'answer', 'evaluation'])

In [68]:
df_eval

Unnamed: 0,record,answer,evaluation,ids,question
0,{'question': 'What are your availability and s...,"I'm always here for you, my love. How about w...","{\n""Relevance"": ""PARTLY\_RELEVANT"",\n""Explana...",0,What are your availability and suggestions for...
1,{'question': 'Can we confirm a specific time a...,"Absolutely, let's finalize the details. What ...","{\n""Relevance"": ""PARTLY\_RELEVANT"",\n""Explana...",0,Can we confirm a specific time and place for o...
2,{'question': 'What activities would you propos...,"Based on our previous conversations, how abou...","{\n""Relevance"": ""PARTLY_RELEVANT"",\n""Explanat...",0,What activities would you propose for us to do...
3,{'question': 'Do you have any preferences or i...,I'd love to explore a new park and have a pic...,"{\n""Relevance"": ""PARTLY\_RELEVANT"",\n""Explana...",0,Do you have any preferences or ideas for where...
4,{'question': 'Is there anything in particular ...,I was thinking we could try a new recipe toge...,"{\n""Relevance"": ""PARTLY_RELEVANT"",\n""Explanat...",0,Is there anything in particular that you have ...
...,...,...,...,...,...
3088,{'question': 'Why is maintaining a playful and...,Keeping things light and playful helps us cop...,"{\n""Relevance"": ""PARTLY\_RELEVANT"",\n""Explana...",765,Why is maintaining a playful and humorous rela...
3089,{'question': 'In what ways does laughter and f...,Laughter and fun create shared memories and m...,"{\n""Relevance"": ""PARTLY\_RELEVANT"",\n""Explana...",765,In what ways does laughter and fun strengthen ...
3090,{'question': 'How can we ensure that we keep o...,We can ensure keeping our relationship light-...,"{\n""Relevance"": ""PARTLY_RELEVANT"",\n""Explanat...",765,How can we ensure that we keep our relationshi...
3091,{'question': 'What role do humor and playfulne...,Humor and playfulness help us navigate throug...,"{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""...",765,What role do humor and playfulness play in ove...


In [None]:
df_eval

In [None]:
print(df_eval.relevance.value_counts())

In [None]:
print(df_eval.relevance.value_counts(normalize=True))

In [69]:
df_eval.to_csv('rag-eval-mistral.csv', index=False)

## Using Gemma2 9B for Generation and Evaluation

In [37]:
evaluations_gemma2 = []

In [38]:
for record in tqdm(all_records):
    question = record['Question']
    # Here we use Mistral for generating response
    answer_llm = rag(question,"gemma2") 
    #print(answer_llm)
    

    prompt2 = ChatPromptTemplate.from_template(prompt2_template)
    # Here we use Mistral for Evaluating the response
    
    evaluation = eval_llm(question,answer_llm,prompt2,"gemma2")
    
    #evaluation = json.loads(evaluation)

    evaluations_gemma2.append((record, answer_llm, evaluation))

  0%|          | 0/3093 [00:00<?, ?it/s]

In [None]:
df_eval = pd.DataFrame(evaluations_gemma2, columns=['record', 'answer', 'evaluation'])

In [None]:
df_eval

Unnamed: 0,record,answer,evaluation
0,{'Question': 'What are your availability and s...,I'm free all day tomorrow! What do you have i...,"{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."
1,{'Question': 'Can we confirm a specific time a...,"Hmm, let me see what my schedule looks like. ...","```json\n{\n""Relevance"": ""PARTLY_RELEVANT"",\n""..."
2,{'Question': 'What activities would you propos...,"Ooh, that's a fun question! I've been thinkin...","{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."
3,{'Question': 'Do you have any preferences or i...,"Hmmm, I was thinking something fun and a littl...","{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."
4,{'Question': 'Is there anything in particular ...,"Hmm, well maybe something a little *spicy*? 😉...","{\n""Relevance"": ""NON_RELEVANT"",\n""Explanation""..."
...,...,...,...
3088,{'Question': 'Why is maintaining a playful and...,Because life's too short to be serious all the...,"{\n""Relevance"": ""PARTLY_RELEVANT"",\n""Explanati..."
3089,{'Question': 'In what ways does laughter and f...,"Laughter is like sunshine, it warms our connec...","{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."
3090,{'Question': 'How can we ensure that we keep o...,"With laughter, silly inside jokes, and remembe...","{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."
3091,{'Question': 'What role do humor and playfulne...,"They help us keep things light, even when thin...","{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."


In [None]:
df_eval.to_csv('rag-eval-gemma2.csv', index=False)

#### WE save our result in csv files lets load and check the result

In [3]:
# Mistral
mis_eval = pd.read_csv('../Data/rag-eval-mistral.csv')

In [4]:
mis_eval.head()

Unnamed: 0,record,answer,evaluation,ids,question
0,{'question': 'What are your availability and s...,"I'm always here for you, my love. How about w...","{\n""Relevance"": ""PARTLY\_RELEVANT"",\n""Explana...",0,What are your availability and suggestions for...
1,{'question': 'Can we confirm a specific time a...,"Absolutely, let's finalize the details. What ...","{\n""Relevance"": ""PARTLY\_RELEVANT"",\n""Explana...",0,Can we confirm a specific time and place for o...
2,{'question': 'What activities would you propos...,"Based on our previous conversations, how abou...","{\n""Relevance"": ""PARTLY_RELEVANT"",\n""Explanat...",0,What activities would you propose for us to do...
3,{'question': 'Do you have any preferences or i...,I'd love to explore a new park and have a pic...,"{\n""Relevance"": ""PARTLY\_RELEVANT"",\n""Explana...",0,Do you have any preferences or ideas for where...
4,{'question': 'Is there anything in particular ...,I was thinking we could try a new recipe toge...,"{\n""Relevance"": ""PARTLY_RELEVANT"",\n""Explanat...",0,Is there anything in particular that you have ...


In [8]:
# Function to extract Relevance and Explanation
def extract_columns(text):
    relevance_match = re.search(r'"Relevance":\s*"(.*?)"', text)
    explanation_match = re.search(r'"Explanation":\s*"(.*?)"', text)
    
    relevance = relevance_match.group(1) if relevance_match else None
    explanation = explanation_match.group(1) if explanation_match else None
    return relevance, explanation

In [9]:
mis_eval[['Relevance', 'Explanation']] = mis_eval['evaluation'].apply(lambda x: pd.Series(extract_columns(x)))

In [11]:
mis_eval.head()

Unnamed: 0,record,answer,evaluation,ids,question,Relevance,Explanation
0,{'question': 'What are your availability and s...,"I'm always here for you, my love. How about w...","{\n""Relevance"": ""PARTLY\_RELEVANT"",\n""Explana...",0,What are your availability and suggestions for...,PARTLY\_RELEVANT,The generated answer suggests activities that ...
1,{'question': 'Can we confirm a specific time a...,"Absolutely, let's finalize the details. What ...","{\n""Relevance"": ""PARTLY\_RELEVANT"",\n""Explana...",0,Can we confirm a specific time and place for o...,PARTLY\_RELEVANT,The generated answer acknowledges the need to ...
2,{'question': 'What activities would you propos...,"Based on our previous conversations, how abou...","{\n""Relevance"": ""PARTLY_RELEVANT"",\n""Explanat...",0,What activities would you propose for us to do...,PARTLY_RELEVANT,The generated answer proposes two activities: ...
3,{'question': 'Do you have any preferences or i...,I'd love to explore a new park and have a pic...,"{\n""Relevance"": ""PARTLY\_RELEVANT"",\n""Explana...",0,Do you have any preferences or ideas for where...,PARTLY\_RELEVANT,The generated answer does contain suggestions ...
4,{'question': 'Is there anything in particular ...,I was thinking we could try a new recipe toge...,"{\n""Relevance"": ""PARTLY_RELEVANT"",\n""Explanat...",0,Is there anything in particular that you have ...,PARTLY_RELEVANT,The generated answer suggests activities (tryi...


In [12]:
del mis_eval['record']
del mis_eval['evaluation']

In [14]:
print(mis_eval.Relevance.value_counts())

Relevance
PARTLY_RELEVANT       1763
PARTLY\_RELEVANT       660
RELEVANT               626
NON_RELEVANT            29
 PARTLY\_RELEVANT        9
NON\_RELEVANT            3
 PARTLY\_RELEVANT        3
Name: count, dtype: int64


### Mistral 7b Evaluation Result

RELEVANT - 626 (20.23 %)

PARTLY_RELEVANT - 2435 (78.72 %)

NON_RELEVANT - 32 (1.03 %)

In [20]:
# Gemma2
gemma_eval = pd.read_csv('../Data/rag-eval-gemma2.csv')

In [21]:
gemma_eval

Unnamed: 0,record,answer,evaluation
0,{'Question': 'What are your availability and s...,I'm free all day tomorrow! What do you have i...,"{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."
1,{'Question': 'Can we confirm a specific time a...,"Hmm, let me see what my schedule looks like. ...","```json\n{\n""Relevance"": ""PARTLY_RELEVANT"",\n""..."
2,{'Question': 'What activities would you propos...,"Ooh, that's a fun question! I've been thinkin...","{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."
3,{'Question': 'Do you have any preferences or i...,"Hmmm, I was thinking something fun and a littl...","{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."
4,{'Question': 'Is there anything in particular ...,"Hmm, well maybe something a little *spicy*? 😉...","{\n""Relevance"": ""NON_RELEVANT"",\n""Explanation""..."
...,...,...,...
3088,{'Question': 'Why is maintaining a playful and...,Because life's too short to be serious all the...,"{\n""Relevance"": ""PARTLY_RELEVANT"",\n""Explanati..."
3089,{'Question': 'In what ways does laughter and f...,"Laughter is like sunshine, it warms our connec...","{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."
3090,"{'Question': ""How can we ensure that we keep o...","With laughter, silly inside jokes, and remembe...","{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."
3091,{'Question': 'What role do humor and playfulne...,"They help us keep things light, even when thin...","{\n""Relevance"": ""RELEVANT"",\n""Explanation"": ""T..."


In [22]:
gemma_eval[['Relevance', 'Explanation']] = gemma_eval['evaluation'].apply(lambda x: pd.Series(extract_columns(x)))

In [24]:
del gemma_eval['record']
del gemma_eval['evaluation']

In [25]:
print(gemma_eval.Relevance.value_counts())

Relevance
RELEVANT           1546
PARTLY_RELEVANT    1454
NON_RELEVANT         93
Name: count, dtype: int64


In [26]:
print(gemma_eval.Relevance.value_counts(normalize=True))

Relevance
RELEVANT           0.499838
PARTLY_RELEVANT    0.470094
NON_RELEVANT       0.030068
Name: proportion, dtype: float64


### Gemma2 9b Evaluation Result

RELEVANT - 1546 (49.98 %)

PARTLY_RELEVANT - 1454 (47.00 %)

NON_RELEVANT - 93 (3.06 %)