In [30]:
import json
import pandas as pd

In [2]:
df = pd.read_csv('../Data/Final_data.csv')

In [3]:
df

Unnamed: 0,ids,question,answer
0,0,Can we hang out tomorrow?,I'd love that! Let's make some plans.
1,1,Is there something you're not telling me?,"No, but sometimes I worry you might not trust me."
2,2,What did you dream about?,"I dreamed about us, it was beautiful."
3,3,Why don't you trust me?,"It's not that I don't trust you, I just need s..."
4,5,How was your day?,It was good! How about yours?
...,...,...,...
633,761,What's your biggest fear about discussing our ...,Making mistakes... but I trust us to learn and...
634,762,What's your favorite thing about how we balanc...,The way we prioritize our connection while res...
635,763,How do you handle differences in our sleep hab...,By finding compromises that allow us both to f...
636,764,Do you ever feel like you're not adventurous e...,Sometimes... but you appreciate the stability ...


In [8]:
prompt_template = """
You emulate a user who's using our application.
Formulate 5 questions this user might ask based on record. The record
should contain the answer to the questions, and the questions should be complete and not too short.
If possible, use as fewer words as possible from the record. 

The record:

question: {question}
answer: {text}

Provide the output in parsable JSON without using code blocks:

["question1", "question2", ..., "question5"]
""".strip()

In [9]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

In [10]:
prompt = ChatPromptTemplate.from_template(prompt_template)

In [12]:
model = OllamaLLM(model="mistral",temparature=0)

chain = prompt | model

In [13]:
def llm_gen(query,answer):
    res = chain.invoke({"question": query,"text":answer})
    return res

In [14]:
documents = df.to_dict(orient='records')

In [21]:
documents[1]

{'ids': 1,
 'question': "Is there something you're not telling me?",
 'answer': 'No, but sometimes I worry you might not trust me.'}

In [22]:
res = llm_gen(documents[1]['question'],documents[1]['answer'])

In [23]:
print(res)

 [
  "Are you ever concerned that I don't trust you?",
  "Have there been instances where you felt I might not fully trust you?",
  "Do you sometimes feel unsure about my level of trust in you?",
  "In what ways do you worry that I might not trust you?",
  "Can you elaborate on the situations where you worry I might not trust you?"
]


In [24]:
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [25]:
results = {}

In [34]:
for doc in tqdm(documents): 
    doc_id = doc['ids']
    if doc_id in results:
        continue

    questions = llm_gen(doc['question'],doc['answer'])
    results[doc_id] = questions

  1%|█▎                                                                                       | 9/638 [03:29<4:04:06, 23.28s/it]

KeyboardInterrupt



In [36]:
parsed_resulst = {}

for doc_id, json_questions in results.items():
    parsed_resulst[doc_id] = json.loads(json_questions)

In [38]:
doc_index = {d['ids']: d for d in documents}

In [39]:
final_results = []

for doc_id, questions in parsed_resulst.items():
    for q in questions:
        final_results.append((q, doc_id))

In [40]:
df = pd.DataFrame(final_results, columns=['question', 'ids'])

In [41]:
df.to_csv('evaluation-data.csv', index=False)