In [3]:
import json
import os

import argilla as rg
import pandas as pd

# Connect the Argilla client to the server.
# The URL and API key are read from the ARGILLA_API_URL / ARGILLA_API_KEY
# environment variables so credentials do not have to live in the notebook;
# when they are unset, the original local-quickstart values are used.
rg.init(
    api_url=os.environ.get("ARGILLA_API_URL", "http://localhost:6900/"),
    api_key=os.environ.get("ARGILLA_API_KEY", "admin.apikey"),
)



In [4]:
# Load the chat dump, which stores one JSON document per line (JSON Lines).
# The context manager guarantees the file handle is closed once the loop ends,
# and the explicit encoding keeps the read independent of the platform default.
chat_list = []
with open('chats-dump-2023-07-13.jsonl', 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline at end of file) are not
            # JSON documents and would make json.loads raise.
            continue
        # Parse each remaining line as a JSON object
        chat_list.append(json.loads(line))

In [5]:
# Flatten the chats into one (chat oid, message) pair per message, keeping the
# order in which chats and their messages appear in chat_list.
turns = [
    (chat['_id']['$oid'], mess)
    for chat in chat_list
    for mess in chat['messages']
]

# One parallel list per output column.
prompts = [mess['prompt'] for oid, mess in turns]
answers = [mess['answer'] for oid, mess in turns]
oids = [oid for oid, mess in turns]
timestamps = [mess['timestamp']['$date'] for oid, mess in turns]

# One row per message; '_id' repeats for every message of the same chat.
df = pd.DataFrame(
    {
        'prompt': prompts,
        'reply': answers,
        '_id': oids,
        'timestamp': timestamps,
    }
)

In [6]:
# Rendered turns seen so far, keyed by chat id.
prev_context_dict = {}

# For every row, the context is the markdown rendering of all earlier rows
# that share the same chat id (empty string for the first row of a chat).
prev_contexts = []
for chat_id, prompt_text, reply_text in zip(df['_id'], df['prompt'], df['reply']):
    history = prev_context_dict.setdefault(chat_id, [])
    prev_contexts.append("\n\n".join(history))

    # Record this turn only after its own context was captured, so a row
    # never contains itself in its context.
    history.append(f"**Prompt:** {prompt_text} \n  **Reply:** {reply_text}")

# Attach the per-row context as the 'prev_context' column.
df['prev_context'] = prev_contexts

In [8]:
# Schema of the "write a reply" task: annotators are shown the optional
# earlier conversation and the current prompt (both rendered as markdown)
# and must answer with a free-text reply.
context_field = rg.TextField(
    name="context",
    title='Previous context of current prompt',
    required=False,
    use_markdown=True,
)
prompt_field = rg.TextField(name="prompt", title='Current Prompt', use_markdown=True)
reply_question = rg.TextQuestion(
    name="reply",
    description="Provide your reply based on the given prompt",
    required=True,
)

dataset = rg.FeedbackDataset(
    guidelines="Given the following prompt, provide an appropriate reply",
    fields=[context_field, prompt_field],
    questions=[reply_question],
)

# Only the first 100 rows of df are turned into records; the chat id is kept
# as metadata on each record.
reply_records = [
    rg.FeedbackRecord(
        fields={
            "context": data['prev_context'],
            "prompt": data['prompt'],
        },
        metadata={"chat_id": data['_id']},
    )
    for data in df.head(100).to_dict('records')
]
dataset.add_records(reply_records)

# Last expression of the cell, so the pushed dataset is displayed as output.
dataset.push_to_argilla(name="creating-answer-demo-no-prev-context-adjusted-demo", workspace='argilla')

Pushing records to Argilla...: 100%|██████████| 4/4 [00:00<00:00,  5.78it/s]


<FeedbackDataset id=64f1699b-fdee-4da0-bd6b-99cee44ffa05 name=creating-answer-demo-no-prev-context-adjusted-demo workspace=Workspace(id=de99c898-074b-42f2-bb7c-15e25f8efd20, name=argilla, inserted_at=2023-08-18 14:11:56, updated_at=2023-08-18 14:11:56) url=http://localhost:6900/dataset/64f1699b-fdee-4da0-bd6b-99cee44ffa05/annotation-mode fields=[TextField(id=UUID('3ba4f652-2cdc-4a77-aefb-fd5d0be9c56f'), name='context', title='Previous context of current prompt', required=False, type='text', settings={'type': 'text', 'use_markdown': True}, use_markdown=True), TextField(id=UUID('95d1dfce-b975-42c9-b6f8-d945f1748e8e'), name='prompt', title='Current Prompt', required=True, type='text', settings={'type': 'text', 'use_markdown': True}, use_markdown=True)] questions=[TextQuestion(id=UUID('4387e305-ab1a-490c-9989-886da035b30a'), name='reply', title='Reply', description='Provide your reply based on the given prompt', required=True, type='text', settings={'type': 'text', 'use_markdown': False}, 

In [10]:

# Ranking task: annotators order two candidate responses to a prompt.
#
# The original cell listed the exact same question (name "response_ranking")
# three times, apparently a copy/paste leftover. A question's name is what
# identifies its answer, so names have to be unique within a dataset; the
# question is therefore declared once.
questions = [
    rg.RankingQuestion(
        name="response_ranking",
        title="Order the responses based on their accuracy and helpfulness:",
        required=True,
        values={"response-1": "Response 1", "response-2": "Response 2"},  # or ["response-1", "response-2"]
    ),
]

# The field names match the keys of the ranking values above.
fields = [
    rg.TextField(name="prompt", required=True),
    rg.TextField(name="response-1", required=True),
    rg.TextField(name="response-2", required=True),
]

dataset = rg.FeedbackDataset(
    guidelines="Please, read the prompt carefully and...",
    questions=questions,
    fields=fields,
)

# No records are added in this cell; only the dataset configuration is pushed.
dataset.push_to_argilla(name="ranking-task", workspace='argilla')
