In [None]:
!pip install argilla datasets

In [None]:
import os

import argilla as rg
from datasets import load_dataset

# Connection settings come from the environment so that real credentials do not
# have to be stored in the notebook. The fallbacks are the values this cell used
# before, so it keeps working unchanged when the variables are not set.
# You can find your Space URL behind the Embed this space button
rg.init(
    api_url=os.environ.get("ARGILLA_API_URL", "https://samirit-dataannotation.hf.space"),
    api_key=os.environ.get("ARGILLA_API_KEY", "admin.apikey"),
    workspace=os.environ.get("ARGILLA_WORKSPACE", "admin"),
)

banking_ds = load_dataset("argilla/banking_sentiment_setfit", split="train")
# Argilla expects labels in the annotation column
# We include labels for demo purposes
banking_ds = banking_ds.rename_column("label", "annotation")
# Build argilla dataset from datasets
argilla_ds = rg.read_datasets(banking_ds, task="TextClassification")
# Create dataset
rg.log(argilla_ds, "bankingapp_sentiment")

In [None]:
# How to create a LangChain-compatible FeedbackDataset

import os

import argilla as rg

# Connection settings come from the environment so that real credentials do not
# have to be stored in the notebook. The fallbacks are the values this cell used
# before, so it keeps working unchanged when the variables are not set.
rg.init(
    api_url=os.environ.get("ARGILLA_API_URL", "https://samirit-dataannotation.hf.space"),
    api_key=os.environ.get("ARGILLA_API_KEY", "admin.apikey"),
    workspace=os.environ.get("ARGILLA_WORKSPACE", "admin"),
)

# Two required text fields (prompt, response) and two questions: a required
# 1-5 rating of the response and an optional free-text correction.
dataset = rg.FeedbackDataset(
    fields=[
        rg.TextField(name="prompt", required=True),
        rg.TextField(name="response", required=True),
    ],
    questions=[
        rg.RatingQuestion(
            name="response-rating",
            description="How would you rate the quality of the response?",
            values=[1, 2, 3, 4, 5],
            required=True,
        ),
        rg.TextQuestion(
            name="response-correction",
            description="If you think the response is not accurate, please, correct it.",
            required=False,
        ),
    ],
    guidelines="Please, read the questions carefully and try to answer it as accurately as possible.",
)

# Upload the (still empty) dataset definition under this name.
dataset.push_to_argilla("langchain-dataset")

In [None]:
# Create a Feedback Dataset

In [None]:
# Example field definitions: a plain-text question and an answer that is
# rendered as Markdown. Both are marked as required.
fields = []
fields.append(rg.TextField(name="question", required=True))
fields.append(rg.TextField(name="answer", required=True, use_markdown=True))

In [None]:
# Rating
# Example of a required rating question on an integer scale from 1 to 5.
rg.RatingQuestion(
    name="rating",
    values=list(range(1, 6)),
    required=True,
    title="Rate the quality of the response:",
    description="1 = very bad - 5= very good",
)

In [None]:
# Text question
# Example of an optional free-text question with Markdown enabled.
rg.TextQuestion(
    name="corrected-text",
    use_markdown=True,
    required=False,
    title="Provide a correction to the response:",
)

In [None]:
# Label question
# Example of a required single-choice question with two labels.
rg.LabelQuestion(
    name="relevant",
    labels=["Yes", "No"],
    visible_labels=None,
    required=True,
    title="Is the response relevant for the given prompt?",
)

In [None]:
# Multi-Label question
# Example of an optional multiple-choice question. `labels` maps each stored
# label value (key) to the text passed alongside it (value).
rg.MultiLabelQuestion(
    name="content_class",
    labels=dict(
        hate="Hate Speech",
        sexual="Sexual content",
        violent="Violent content",
        pii="Personal information",
        untruthful="Untruthful info",
        not_english="Not English",
        inappropriate="Inappropriate content",
    ),
    visible_labels=4,
    required=False,
    title="Does the response include any of the following?",
    description="Select all that apply",
)

In [None]:
# Configuring the dataset

In [None]:
# Local feedback dataset for question/answer pairs: two text fields, a 1-5
# rating of the answer and an optional free-text correction.
dataset = rg.FeedbackDataset(
    fields=[
        rg.TextField(name="question"),
        rg.TextField(name="answer"),
    ],
    questions=[
        rg.RatingQuestion(
            name="answer_quality",
            values=list(range(1, 6)),
            description="How would you rate the quality of the answer?",
        ),
        rg.TextQuestion(
            name="answer_correction",
            required=False,
            description="If you think the answer is not accurate, please, correct it.",
        ),
    ],
    guidelines="Please, read the question carefully and try to answer it as accurately as possible.",
)

In [None]:
# Add records

In [None]:
# add records from dolly odia dataset
from datasets import load_dataset

# load and inspect a dataset from the Hugging Face Hub
hf_dataset = load_dataset("OdiaGenAI/dolly-odia-15k", split="train")
df = hf_dataset.to_pandas()

# Draw 5 rows with a fixed seed so that restarting the kernel and re-running
# the notebook selects the same rows every time.
data_df = df.sample(5, random_state=42)
selected_data_df = data_df[["instruction", "response", "context"]]
selected_data_df

In [None]:
from datasets import load_dataset

# Fetch the train split of databricks-dolly-15k from the Hugging Face Hub and
# keep only the instruction, response and context columns.
hf_dataset = load_dataset(path="databricks/databricks-dolly-15k", split="train")
df = hf_dataset.to_pandas()
selected_data_df1 = df.loc[:, ["instruction", "response", "context"]]
selected_data_df1

In [None]:
import pandas as pd

# Stack the databricks rows on top of the sampled Odia rows.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# the sampled rows keep their original labels, which can collide with labels
# of the first frame and produce duplicate index entries.
frames = [selected_data_df1, selected_data_df]
result = pd.concat(frames, ignore_index=True)
result

In [None]:
from datasets import Dataset

# Convert the combined frame back into a Hugging Face Dataset.
# preserve_index=False keeps the pandas index out of the result, so only the
# actual data columns are carried over.
dataset1 = Dataset.from_pandas(result, preserve_index=False)

In [None]:
# Only the last expression of a cell is rendered, so a bare `dataset1` in the
# middle of the cell shows nothing; print its summary explicitly instead.
print(dataset1)
df1 = dataset1.to_pandas()
df1

In [None]:
# create a single Feedback Record
# The keys of `fields` are "question" and "answer"; `metadata` attaches a
# source tag and no external id is set.
record = rg.FeedbackRecord(
    external_id=None,
    metadata=dict(source="encyclopedia"),
    fields=dict(
        question="Why can camels survive long without water?",
        answer="Camels use the fat in their humps to keep them filled with energy and hydration for long periods of time.",
    ),
)

In [None]:
# Build one FeedbackRecord per row of `dataset1`, mapping the source columns
# onto the dataset's field names: instruction -> question, response -> answer.
records = [
    rg.FeedbackRecord(fields=dict(question=row["instruction"], answer=row["response"]))
    for row in dataset1
]

In [None]:
# Preview only the first few records; echoing the whole list would render one
# repr per row of the source dataset and flood the cell output.
records[:5]

In [None]:
# add records to the dataset
# Passes the `records` list built above to the local `dataset` object.
# NOTE(review): re-running this cell presumably appends the same records a
# second time — confirm, and re-create `dataset` before re-running if so.
dataset.add_records(records)

In [None]:
# Push to Argilla
# Sends the local dataset to the connected server under the given name, in
# the "admin" workspace.
dataset.push_to_argilla(workspace="admin", name="my-practisedataset1a")

## For the Dolly Odia dataset

In [None]:
# add records from dolly odia dataset
from datasets import load_dataset

# Fetch the train split of the Odia Dolly dataset from the Hugging Face Hub
# and show it as a pandas DataFrame.
hf_dataset = load_dataset(path="OdiaGenAI/dolly-odia-15k", split="train")
df = hf_dataset.to_pandas()
df

In [None]:
# Build FeedbackRecords from `hf_dataset`, keeping only the rows whose
# "category" equals the literal below, and renaming the columns to the
# dataset's field names: instruction -> question, response -> answer.
import argilla as rg

records = [
    rg.FeedbackRecord(fields=dict(question=row["instruction"], answer=row["response"]))
    for row in hf_dataset
    if row["category"] == "ବନ୍ଦ _ qa"
]

In [None]:
import argilla as rg

# Fresh local feedback dataset for question/answer pairs: two text fields, a
# 1-5 rating of the answer and an optional free-text correction.
dataset = rg.FeedbackDataset(
    fields=[
        rg.TextField(name="question"),
        rg.TextField(name="answer"),
    ],
    questions=[
        rg.RatingQuestion(
            name="answer_quality",
            values=list(range(1, 6)),
            description="How would you rate the quality of the answer?",
        ),
        rg.TextQuestion(
            name="answer_correction",
            required=False,
            description="If you think the answer is not accurate, please, correct it.",
        ),
    ],
    guidelines="Please, read the question carefully and try to answer it as accurately as possible.",
)

In [None]:
import os

# Connection settings come from the environment so that real credentials do not
# have to be stored in the notebook. The fallbacks are the values this cell used
# before, so it keeps working unchanged when the variables are not set.
rg.init(
    api_url=os.environ.get("ARGILLA_API_URL", "https://samirit-dataannotation.hf.space"),
    api_key=os.environ.get("ARGILLA_API_KEY", "admin.apikey"),
    workspace=os.environ.get("ARGILLA_WORKSPACE", "admin"),
)

In [None]:
# Preview only the first few records; echoing the whole list would render one
# repr per selected row and flood the cell output.
records[:5]

In [None]:
# add records to the dataset
# Passes the filtered `records` list to the local `dataset` object.
# NOTE(review): re-running this cell presumably appends the same records a
# second time — confirm, and re-create `dataset` before re-running if so.
dataset.add_records(records)

In [None]:
# Push to Argilla
# Sends the local dataset to the connected server under the given name, in
# the "admin" workspace.
dataset.push_to_argilla(workspace="admin", name="my-practisedataset2b")

In [None]:
# add records from dolly odia dataset
from datasets import load_dataset

# load and inspect a dataset from the Hugging Face Hub
hf_dataset1 = load_dataset("OdiaGenAI/dolly-odia-15k", split="train")
df1 = hf_dataset1.to_pandas()

# Draw 10 rows with a fixed seed so that restarting the kernel and re-running
# the notebook selects the same rows every time.
data_df1 = df1.sample(10, random_state=42)
selected_data_df1 = data_df1[["instruction", "response", "context"]]
selected_data_df1

In [None]:
from datasets import load_dataset

# load and inspect a dataset from the Hugging Face Hub
hf_dataset = load_dataset("databricks/databricks-dolly-15k", split="train")
df2 = hf_dataset.to_pandas()

# Draw 5 rows with a fixed seed so that restarting the kernel and re-running
# the notebook selects the same rows every time.
data_df2 = df2.sample(5, random_state=42)
selected_data_df2 = data_df2[["instruction", "response", "context"]]
selected_data_df2

In [None]:
import pandas as pd

# Stack the sampled databricks rows on top of the sampled Odia rows.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# both samples keep their original row labels, which can collide and leave
# duplicate index entries in the result.
frames2 = [selected_data_df2, selected_data_df1]
result2 = pd.concat(frames2, ignore_index=True)
result2

In [None]:
from datasets import Dataset

# Convert the combined frame back into a Hugging Face Dataset.
# preserve_index=False keeps the pandas index out of the result, so only the
# actual data columns are carried over.
dataset2 = Dataset.from_pandas(result2, preserve_index=False)

In [None]:
# Only the last expression of a cell is rendered, so a bare `dataset2` in the
# middle of the cell shows nothing; print its summary explicitly instead.
print(dataset2)
df3 = dataset2.to_pandas()
df3

In [None]:
# Build one FeedbackRecord per row of `dataset2`, mapping the source columns
# onto the dataset's field names: instruction -> question, response -> answer.
records1 = [
    rg.FeedbackRecord(fields=dict(question=row["instruction"], answer=row["response"]))
    for row in dataset2
]

In [None]:
# Preview only the first few records instead of echoing the whole list.
records1[:5]

In [None]:
# Start from a fresh, empty FeedbackDataset before adding the mixed sample.
# Otherwise `dataset` is still the object that already received `records` in
# an earlier cell, and those rows would end up in the next pushed dataset too.
dataset = rg.FeedbackDataset(
    guidelines="Please, read the question carefully and try to answer it as accurately as possible.",
    fields=[
        rg.TextField(name="question"),
        rg.TextField(name="answer"),
    ],
    questions=[
        rg.RatingQuestion(
            name="answer_quality",
            description="How would you rate the quality of the answer?",
            values=[1, 2, 3, 4, 5],
        ),
        rg.TextQuestion(
            name="answer_correction",
            description="If you think the answer is not accurate, please, correct it.",
            required=False,
        ),
    ],
)

# add records to the dataset
dataset.add_records(records1)

In [None]:
# Push to Argilla
# Sends the local dataset to the connected server under the given name, in
# the "admin" workspace.
dataset.push_to_argilla(workspace="admin", name="my-practisedataset2a")

## For the complete Odia dataset (final code)


In [None]:
# add records from dolly odia dataset
from datasets import load_dataset

# Fetch the train split of the Odia Dolly dataset from the Hugging Face Hub,
# convert it to pandas and keep only the instruction, response and context
# columns. No sampling here: every row is kept.
hf_dataseta = load_dataset(path="OdiaGenAI/dolly-odia-15k", split="train")
dfa = hf_dataseta.to_pandas()
selected_data_dfa = dfa.loc[:, ["instruction", "response", "context"]]
selected_data_dfa

In [None]:
from datasets import Dataset

# Turn the column-reduced DataFrame back into a Hugging Face Dataset.
dataseta = Dataset.from_pandas(df=selected_data_dfa)

In [None]:
# Only the last expression of a cell is rendered, so a bare `dataseta` in the
# middle of the cell shows nothing; print its summary explicitly instead.
print(dataseta)
dfa1 = dataseta.to_pandas()
dfa1

In [None]:
# Build one FeedbackRecord per row of `dataseta`, mapping the source columns
# onto the dataset's field names: instruction -> question, response -> answer.
records5 = [
    rg.FeedbackRecord(fields=dict(question=row["instruction"], answer=row["response"]))
    for row in dataseta
]

In [None]:
# Preview only the first few records; echoing the whole list would render one
# repr per row of the full dataset and flood the cell output.
records5[:5]

In [None]:
import argilla as rg

# Fresh local feedback dataset for question/answer pairs: two text fields, a
# 1-5 rating of the answer and an optional free-text correction.
dataset = rg.FeedbackDataset(
    fields=[
        rg.TextField(name="question"),
        rg.TextField(name="answer"),
    ],
    questions=[
        rg.RatingQuestion(
            name="answer_quality",
            values=list(range(1, 6)),
            description="How would you rate the quality of the answer?",
        ),
        rg.TextQuestion(
            name="answer_correction",
            required=False,
            description="If you think the answer is not accurate, please, correct it.",
        ),
    ],
    guidelines="Please, read the question carefully and try to answer it as accurately as possible.",
)

In [None]:
# add records to the dataset
# Passes the `records5` list built above to the local `dataset` object.
# NOTE(review): re-running this cell presumably appends the same records a
# second time — confirm, and re-create `dataset` before re-running if so.
dataset.add_records(records5)

In [None]:
# Push to Argilla
# Sends the local dataset to the connected server under the given name, in
# the "admin" workspace.
dataset.push_to_argilla(workspace="admin", name="my-practisedataset3")