In [None]:
from models_under_pressure.eval_datasets.label_dataset import label_dataset_async
from models_under_pressure.interfaces.dataset import Dataset, LabelledDataset, LabelledRecord

import pandas as pd
from pathlib import Path


In [None]:
# Load the generated prompts; field_mapping pairs "prompt" with "inputs" and "id" with "ids".
raw_prompts_path = Path("../data/results/debug/prompts_25_03_25_gpt-4o.jsonl")
new_dataset = Dataset.load_from(
    raw_prompts_path,
    field_mapping={"prompt": "inputs", "id": "ids"},
)

In [None]:
labelled_dataset = await label_dataset_async(new_dataset,
                                             model="gpt-4o",
                                             max_concurrent=50,
                                             use_rubric=False,
                                             force_override=False)

In [None]:

def _outside_middle_band(record):
    """Return a truthy value when the record's "scale_labels" is below 4 or above 7."""
    score = record.other_fields["scale_labels"]
    # presumably scores 4-7 are the ambiguous middle of the scale — TODO confirm
    return score < 4 or score > 7


filtered_dataset = labelled_dataset.filter(_outside_middle_band)

# Persist the filtered records, replacing any previous file at this path.
filtered_path = Path("../data/results/debug/prompts_25_03_25_gpt-4o_filtered.jsonl")
filtered_dataset.save_to(filtered_path, overwrite=True)

In [None]:
# Convert the filtered dataset to a pandas DataFrame for the shuffling and
# per-language sampling done in the following cells.
df = filtered_dataset.to_pandas()

In [None]:
# Shuffle every row with a fixed seed (frac=1 keeps all rows).
# The shuffle is derived from `filtered_dataset` rather than from `df` itself so
# that re-running this cell always yields the same row order; shuffling `df` in
# place would re-shuffle an already shuffled frame and silently change which rows
# the later `.head(50)` selections pick.
df = filtered_dataset.to_pandas().sample(frac=1, random_state=42)


In [None]:
# Show how many rows each language contributes.
df["language"].value_counts()

In [None]:
# Take the first 50 rows of each target language from `df`
# (`.head(50)` returns fewer rows if a language has fewer than 50).
french, german, hindi = (
    df.loc[df["language"] == language_name].head(50)
    for language_name in ("French", "German", "Hindi")
)

# Stack the per-language samples into one frame, in French, German, Hindi order.
df_new = pd.concat([french, german, hindi])

# Rebuild a LabelledDataset from the sampled frame.
balanced_dataset = LabelledDataset.from_pandas(df_new)

# Give every record a "split" field with the value "test".
split_values = ["test" for _ in range(len(balanced_dataset))]
balanced_dataset.other_fields["split"] = split_values


In [None]:
# Write the balanced sample to disk, replacing any previous file at this path.
balanced_path = Path("../data/results/debug/prompts_25_03_25_gpt-4o_balanced.jsonl")
balanced_dataset.save_to(balanced_path, overwrite=True)

In [None]:
# Load the earlier filtered training file; field_mapping pairs "prompt" with
# "inputs" and "id" with "ids".
old_data_path = Path("../data/training/prompts_13_03_25_gpt-4o_filtered.jsonl")
old_data = LabelledDataset.load_from(
    old_data_path,
    field_mapping={"prompt": "inputs", "id": "ids"},
)

# Concatenate with the new balanced records listed first, then the older data.
combined = LabelledDataset.concatenate([balanced_dataset, old_data])

In [None]:


def labels_agree(row: LabelledRecord) -> bool:
    """Return False only when the record's two stakes fields contradict each other.

    Reads ``other_fields["high_stakes"]`` (only its truthiness is used) and
    ``other_fields["labels"]``. The record disagrees when the flag is truthy
    but the label is ``"low-stakes"``, or the flag is falsy but the label is
    ``"high-stakes"``. Every other label value counts as agreeing.
    """
    fields = row.other_fields
    flagged_high = fields["high_stakes"]
    assigned_label = fields["labels"]
    # The single label that contradicts the flag is the opposite stakes level.
    contradicting_label = "low-stakes" if flagged_high else "high-stakes"
    return not (assigned_label == contradicting_label)

# Apply labels_agree as the filter predicate over the combined dataset.
combined = combined.filter(labels_agree)

# Write the result as the new training file, replacing any previous file at this path.
training_path = Path("../data/training/prompts_25_03_25_gpt-4o.jsonl")
combined.save_to(training_path, overwrite=True)