In [None]:
# Sampling configuration: rows requested per model, and the seed that keeps
# every random draw reproducible.
RANDOM_SEED = 42
N_SAMPLE = 100

# Load the scored generations of each base model.
df_mistral = pd.read_csv("generations_mistral_7b_base_score.csv")
df_gemma = pd.read_csv("generations_gemma_7b_base_score.csv")
df_bloom = pd.read_csv("generations_bloom_7b_base_score.csv")

# Keep only the rows whose TOXICITY score is at least 0.8.
tox_mistral = df_mistral.loc[df_mistral["TOXICITY"].ge(0.8)]
tox_gemma = df_gemma.loc[df_gemma["TOXICITY"].ge(0.8)]
tox_bloom = df_bloom.loc[df_bloom["TOXICITY"].ge(0.8)]

def sample_uniform(df, n, random_state=None):
    """Draw up to ``n`` rows from ``df`` uniformly at random, without replacement.

    Args:
        df: DataFrame to sample from.
        n: Requested number of rows. It is capped at ``len(df)``, so a frame
            with fewer than ``n`` rows is returned in full (in shuffled
            order) instead of raising.
        random_state: Seed forwarded to ``DataFrame.sample``. When omitted
            (``None``), the module-level ``RANDOM_SEED`` is used, which
            matches the original two-argument behaviour.

    Returns:
        A new DataFrame with the sampled rows; original index labels are kept.
    """
    if random_state is None:
        # Fall back to the notebook-wide seed so existing callers stay reproducible.
        random_state = RANDOM_SEED
    return df.sample(n=min(n, len(df)), random_state=random_state)
       
# Draw the per-model sample and tag every row with the model it came from.
sample_mistral = sample_uniform(tox_mistral, N_SAMPLE).assign(model="mistral_7b")
sample_gemma = sample_uniform(tox_gemma, N_SAMPLE).assign(model="gemma_7b")
sample_bloom = sample_uniform(tox_bloom, N_SAMPLE).assign(model="bloom_7b")

# Columns kept in the annotation sheet: identifying/text columns first,
# followed by the score columns.
ANNOTATION_COLUMNS = [
    *("model", "prompt", "completion"),
    *(
        "TOXICITY",
        "SEVERE_TOXICITY",
        "IDENTITY_ATTACK",
        "INSULT",
        "PROFANITY",
        "THREAT",
        "SEXUALLY_EXPLICIT",
    ),
]

# Select only necessary columns
def select_existing_columns(df, cols):
    """Return ``df`` restricted to the entries of ``cols`` that it contains.

    Names in ``cols`` that are not columns of ``df`` are skipped silently.
    The surviving columns appear in the order given by ``cols``.
    """
    available = df.columns
    kept = [name for name in cols if name in available]
    return df[kept]

# Trim each per-model sample down to the annotation columns it actually has.
ann_mistral = select_existing_columns(sample_mistral, ANNOTATION_COLUMNS)
ann_gemma = select_existing_columns(sample_gemma, ANNOTATION_COLUMNS)
ann_bloom = select_existing_columns(sample_bloom, ANNOTATION_COLUMNS)

# Stack the three samples into one frame with a fresh 0..N-1 index.
stacked = pd.concat([ann_mistral, ann_gemma, ann_bloom], ignore_index=True)

# Shuffle the rows reproducibly so the models are interleaved, then renumber.
shuffled = stacked.sample(frac=1, random_state=RANDOM_SEED)
annotation_df = shuffled.reset_index(drop=True)

# Write the sheet without the index column.
annotation_df.to_csv("qualitative_annotation_sheet.csv", index=False)


216 210 195
