In [None]:
!pip install -q langchain groq openai tiktoken

In [None]:
!pip install langchain_groq




In [None]:
import os

import pandas as pd
from langchain_groq import ChatGroq
from tqdm import tqdm



# Evaluation-set configuration, kept in named constants so the input file, the
# sample size and the sampling seed can each be changed in one place.
DATA_PATH = "Saudi.csv"
N_SAMPLES = 1600
SEED = 42

# Draw a fixed-seed random subset of the CSV and renumber its rows from 0.
df = pd.read_csv(DATA_PATH).sample(n=N_SAMPLES, random_state=SEED).reset_index(drop=True)
# Plain Python list of the sampled tweet texts, in row order.
tweets = df["tweet"].tolist()



In [None]:
# Set up the Groq chat model (model id: gemma2-9b-it).
# The API key is read from the GROQ_API_KEY environment variable so that no
# credential is stored in the notebook; export it before running this cell.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail here, once, rather than on every request inside the evaluation loops.
    raise RuntimeError("GROQ_API_KEY environment variable is not set")

# NOTE(review): max_tokens=1 limits every reply to a single token - confirm that
# both expected answers ('offensive' and 'not') fit in one token for this model.
groq_llm = ChatGroq(model_name="gemma2-9b-it", api_key=GROQ_API_KEY, temperature=0, max_tokens=1)


In [None]:
# Prompt pieces shared by every run.
# system_message: the three-line classification instruction sent as the system turn.
system_message = (
    "Classify the input text as 'offensive' or 'not'.\n"
    "The text will be delimited by triple backticks ``` in the input.\n"
    "Answer only with 'offensive' or 'not'. Do not explain your answer."
)
# user_message_template: wraps one text in triple backticks; fill with .format(text=...).
user_message_template = "Text: " + "```" + "{text}" + "```"




In [None]:
def create_examples(df, n_per_class=3):
    """Pick labelled example tweets for few-shot prompting.

    Rows are selected by comparing the lower-cased, stripped ``label`` column
    against ``"offensive"`` and ``"not"``; ``n_per_class`` rows are sampled
    from each group with ``random_state=1``.

    Args:
        df: DataFrame with string columns ``tweet`` and ``label``.
        n_per_class: Number of rows to sample from each of the two classes.

    Returns:
        A list of ``{"text": ..., "label": ...}`` dicts, the offensive samples
        first and the "not" samples after them, with each label stripped and
        lower-cased.
    """
    normalized = df["label"].str.lower().str.strip()
    picked = pd.concat([
        df[normalized == wanted].sample(n=n_per_class, random_state=1)
        for wanted in ("offensive", "not")
    ])
    return [
        {"text": tweet, "label": raw_label.strip().lower()}
        for tweet, raw_label in zip(picked["tweet"], picked["label"])
    ]

def create_prompt(system_message, examples, user_template):
    """Build the chat-message prefix for a few-shot prompt.

    Args:
        system_message: Content of the leading system turn.
        examples: Iterable of dicts with ``"text"`` and ``"label"`` keys.
        user_template: Format string with a ``{text}`` placeholder, used to
            render each example's user turn.

    Returns:
        A new list of ``{"role", "content"}`` dicts: the system turn, then a
        user turn and an assistant turn (the label) for every example, in order.
    """
    conversation = [{"role": "system", "content": system_message}]
    for shot in examples:
        conversation.extend((
            {"role": "user", "content": user_template.format(text=shot["text"])},
            {"role": "assistant", "content": shot["label"]},
        ))
    return conversation

# **0-Shot**

In [None]:
# Zero-shot run: each tweet is sent with only the system instruction, no examples.
zero_shot_preds = []

# Iterate over the column itself so the loop does not rely on df carrying a
# 0..n-1 integer index.
for tweet in tqdm(df["tweet"], total=len(df)):
    prompt = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message_template.format(text=tweet)},
    ]
    try:
        response = groq_llm.invoke(prompt)
        label = response.content.lower().strip()
        zero_shot_preds.append(label)
    except Exception as exc:
        # Best effort: keep going and keep one entry per row, but report the
        # failure instead of hiding it behind the "error" placeholder alone.
        tqdm.write(f"0-shot request for row {len(zero_shot_preds)} failed: {exc!r}")
        zero_shot_preds.append("error")


# One prediction per row, in row order; the CSV holds df plus the new column.
df["gemma_0shot"] = zero_shot_preds
df.to_csv("gemma_0shot.csv", index=False)

100%|██████████| 1600/1600 [09:49<00:00,  2.71it/s]


# **1-Shot**

In [None]:
# One-shot run: one labelled example per class precedes every tweet.
# NOTE(review): the examples are sampled from df, and df is also the set scored
# below, so the example tweets are later classified with their own label
# visible in the prompt - consider excluding them when computing metrics.
one_example = create_examples(df, n_per_class=1)
one_shot_preds = []

# The system turn and the example turns are identical for every tweet, so they
# are built once instead of on every iteration.
one_shot_prefix = create_prompt(system_message, one_example, user_message_template)

for tweet in tqdm(df["tweet"], total=len(df)):
    # Concatenation makes a fresh list, leaving the shared prefix untouched.
    prompt = one_shot_prefix + [{
        "role": "user",
        "content": user_message_template.format(text=tweet)
    }]

    try:
        response = groq_llm.invoke(prompt)
        label = response.content.lower().strip()
        one_shot_preds.append(label)
    except Exception as exc:
        # Best effort: keep going and keep one entry per row, but report the
        # failure instead of hiding it behind the "error" placeholder alone.
        tqdm.write(f"1-shot request for row {len(one_shot_preds)} failed: {exc!r}")
        one_shot_preds.append("error")

# One prediction per row, in row order; the CSV holds df plus the new column.
df["gemma_1shot"] = one_shot_preds
df.to_csv("gemma_1shot.csv", index=False)

100%|██████████| 1600/1600 [11:05<00:00,  2.40it/s]


# **3-Shot**

In [None]:
# Three-shot run: three labelled examples per class precede every tweet.
# NOTE(review): the examples are sampled from df, and df is also the set scored
# below, so the example tweets are later classified with their own label
# visible in the prompt - consider excluding them when computing metrics.
three_examples = create_examples(df, n_per_class=3)
three_shot_preds = []

# The system turn and the example turns are identical for every tweet, so they
# are built once instead of on every iteration.
three_shot_prefix = create_prompt(system_message, three_examples, user_message_template)

for tweet in tqdm(df["tweet"], total=len(df)):
    # Concatenation makes a fresh list, leaving the shared prefix untouched.
    prompt = three_shot_prefix + [{
        "role": "user",
        "content": user_message_template.format(text=tweet)
    }]

    try:
        response = groq_llm.invoke(prompt)
        label = response.content.lower().strip()
        three_shot_preds.append(label)
    except Exception as exc:
        # Best effort: keep going and keep one entry per row, but report the
        # failure instead of hiding it behind the "error" placeholder alone.
        tqdm.write(f"3-shot request for row {len(three_shot_preds)} failed: {exc!r}")
        three_shot_preds.append("error")

# One prediction per row, in row order; the CSV holds df plus the new column.
df["gemma_3shot"] = three_shot_preds
df.to_csv("gemma_3shot.csv", index=False)

100%|██████████| 1600/1600 [27:28<00:00,  1.03s/it]
