In [None]:
!pip install langchain_groq

Collecting langchain_groq
  Downloading langchain_groq-0.3.2-py3-none-any.whl.metadata (2.6 kB)
Collecting groq<1,>=0.4.1 (from langchain_groq)
  Downloading groq-0.26.0-py3-none-any.whl.metadata (15 kB)
Downloading langchain_groq-0.3.2-py3-none-any.whl (15 kB)
Downloading groq-0.26.0-py3-none-any.whl (129 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 129.6/129.6 kB 6.7 MB/s eta 0:00:00
Installing collected packages: groq, langchain_groq
Successfully installed groq-0.26.0 langchain_groq-0.3.2


In [None]:
import os
from getpass import getpass

import matplotlib.pyplot as plt
import pandas as pd
from langchain_groq import ChatGroq
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from tqdm import tqdm



# Evaluation set: a seeded 1600-row random draw from Saudi.csv, re-indexed
# from 0 so that row positions are contiguous.
df = (
    pd.read_csv("Saudi.csv")
    .sample(n=1600, random_state=42)
    .reset_index(drop=True)
)

# Values of the "tweet" column as a plain list, in the same row order as `df`.
texts = df["tweet"].to_list()

In [None]:

# Take the Groq credential from the environment, prompting only as a fallback,
# so the secret is never stored in the notebook source or its saved outputs.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY") or getpass("Groq API key: ")

# Chat client shared by all prompting runs in this notebook.
groq_llm = ChatGroq(
    model_name="llama3-70b-8192",
    temperature=0,
    api_key=GROQ_API_KEY,
    # NOTE(review): a one-token cap truncates any reply longer than one token.
    # Confirm that both expected answers ('offensive' and 'not') come back
    # whole for this model; otherwise the stored predictions will be fragments.
    max_tokens=1
)


# Instruction sent as the system turn of every prompt.
system_message = """Classify the input Arabic tweet as 'offensive' or 'not'.
The text will be delimited by triple backticks ``` in the input.
Answer only with 'offensive' or 'not'. Do not explain your answer!"""

# Wrapper applied to each tweet; `{text}` is filled in with str.format.
user_message_template = "Text: ```{text}```"



In [None]:
import random


def create_examples(df, n_per_class=3, random_state=1):
    """Sample labelled demonstration tweets for few-shot prompting.

    Labels are normalised (surrounding whitespace stripped, lower-cased) before
    the class filter is applied, so a value such as " Offensive " is treated as
    "offensive" both when selecting rows and in the returned label.

    Args:
        df: DataFrame with a "tweet" column and a string "label" column.
        n_per_class: Number of rows to draw from each class.
        random_state: Seed passed to ``DataFrame.sample`` for both classes.

    Returns:
        A list of ``{"text": ..., "label": ...}`` dicts: the "offensive"
        samples first, followed by the "not" samples.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when a class has
            fewer than ``n_per_class`` rows.
    """
    # Normalise once so the filter and the emitted label agree with each other.
    normalized = df["label"].str.strip().str.lower()

    sampled = pd.concat([
        df[normalized == "offensive"].sample(n=n_per_class, random_state=random_state),
        df[normalized == "not"].sample(n=n_per_class, random_state=random_state),
    ])

    return [
        {"text": tweet, "label": label.strip().lower()}
        for tweet, label in zip(sampled["tweet"], sampled["label"])
    ]

def create_prompt(system_message, examples, user_template):
    """Assemble the chat-message prefix for a few-shot prompt.

    The result starts with one system message. Each example then contributes
    a user message (``user_template`` formatted with the example's "text")
    followed by an assistant message holding the example's "label".

    Args:
        system_message: Content of the leading system message.
        examples: Iterable of dicts with "text" and "label" keys.
        user_template: Format string containing a ``{text}`` placeholder.

    Returns:
        A new list of ``{"role": ..., "content": ...}`` dicts.
    """
    conversation = [{"role": "system", "content": system_message}]
    for shot in examples:
        conversation.extend((
            {"role": "user", "content": user_template.format(text=shot["text"])},
            {"role": "assistant", "content": shot["label"]},
        ))
    return conversation




# **0-Shot**

In [None]:
# Zero-shot run: each tweet is sent with only the system instruction, and the
# reply text (lower-cased, whitespace-trimmed) is recorded as the prediction.
zero_shot_preds = []
for tweet in tqdm(texts, desc="0-shot"):
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": user_message_template.format(text=tweet)}
    response = groq_llm.invoke([system_turn, user_turn])
    zero_shot_preds.append(response.content.lower().strip())

# Attach the predictions as a new column and save a snapshot of the frame.
df["llama_0shot"] = zero_shot_preds
df.to_csv("llama3_0shot.csv", index=False)

0-shot: 100%|██████████| 1600/1600 [25:52<00:00,  1.03it/s]


# **1-Shot**

In [None]:
# One-shot run: one demonstration per class precedes every query tweet.
# NOTE(review): the demonstrations are sampled from `df`, the same frame whose
# tweets are scored below, so each demonstration tweet is also evaluated with
# its gold label present in the prompt. Consider drawing them from held-out rows.
one_example = create_examples(df, n_per_class=1)

# The demonstration prefix is the same for every tweet, so build it once
# instead of on every loop iteration.
one_shot_prefix = create_prompt(system_message, one_example, user_message_template)

one_shot_preds = []
for text in tqdm(texts, desc="1-shot"):
    # Concatenation creates a fresh list, leaving the shared prefix untouched.
    few_shot_prompt = one_shot_prefix + [
        {"role": "user", "content": user_message_template.format(text=text)}
    ]
    response = groq_llm.invoke(few_shot_prompt)
    label = response.content.lower().strip()
    one_shot_preds.append(label)

# Attach the predictions as a new column and save a snapshot of the frame.
df["llama_1shot"] = one_shot_preds
df.to_csv("llama3_1shot.csv", index=False)

1-shot: 100%|██████████| 1600/1600 [27:28<00:00,  1.03s/it]


# **3-Shot**

In [None]:
# Three-shot run: three demonstrations per class precede every query tweet.
# NOTE(review): the demonstrations are sampled from `df`, the same frame whose
# tweets are scored below, so each demonstration tweet is also evaluated with
# its gold label present in the prompt. Consider drawing them from held-out rows.
three_examples = create_examples(df, n_per_class=3)

# The demonstration prefix is the same for every tweet, so build it once
# instead of on every loop iteration.
three_shot_prefix = create_prompt(system_message, three_examples, user_message_template)

three_shot_preds = []
for text in tqdm(texts, desc="3-shot"):
    # Concatenation creates a fresh list, leaving the shared prefix untouched.
    few_shot_prompt = three_shot_prefix + [
        {"role": "user", "content": user_message_template.format(text=text)}
    ]
    response = groq_llm.invoke(few_shot_prompt)
    label = response.content.lower().strip()
    three_shot_preds.append(label)

# Attach the predictions as a new column and save a snapshot of the frame.
df["llama_3shot"] = three_shot_preds
df.to_csv("llama3_3shot.csv", index=False)

3-shot: 100%|██████████| 1600/1600 [29:58<00:00,  1.12s/it]
