In [None]:
import os
import re
from time import sleep

import numpy as np
import pandas as pd
import pandas as pd
from groq import Groq
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from tqdm import tqdm

In [None]:
def dprint(s, debug):
    """Print ``s`` when ``debug`` is truthy; otherwise do nothing."""
    if not debug:
        return
    print(s)

In [None]:

# Read the API key from the environment instead of embedding it in the notebook.
# NOTE: a literal key used to be committed on this line; treat it as compromised and
# revoke it at https://console.groq.com/keys.
YOUR_GROQ_API_KEY = os.environ.get('GROQ_API_KEY')  # Get from https://console.groq.com/keys
if not YOUR_GROQ_API_KEY:
    raise RuntimeError("Set the GROQ_API_KEY environment variable before running this notebook.")
groq_client = Groq(api_key=YOUR_GROQ_API_KEY)

def call_groq_api(prompt, student_configs, pre_processing, post_processing, model='llama3-70b-8192', debug=False):
    """Send one user prompt to the Groq chat-completions API and return the post-processed reply.

    Args:
        prompt: Prompt text; ``pre_processing`` is applied to it before it is sent.
        student_configs: Dict of sampling options. Only ``max_tokens``, ``temperature``,
            ``top_p`` and ``stop`` are read (defaults 512, 0.7, 0.7, None); any other
            key is ignored.
        pre_processing: Callable applied to the prompt string.
        post_processing: Callable applied to the raw reply text.
        model: Model name passed to the API.
        debug: When truthy, the prompt, the raw reply and the post-processed value are
            printed through ``dprint``.

    Returns:
        Whatever ``post_processing`` returns for the first choice's message content.
    """
    prompt = pre_processing(prompt)

    # Build the request: fixed fields first, then the sampling options with their defaults.
    groq_params = {'messages': [{'role': 'user', 'content': prompt}], 'model': model}
    for option, default in (('max_tokens', 512), ('temperature', 0.7), ('top_p', 0.7), ('stop', None)):
        groq_params[option] = student_configs.get(option, default)

    output = groq_client.chat.completions.create(**groq_params)

    for line in ('*****prompt*****', prompt, '*****result*****'):
        dprint(line, debug)
    res = output.choices[0].message.content
    for line in (res, '*****output*****'):
        dprint(line, debug)
    labels_only = post_processing(res)
    for line in ('POST PROCESSED', labels_only, '========='):
        dprint(line, debug)
    return labels_only

In [None]:
# model_names = [
#     'togethercomputer/llama-2-7b', #LLaMa-2-7B
#     'togethercomputer/llama-2-13b', #LLaMa-2-13B
#     'togethercomputer/llama-2-70b', #LLaMa-2-70B
#     'togethercomputer/llama-2-70b-chat', #LLaMa-2-70B-Chat
# ]

In [None]:
# Candidate Groq model identifiers (believed current -- confirm against the Groq model list).
model_names = ['llama-3.1-8b-instant', 'llama-3.3-70b-versatile']



In [None]:
def get_train_df(topn = 10):
    """Read 'train_split.xlsx' and return its first ``topn`` rows."""
    return pd.read_excel('train_split.xlsx').iloc[:topn]


In [None]:
def get_test_set():
    """Read 'test_split.xlsx' and return it as a DataFrame."""
    return pd.read_excel('test_split.xlsx')
    

In [None]:
def get_eval_df(topn = 5, random_state = 42):
    """Return a reproducible random sample of rows from 'train_split.xlsx'.

    Args:
        topn: Number of rows to sample. Clamped to the number of rows available so that
            asking for more rows than exist returns the whole (shuffled) frame instead of raising.
        random_state: Seed forwarded to ``DataFrame.sample`` so that repeated runs evaluate
            on the same rows. Pass ``None`` for a different sample on every call.

    Returns:
        DataFrame with the sampled rows (original index preserved).

    Note:
        The rows come from the same file ``get_train_df`` reads, so sampled rows can
        overlap with rows used as few-shot examples.
    """
    eval_df = pd.read_excel('train_split.xlsx')
    n_rows = min(topn, len(eval_df))
    return eval_df.sample(n_rows, random_state=random_state)
    

In [None]:
def test_range(df, prompt_configs, prompt_prefix, examples, prompt_suffix,
               pre_processing=lambda x:x, post_processing=lambda y:y,
               model='llama-3.1-8b-instant', debug=False, sleep_seconds=1):
    """Run the model on every row of ``df`` and collect predictions next to the true labels.

    For each row the prompt is assembled as
    ``prompt_prefix + examples + pre_processing(row['text'] + "\\n") + prompt_suffix``,
    so ``prompt_prefix`` should hold the instructions and ``examples`` the few-shot block
    (or just the "Input Text: " marker).

    Args:
        df: DataFrame with a 'text' column and a 'label' column.
        prompt_configs: Sampling options forwarded to ``call_groq_api``.
        prompt_prefix: Text placed at the very start of every prompt.
        examples: Text placed between the prefix and the row text.
        prompt_suffix: Text placed after the row text.
        pre_processing: Callable applied to the row text. It is also passed on to
            ``call_groq_api``, which applies it a second time to the assembled prompt.
        post_processing: Callable applied to the raw model reply.
        model: Model name forwarded to ``call_groq_api``.
        debug: Forwarded to ``call_groq_api``.
        sleep_seconds: Pause after each request, in seconds (default 1, the value that
            used to be hard-coded).

    Returns:
        DataFrame with columns 'text_id' (the row's index in ``df``), 'text',
        'model_responses', 'corrected_model_responses' and 'true_label'.
    """
    text_ids = []
    answers = []
    model_responses = []
    corrected_model_responses = []
    text_list = []

    # iterrows() is a generator with no length, so tell tqdm the total to get a real progress bar.
    for idx, row in tqdm(df.iterrows(), total=len(df)):
        text_ids.append(idx)  # Use index as ID if no ID column
        fixed_prompt = row['text'] + "\n"
        text_list.append(row['text'])
        fixed_prompt = pre_processing(fixed_prompt)
        prompt = prompt_prefix + examples + fixed_prompt + prompt_suffix
        answer = row['label']
        answers.append(answer)
        # The raw reply is kept (identity post-processing here) so that both the raw and
        # the corrected response can be stored.
        model_response = call_groq_api(prompt, prompt_configs, pre_processing, lambda y:y, model=model, debug=debug)
        corrected_model_response = post_processing(model_response)
        corrected_model_responses.append(corrected_model_response)
        model_responses.append(model_response)
        sleep(sleep_seconds)

    result_df = pd.DataFrame({
        'text_id': text_ids,
        'text': text_list,
        'model_responses': model_responses,
        'corrected_model_responses': corrected_model_responses,
        'true_label': answers
    })
    return result_df

## Zero Shot Prompt

In [None]:
def get_all_categories():
    """Return the sorted distinct values of the 'label' column in 'train_split.xlsx'.

    Also prints the categories found and how many there are.
    """
    labels = pd.read_excel('train_split.xlsx')['label']
    all_categories = sorted(labels.unique())

    print(f"Found categories: {all_categories}")
    print(f"Total categories: {len(all_categories)}")

    return all_categories

categories = get_all_categories()

In [None]:
# Zero-shot prompt pieces: the instructions list every category twice
# (comma-separated, then one per line).
categories_list = get_all_categories()
categories_text = ", ".join(categories_list)
categories_lines = "\n".join(categories_list)

# Written as explicit literals so the leading newline and the trailing space after
# "categories:" are visible instead of hidden inside a triple-quoted string.
prompt_prefix_zs = (
    "\n"
    "Classify the following Arabic and English text into one of these categories: \n"
    + categories_text + ".\n"
    "\n"
    "Your output should only be one of these exact category names:\n"
    + categories_lines + "\n"
)

prompt_examples_zs = "Input Text: "
prompt_suffix_zs = "Output: "


In [None]:
def your_pre_processing_zs(input_string):
    """Remove every "@user" occurrence, then trim leading and trailing whitespace."""
    without_mentions = re.sub(r"@user", "", input_string)
    return without_mentions.strip()

def your_post_processing_zs(output_string):
    """Map a raw model reply onto one of the known category labels.

    The reply is lower-cased and searched for each label as a substring; the first label
    found (in the order listed below) is returned. If none is found, the stripped
    original reply is returned unchanged.

    'health' and 'social' are reported as their own labels: the chain-of-thought section
    of this notebook treats them as categories distinct from 'medical', so folding
    'health' into 'medical' (and never recognising 'social') mislabels those rows.

    Args:
        output_string: Raw text returned by the model.

    Returns:
        One of the known labels, or ``output_string.strip()`` when no label is present.
    """
    output_clean = output_string.strip().lower()

    # 'technology' contains 'tech', so the single 'tech' entry covers both spellings.
    known_labels = ('business', 'shopping', 'finance', 'education', 'tech',
                    'sports', 'medical', 'health', 'social')
    for label in known_labels:
        if label in output_clean:
            return label

    return output_string.strip()

In [None]:
# Sampling options for label-only prompts: a 3-token reply, no stop sequences.
prompt_config_zs = dict(max_tokens=3, temperature=0.4, top_p=0.7, stop=[])

model = 'llama-3.1-8b-instant'
print(model)

# 50 rows drawn from the training split for quick evaluation.
eval_df = get_eval_df(50)

In [None]:
# test_range builds prefix + examples + text + suffix, so the instructions must go in the
# prefix slot and the "Input Text: " marker in the examples slot. Passing them by keyword
# keeps the order from being swapped.
results_df = test_range(
    eval_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_zs,
    examples=prompt_examples_zs,
    prompt_suffix=prompt_suffix_zs,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)
print(results_df)

In [None]:
sum(results_df['corrected_model_responses']==results_df['true_label'].values)

In [None]:
f1_score(results_df['true_label'], results_df['corrected_model_responses'],average='weighted')

In [None]:
# Zero-shot inference on the full test split; results are saved to 'zsl_test_results.csv'.
test_df = get_test_set()
# Instructions go in the prefix slot and the "Input Text: " marker in the examples slot
# (test_range concatenates prefix + examples + text + suffix).
results_df = test_range(
    test_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_zs,
    examples=prompt_examples_zs,
    prompt_suffix=prompt_suffix_zs,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)
results_df.to_csv('zsl_test_results.csv', index=False)

In [None]:
print(classification_report(results_df['true_label'], results_df['corrected_model_responses'], digits=4))

In [None]:
# Attach test_df's columns to each prediction row; overlapping column names get the
# '_caller' (results) / '_other' (test_df) suffixes.
# NOTE(review): 'text_id' is the DataFrame index recorded by test_range, while the right
# side is keyed on test_df's 'id' column -- rows only line up if 'id' equals the index. TODO confirm.
joined_df = results_df.set_index("text_id").join(test_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
cm = joined_df.loc[joined_df.codemixed==1]

In [None]:
print(classification_report(cm['true_label'], cm['corrected_model_responses'], digits=4))

In [None]:
mono = joined_df.loc[joined_df.codemixed==0]

In [None]:
print(classification_report(mono['true_label'], mono['corrected_model_responses'], digits=4))

## Few Shot Prompting (In Context Learning)
Few-shot examples are useful for fixing the model's output format.

In [None]:
train_df = get_train_df(50)

In [None]:
def create_example(row):
    """Format one labelled row as an "Input Text: ...\\nOutput: ...\\n" few-shot example."""
    return "Input Text: " + row['text'] + "\n" + "Output: " + row['label'] + "\n"

In [None]:
# Concatenate one formatted example per training row, in row order.
prompt_examples_icl = "".join(create_example(row) for _, row in train_df.iterrows())

In [None]:
# Close the few-shot block with the marker that introduces the text to classify.
# Guarded so that re-running this cell does not append the marker a second time.
if not prompt_examples_icl.endswith("Input Text: "):
    prompt_examples_icl = prompt_examples_icl + "Input Text: "

In [None]:
# Few-shot (ICL) prompt pieces: instructions that point at the examples, plus the answer marker.
categories = get_all_categories()
categories_text = ", ".join(categories)

# Written as explicit literals so the leading newline and the trailing space after
# "categories:" are visible instead of hidden inside a triple-quoted string.
prompt_prefix_icl = (
    "\n"
    "Classify the following Arabic and English text into one of these categories: \n"
    + categories_text + ".\n"
    "\n"
    "Your output should only be one of these exact category names like in the examples below.\n"
)

prompt_suffix_icl = "Output: "


In [None]:
# Evaluate the few-shot (ICL) prompt on the sampled eval set, so that the F1 computed in the
# following cell reflects this prompt rather than a results_df left over from an earlier run.
# Instructions go in the prefix slot and the few-shot block in the examples slot.
results_df = test_range(
    eval_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_icl,
    examples=prompt_examples_icl,
    prompt_suffix=prompt_suffix_icl,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)
print(results_df)

In [None]:
f1_score(results_df['true_label'], results_df['corrected_model_responses'],average='weighted')

In [None]:
# Few-shot (ICL) inference on the full test split; results are saved to 'icl_test_results_50ex.csv'.
test_df = get_test_set()
# Instructions go in the prefix slot and the few-shot block in the examples slot
# (test_range concatenates prefix + examples + text + suffix).
results_df = test_range(
    test_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_icl,
    examples=prompt_examples_icl,
    prompt_suffix=prompt_suffix_icl,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)
results_df.to_csv('icl_test_results_50ex.csv', index=False)

In [None]:
print(classification_report(results_df['true_label'], results_df['corrected_model_responses'], digits=4))

In [None]:
joined_df = results_df.set_index("text_id").join(test_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
cm = joined_df.loc[joined_df.codemixed==1]
mono = joined_df.loc[joined_df.codemixed==0]

In [None]:
print(classification_report(cm['true_label'], cm['corrected_model_responses'], digits=4))

In [None]:
mono = joined_df.loc[joined_df.codemixed==0]

In [None]:
print(classification_report(mono['true_label'], mono['corrected_model_responses'], digits=4))

## Chain of Thought Prompting

In [None]:
# Hand-written chain-of-thought examples: for every category there is a list of example
# texts and a parallel list of justifications (same length, same order), so that
# <category>_texts[i] is explained by justification_<category>[i].

# --- business ---
business_texts = [
    "تطوير business strategy فعاله يعتمد علي فهم market trends بشكل شامل ودقيق",
    "الشركات الناجحة تضع خطط استراتيجية واضحة لتحقيق اهدافها في السوق"
]
justification_business = [
    "This text is about business because it discusses company strategies, market analysis, and commercial planning",
    "This belongs to business category as it focuses on organizational management and market competition"
]

# --- education ---
education_texts = [
    "في حال تم تطبيق effective curriculum development سيزيد ذلك من مستوي student engagement في الفصول الدراسيه",
    "استخدام virtual classrooms يعزز فرص collaboration بين الطلاب من مختلف انحاء العالم"
]
justification_education = [
    "This text is about education because it mentions curriculum development and student engagement in classrooms",
    "This belongs to education category as it discusses virtual learning and student collaboration"
]

# --- finance ---
finance_texts = [
    "اذا كنت تريد النجاح في trading عليك ان تتجنب emotional decisions وتركز علي market analysis",
    "تعتبر fintech من المجالات الرايده حيث تقدم حلولا مبتكره لتحسين banking services"
]
justification_finance = [
    "This text is about finance because it covers trading strategies and market analysis",
    "This belongs to finance category as it discusses fintech innovations and banking services"
]

# --- health ---
health_texts = [
    "اذا كنت ترغب في تحسين mental health يجب عليك ممارسه mindfulness بانتظام وتناول طعام صحي",
    "ممارسة الرياضة اليومية تساعد في الحفاظ على صحة القلب والجسم بشكل عام"
]
justification_health = [
    "This text is about health because it discusses mental health, mindfulness, and healthy eating",
    "This belongs to health category as it covers physical exercise and heart health"
]

# --- medical ---
medical_texts = [
    "تعتبر nutrition السليمه اساسا لنجاح اي medical treatment لذا يجب التركيز علي الاطعمه الغنيه بالفيتامينات",
    "هل يمكن ان توثر pharmaceutical advancements علي نسبه الشفاء من الامراض المزمنه بشكل كبير"
]
justification_medical = [
    "This text is about medical because it discusses nutrition in medical treatment and vitamins",
    "This belongs to medical category as it covers pharmaceutical advancements and chronic disease treatment"
]

# --- shopping ---
shopping_texts = [
    "اذا كنت ترغب في شراء consumer electronics فمن الافضل دايما مراجعه customer reviews قبل اتخاذ قرارك",
    "مقارنة الاسعار بين المتاجر المختلفة تساعد في اتخاذ قرار شراء أفضل"
]
justification_shopping = [
    "This text is about shopping because it mentions consumer electronics and customer reviews",
    "This belongs to shopping category as it discusses price comparison and purchase decisions"
]

# --- social ---
social_texts = [
    "تفاعل الافراد في المجتمع يساهم في بناء علاقات قوية وتحسين جودة الحياة الاجتماعية",
    "المبادرات المجتمعية تلعب دوراً هاماً في حل المشكلات الاجتماعية وتعزيز التكافل"
]
justification_social = [
    "This text is about social because it discusses community interactions and social relationships",
    "This belongs to social category as it covers community initiatives and social problem-solving"
]

# --- sports ---
sports_texts = [
    "اللاعبون المميزون في cricket يعرفون كيف يستخدمون strategic planning للفوز بالمباريات المهمه",
    "التدريب المستمر والالتزام بالبرنامج الرياضي اساسي لتحقيق النتائج في المسابقات"
]
justification_sports = [
    "This text is about sports because it mentions cricket players and strategic planning in games",
    "This belongs to sports category as it discusses continuous training and sports competitions"
]

# --- tech ---
tech_texts = [
    "اذا استثمرت الشركات في cloud computing ستتمكن من تحسين كفاءه عملياتها وتقليل التكاليف",
    "الذكاء الاصطناعي يساهم في تطوير قطاعات متعددة من الصناعة والخدمات"
]
justification_tech = [
    "This text is about tech because it covers cloud computing and operational efficiency",
    "This belongs to tech category as it discusses artificial intelligence and industry development"
]

In [None]:
import random
random.seed(40)

def create_examples(all_texts, all_justifications, all_labels):
    """Build a shuffled block of chain-of-thought examples.

    Each example is rendered as three lines:
    "Input Text: <text>", "Justification: <justification>", "Output: <label>".
    The examples are shuffled with the module-level ``random`` generator, so the order
    depends on the current seed.

    Args:
        all_texts: Example texts.
        all_justifications: One justification per text, in the same order.
        all_labels: One label per text, in the same order.

    Returns:
        A single string containing every formatted example.

    Raises:
        ValueError: If the three inputs do not have the same length. ``zip`` would
            otherwise silently drop the unmatched tail and misalign the rest.
    """
    all_texts = list(all_texts)
    all_justifications = list(all_justifications)
    all_labels = list(all_labels)
    if not (len(all_texts) == len(all_justifications) == len(all_labels)):
        raise ValueError(
            "texts, justifications and labels must have the same length "
            f"(got {len(all_texts)}, {len(all_justifications)}, {len(all_labels)})"
        )

    # Combine all texts, justifications and labels
    combined_data = list(zip(all_texts, all_justifications, all_labels))
    random.shuffle(combined_data)

    return ''.join(
        "Input Text: " + text + "\n"
        + "Justification: " + justification + "\n"
        + "Output: " + label + "\n"
        for text, justification, label in combined_data
    )

In [None]:
# One entry per category: (label, example texts, justifications). The three flat lists
# below are derived from it in this order, so they stay index-aligned.
_topic_groups = [
    ('business', business_texts, justification_business),
    ('education', education_texts, justification_education),
    ('finance', finance_texts, justification_finance),
    ('health', health_texts, justification_health),
    ('medical', medical_texts, justification_medical),
    ('shopping', shopping_texts, justification_shopping),
    ('social', social_texts, justification_social),
    ('sports', sports_texts, justification_sports),
    ('tech', tech_texts, justification_tech),
]

all_topic_texts = [text for _label, texts, _justs in _topic_groups for text in texts]

all_topic_justifications = [just for _label, _texts, justs in _topic_groups for just in justs]

# Each label is repeated once per example text of its category.
all_topic_labels = [label for label, texts, _justs in _topic_groups for _text in texts]

In [None]:
prompt_examples_cot = create_examples(all_topic_texts, all_topic_justifications, all_topic_labels) + "Input Text: "

In [None]:
print(prompt_examples_cot)

In [None]:
# Chain-of-thought instructions. The wording says "Arabic and English" to match the zero-shot
# and few-shot prompts: the example texts in this notebook mix both languages.
prompt_prefix_cot = (
    "\n"
    "Classify the following Arabic and English text into one of these categories: \n"
    + ", ".join(categories) + ".\n"
    "\n"
    "Analyze the text and provide a justification before giving the final category.\n"
    "Your output should include a justification and then the category name like in the examples below.\n"
)

# The prompt ends with the justification marker so the model writes its reasoning first.
prompt_suffix_cot = "Justification: "

In [None]:
def your_pre_processing_cot(input_string):
    """Return ``input_string`` without leading and trailing whitespace."""
    trimmed = input_string.strip()
    return trimmed

def your_post_processing_cot(output_string):
    """Extract the predicted category from a chain-of-thought reply.

    The reply is expected to look like "<justification> ... Output: <category>". The
    category is therefore taken from the text after the first "Output:" marker. Scanning
    the whole reply in a fixed category order would pick up categories that are merely
    mentioned in the justification (e.g. "business loans" in a finance justification).

    Fallbacks, in order:
      1. No marker, or no category after it: the category mentioned earliest in the reply.
      2. No category anywhere: the first whitespace-separated token of the reply.
      3. Empty or whitespace-only reply: 'unknown'.

    Args:
        output_string: Raw text returned by the model.

    Returns:
        A category name, the reply's first token, or 'unknown'.
    """
    categories = ['business', 'education', 'finance', 'health', 'medical', 'shopping', 'social', 'sports', 'tech']
    output_lower = output_string.lower()

    def _earliest_category(text):
        # Category whose first occurrence in `text` comes earliest, or None if none occurs.
        hits = [(text.find(category), category) for category in categories if category in text]
        return min(hits)[1] if hits else None

    marker = 'output:'
    marker_pos = output_lower.find(marker)
    if marker_pos != -1:
        answer = _earliest_category(output_lower[marker_pos + len(marker):])
        if answer is not None:
            return answer

    mentioned = _earliest_category(output_lower)
    if mentioned is not None:
        return mentioned

    return output_string.split()[0] if output_string.strip() else 'unknown'

In [None]:
# Sampling options for the chain-of-thought prompt.
# call_groq_api reads only max_tokens, temperature, top_p and stop from this dict, so
# top_k and repetition_penalty currently have no effect on the request.
prompt_config_cot = dict(
    max_tokens=30,
    temperature=0.4,
    top_k=50,
    top_p=0.7,
    repetition_penalty=1,
    stop=[],
)

model = 'llama-3.1-8b-instant'
print(model)

# 50 rows drawn from the training split for quick evaluation.
eval_df = get_eval_df(50)

In [None]:
# Instructions go in the prefix slot and the worked examples in the examples slot
# (test_range concatenates prefix + examples + text + suffix).
results_df = test_range(
    eval_df,
    prompt_config_cot,
    prompt_prefix=prompt_prefix_cot,
    examples=prompt_examples_cot,
    prompt_suffix=prompt_suffix_cot,
    pre_processing=your_pre_processing_cot,
    post_processing=your_post_processing_cot,
    model=model,
    debug=False,
)
print(results_df)

In [None]:
results_df.to_csv('error_analysis.csv', index=False)

In [None]:
results_df['corrected_model_responses'] = results_df['model_responses'].apply(your_post_processing_cot)

In [None]:
f1_score(results_df['true_label'], results_df['corrected_model_responses'],average='weighted')

In [None]:
print(classification_report(results_df['true_label'], results_df['corrected_model_responses']))

## Test Set Inference

In [None]:
test_df = get_test_set()

In [None]:
# Chain-of-thought inference on the test split. Instructions go in the prefix slot and the
# worked examples in the examples slot (test_range concatenates prefix + examples + text + suffix).
results_df = test_range(
    test_df,
    prompt_config_cot,
    prompt_prefix=prompt_prefix_cot,
    examples=prompt_examples_cot,
    prompt_suffix=prompt_suffix_cot,
    pre_processing=your_pre_processing_cot,
    post_processing=your_post_processing_cot,
    model=model,
    debug=False,
)

In [None]:
results_df['corrected_model_responses'] = results_df['model_responses'].apply(your_post_processing_cot)

In [None]:
print(classification_report(results_df['true_label'], results_df['corrected_model_responses']))

In [None]:
results_df.to_csv('test_results.csv', index=False)

In [None]:
# NOTE(review): this repeats the test-split inference already run and saved to
# 'test_results.csv' above, and its result is replaced when that CSV is reloaded below.
# Consider dropping this cell to avoid a second full pass of API calls.
# Instructions go in the prefix slot and the worked examples in the examples slot.
results_df = test_range(
    test_df,
    prompt_config_cot,
    prompt_prefix=prompt_prefix_cot,
    examples=prompt_examples_cot,
    prompt_suffix=prompt_suffix_cot,
    pre_processing=your_pre_processing_cot,
    post_processing=your_post_processing_cot,
    model=model,
    debug=False,
)

In [None]:
results_df['corrected_model_responses'] = results_df['model_responses'].apply(your_post_processing_cot)

In [None]:
results_df = pd.read_csv('test_results.csv')

In [None]:
print(classification_report(results_df['true_label'], results_df['corrected_model_responses'], digits=4))

In [None]:
joined_df = results_df.set_index("text_id").join(test_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
cm = joined_df.loc[joined_df.codemixed==1]
mono = joined_df.loc[joined_df.codemixed==0]

In [None]:
print(classification_report(mono['true_label'], mono['corrected_model_responses'], digits=4))

In [None]:
print(classification_report(cm['true_label'], cm['corrected_model_responses'], digits=4))

## OOD Set Inference

In [None]:
# NOTE: there is no data for this -- 'combined.xlsx' is not provided.
def get_ood_set():
    """Read 'combined.xlsx' (the out-of-distribution set) and return it as a DataFrame."""
    return pd.read_excel('combined.xlsx')

In [None]:
ood_df = get_ood_set()

In [None]:
model = 'llama-3.1-8b-instant'

In [None]:
# Chain-of-thought inference on the OOD set. Instructions go in the prefix slot and the
# worked examples in the examples slot (test_range concatenates prefix + examples + text + suffix).
results_df = test_range(
    ood_df,
    prompt_config_cot,
    prompt_prefix=prompt_prefix_cot,
    examples=prompt_examples_cot,
    prompt_suffix=prompt_suffix_cot,
    pre_processing=your_pre_processing_cot,
    post_processing=your_post_processing_cot,
    model=model,
    debug=False,
)

In [None]:
results_df['corrected_model_responses'] = results_df['model_responses'].apply(your_post_processing_cot)

In [None]:
print(classification_report(results_df['true_label'], results_df['corrected_model_responses']))

In [None]:
# NOTE(review): this repeats the OOD inference already run a few cells above; consider
# keeping only one of the two runs to avoid a second full pass of API calls.
# Instructions go in the prefix slot and the worked examples in the examples slot.
results_df = test_range(
    ood_df,
    prompt_config_cot,
    prompt_prefix=prompt_prefix_cot,
    examples=prompt_examples_cot,
    prompt_suffix=prompt_suffix_cot,
    pre_processing=your_pre_processing_cot,
    post_processing=your_post_processing_cot,
    model=model,
    debug=False,
)

In [None]:
results_df['corrected_model_responses'] = results_df['model_responses'].apply(your_post_processing_cot)

In [None]:
results_df.to_csv('ood_results.csv', index=False)

In [None]:
results_df = pd.read_csv('ood_results.csv')

In [None]:
print(classification_report(results_df['true_label'], results_df['corrected_model_responses'], digits=4))

In [None]:
joined_df = results_df.set_index('text_id').join(ood_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
rgn = joined_df.loc[joined_df['domain']=='religion']
gen = joined_df.loc[joined_df['domain']=='gender']
ori = joined_df.loc[joined_df['domain']=='orientation']

In [None]:
# Per-domain reports. The gold label recorded by test_range is stored in 'true_label';
# the results frame has no 'offense' column, so the join never produces 'offense_caller'.
for domain_df in (rgn, gen, ori):
    print(classification_report(domain_df['true_label'], domain_df['corrected_model_responses'], digits=4))


## ZSL OOD

In [None]:
# Zero-shot inference on the OOD set. Instructions go in the prefix slot and the
# "Input Text: " marker in the examples slot (test_range concatenates prefix + examples + text + suffix).
results_df = test_range(
    ood_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_zs,
    examples=prompt_examples_zs,
    prompt_suffix=prompt_suffix_zs,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)


In [None]:
results_df.to_csv('zsl_ood_results.csv', index=False)

In [None]:
joined_df = results_df.set_index('text_id').join(ood_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
rgn = joined_df.loc[joined_df['domain']=='religion']
gen = joined_df.loc[joined_df['domain']=='gender']
ori = joined_df.loc[joined_df['domain']=='orientation']

In [None]:
# Per-domain reports followed by the overall report. The gold label recorded by test_range
# is stored in 'true_label'; the results frame has no 'offense' column, so the join never
# produces 'offense_caller'.
for report_df in (rgn, gen, ori, joined_df):
    print(classification_report(report_df['true_label'], report_df['corrected_model_responses'], digits=4))

## ICL OOD

In [None]:
# Few-shot (ICL) inference on the OOD set; results are saved to 'icl_ood_results_50ex.csv'.
# Instructions go in the prefix slot and the few-shot block in the examples slot
# (test_range concatenates prefix + examples + text + suffix).
results_df = test_range(
    ood_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_icl,
    examples=prompt_examples_icl,
    prompt_suffix=prompt_suffix_icl,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)
results_df.to_csv('icl_ood_results_50ex.csv', index=False)

In [None]:
joined_df = results_df.set_index('text_id').join(ood_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
rgn = joined_df.loc[joined_df['domain']=='religion']
gen = joined_df.loc[joined_df['domain']=='gender']
ori = joined_df.loc[joined_df['domain']=='orientation']

In [None]:
# Per-domain reports followed by the overall report. The gold label recorded by test_range
# is stored in 'true_label'; the results frame has no 'offense' column, so the join never
# produces 'offense_caller'.
for report_df in (rgn, gen, ori, joined_df):
    print(classification_report(report_df['true_label'], report_df['corrected_model_responses'], digits=4))