In [None]:
import os
import re
from time import sleep

import numpy as np
import pandas as pd
from groq import Groq
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm

In [None]:
def dprint(s, debug):
    """Print `s` only when `debug` is truthy; otherwise do nothing."""
    if not debug:
        return
    print(s)

In [None]:

# SECURITY: never store an API key in the notebook itself. The key is read from the
# GROQ_API_KEY environment variable (create one at https://console.groq.com/keys).
# Any key that was previously committed in this cell should be revoked.
YOUR_GROQ_API_KEY = os.environ.get('GROQ_API_KEY')
if not YOUR_GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it before starting the kernel, "
        "e.g. `export GROQ_API_KEY=...`."
    )
groq_client = Groq(api_key=YOUR_GROQ_API_KEY)

def call_groq_api(prompt, student_configs, pre_processing, post_processing, model='llama3-70b-8192', debug=False):
    """Send one prompt to the Groq chat-completions endpoint and return the post-processed reply.

    Args:
        prompt: Full prompt text; it is passed through `pre_processing` before sending.
        student_configs: Dict of generation settings. Only 'max_tokens', 'temperature',
            'top_p' and 'stop' are read; any other key is ignored.
        pre_processing: Callable applied to the whole prompt string.
        post_processing: Callable applied to the model's reply text.
        model: Groq model identifier.
            NOTE(review): confirm the default is still served by Groq; every caller in
            this notebook passes `model` explicitly.
        debug: When truthy, the prompt, raw reply and post-processed value are printed.

    Returns:
        Whatever `post_processing` returns for the reply text.
    """
    prompt = pre_processing(prompt)

    groq_params = {
        'messages': [{'role': 'user', 'content': prompt}],
        'model': model,
        'max_tokens': student_configs.get('max_tokens', 512),
        'temperature': student_configs.get('temperature', 0.7),
        'top_p': student_configs.get('top_p', 0.7),
        'stop': student_configs.get('stop', None),
    }

    output = groq_client.chat.completions.create(**groq_params)

    dprint('*****prompt*****', debug)
    dprint(prompt, debug)
    dprint('*****result*****', debug)
    # The message content is optional in the response schema; fall back to an empty
    # string so string-based post-processors do not fail on None.
    res = output.choices[0].message.content or ''
    dprint(res, debug)
    dprint('*****output*****', debug)
    labels_only = post_processing(res)
    dprint('POST PROCESSED', debug)
    dprint(labels_only, debug)
    dprint('=========', debug)
    return labels_only

In [None]:
# model_names = [
#     'togethercomputer/llama-2-7b', #LLaMa-2-7B
#     'togethercomputer/llama-2-13b', #LLaMa-2-13B
#     'togethercomputer/llama-2-70b', #LLaMa-2-70B
#     'togethercomputer/llama-2-70b-chat', #LLaMa-2-70B-Chat
# ]

In [None]:
# Candidate Groq model identifiers (unverified: "most likely" the current names).
model_names = ['llama-3.1-8b-instant', 'llama-3.3-70b-versatile']



In [None]:
def get_train_df(topn = 10):
    """Read 'train.csv' from the working directory and keep its first `topn` rows."""
    return pd.read_csv('train.csv').iloc[:topn]


In [None]:
def get_test_set():
    """Read 'test.csv' from the working directory and return it as a DataFrame."""
    return pd.read_csv('test.csv')
    

In [None]:
def get_eval_df(topn = 5, random_state = 42):
    """Return a random sample of rows from 'val.csv'.

    Args:
        topn: Number of rows to draw. Values larger than the file are clamped to the
            number of available rows instead of raising.
        random_state: Seed forwarded to `DataFrame.sample` so that repeated calls (and
            Restart & Run All) evaluate every prompt on the same rows. Pass None to
            get a different sample on each call.

    Returns:
        DataFrame with the sampled validation rows.
    """
    eval_df = pd.read_csv('val.csv')
    # DataFrame.sample raises when asked for more rows than exist (without replacement).
    n_rows = min(topn, len(eval_df))
    return eval_df.sample(n_rows, random_state=random_state)
    

In [None]:
def test_range(df, prompt_configs, prompt_prefix, examples, prompt_suffix,
               pre_processing=lambda x:x, post_processing=lambda y:y,
               model='llama-3.1-8b-instant', debug=False, delay=1):
    """Classify every row of `df` with the model and collect the replies.

    For each row the prompt is assembled as
    `prompt_prefix + examples + <pre-processed tweet> + "\\n" + prompt_suffix`
    and sent through `call_groq_api`.

    Args:
        df: DataFrame with the columns 'id', 'tweet_text' and 'offense'.
        prompt_configs: Generation settings forwarded to `call_groq_api`.
        prompt_prefix: Text placed at the very start of the prompt.
        examples: Text placed between the prefix and the tweet.
        prompt_suffix: Text placed after the tweet.
        pre_processing: Callable applied to the tweet text. It is also handed to
            `call_groq_api`, which applies it once more to the whole prompt.
        post_processing: Callable mapping the raw reply to the final label.
        model: Groq model identifier.
        debug: Forwarded to `call_groq_api`.
        delay: Seconds to sleep after each request; falsy disables the pause.

    Returns:
        DataFrame with the columns 'tweet_idx', 'tweet_text', 'model_responses',
        'corrected_model_responses' and 'offense', one row per input row.
    """
    columns = ['tweet_idx', 'tweet_text', 'model_responses', 'corrected_model_responses', 'offense']
    records = []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        tweet_text = row['tweet_text']
        # Clean the tweet first and append the newline afterwards: a pre-processor
        # that strips whitespace would otherwise remove the separator and glue the
        # tweet to `prompt_suffix`.
        fixed_prompt = pre_processing(tweet_text) + "\n"
        prompt = prompt_prefix + examples + fixed_prompt + prompt_suffix
        # Post-processing is deliberately done here (identity passed to the API
        # helper) so both the raw and the corrected reply can be stored.
        model_response = call_groq_api(prompt, prompt_configs, pre_processing, lambda y:y, model=model, debug=debug)
        records.append({
            'tweet_idx': row['id'],
            'tweet_text': tweet_text,
            'model_responses': model_response,
            'corrected_model_responses': post_processing(model_response),
            'offense': row['offense'],
        })
        if delay:
            sleep(delay)
    return pd.DataFrame(records, columns=columns)

## Zero Shot Prompt

In [None]:
# Zero-shot prompt pieces. test_range concatenates them as
# prompt_prefix + examples + tweet + prompt_suffix, so the prefix carries the task
# definition and the "examples" slot only opens the line for the tweet.
prompt_prefix_zs = \
'''Offensive speech focuses on the potentially hurtful effect of the tweet content on a given target. This category of text often contains offensive words such as sarcastic remarks, insults, slanders, and slurs.
Based on above definition, classify the following Hinglish tweet into offensive speech or not. 
Your output should only be either "OFF" for offensive, or "NOT" for not offensive.
'''
# No demonstrations in the zero-shot setting: just the label that precedes the tweet.
prompt_examples_zs = "Input Tweet: "
# Label after which the model is expected to write its answer.
prompt_suffix_zs = "Output: "


In [None]:
# input_string: '@user tweet'
def your_pre_processing_zs(input_string):
    """Remove every '@user' placeholder and trim leading/trailing whitespace."""
    mention_free = re.sub(r"@user", "", input_string)
    return mention_free.strip()

def your_post_processing_zs(output_string):
    """Map the model's raw reply to a binary label.

    Args:
        output_string: Raw reply text (may be empty or None).

    Returns:
        1 when the reply starts with "off" (case-insensitive, ignoring surrounding
        whitespace and leading quote/markup characters), otherwise 0. Anything that
        is not recognisably "OFF" -- including "NOT", empty or unexpected text --
        is treated as not offensive.
    """
    if not output_string:
        return 0
    # The prompt spells the labels with double quotes ("OFF"/"NOT"), so the model may
    # echo them; drop leading quote/markup characters before comparing.
    cleaned = output_string.strip().lower().lstrip('"\'`*([ ')
    return 1 if cleaned.startswith('off') else 0

In [None]:
# Generation settings for the label-only prompts: the reply is a single short label,
# so only a few tokens are requested.
prompt_config_zs = dict(
    max_tokens=3,
    temperature=0.4,
    top_p=0.7,
    stop=[],
)

# Model used for the runs below; printed so the choice is visible in the output.
model = 'llama-3.1-8b-instant'
print(model)

# 50 validation rows for quick prompt iteration.
eval_df = get_eval_df(topn=50)

In [None]:
# Keyword arguments are used on purpose: test_range expects (prompt_prefix, examples,
# prompt_suffix) in that order, and passing the examples string first put the task
# definition *after* "Input Tweet: " in the assembled prompt.
results_df = test_range(
    eval_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_zs,
    examples=prompt_examples_zs,
    prompt_suffix=prompt_suffix_zs,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)
print(results_df)

In [None]:
sum(results_df['corrected_model_responses']==results_df['offense'].values)

In [None]:
f1_score(results_df['offense'], results_df['corrected_model_responses'])

In [None]:
test_df = get_test_set()
# Keyword arguments keep the task definition (prefix) ahead of the "Input Tweet: "
# marker (examples); the positional call had the two swapped.
results_df = test_range(
    test_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_zs,
    examples=prompt_examples_zs,
    prompt_suffix=prompt_suffix_zs,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)
results_df.to_csv('zsl_test_results.csv', index=False)

In [None]:
print(classification_report(results_df['offense'], results_df['corrected_model_responses'], digits=4))

In [None]:
joined_df = results_df.set_index("tweet_idx").join(test_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
cm = joined_df.loc[joined_df.codemixed==1]

In [None]:
print(classification_report(cm['offense_caller'], cm['corrected_model_responses'], digits=4))

In [None]:
mono = joined_df.loc[joined_df.codemixed==0]

In [None]:
print(classification_report(mono['offense_caller'], mono['corrected_model_responses'], digits=4))

## Few Shot Prompting (In Context Learning)
Useful to fix output format

In [None]:
train_df = get_train_df(50)

In [None]:
def create_example(row):
    """Format one labelled row as a two-line few-shot demonstration.

    `row` must provide 'tweet_text' and 'offense'; an 'offense' value equal to 1 is
    rendered as "OFF", anything else as "NOT".
    """
    if row['offense'] == 1:
        label = 'OFF'
    else:
        label = 'NOT'
    parts = ["Input Tweet: " + row['tweet_text'] + "\n", "Output: " + label + "\n"]
    return parts[0] + parts[1]


In [None]:
# Concatenate one formatted demonstration per training row, in file order.
prompt_examples_icl = "".join(create_example(row) for _, row in train_df.iterrows())

In [None]:
# Open the slot for the tweet to classify. Guarded so that re-running this cell does
# not append a second "Input Tweet: " marker.
if not prompt_examples_icl.endswith("Input Tweet: "):
    prompt_examples_icl = prompt_examples_icl + "Input Tweet: "

In [None]:
# Few-shot prompt pieces: same task definition as the zero-shot prefix, plus a pointer
# to the demonstrations that follow it in the assembled prompt.
prompt_prefix_icl = \
'''Offensive speech focuses on the potentially hurtful effect of the tweet content on a given target. This category of text often contains offensive words such as sarcastic remarks, insults, slanders, and slurs.
Based on above definition, classify the following Hinglish tweet into offensive speech or not. Your output should only be either "OFF" for offensive, or "NOT" for not offensive like in the examples below.
'''



# Label after which the model is expected to write its answer.
prompt_suffix_icl = "Output: "


In [None]:
# Evaluate the few-shot prompt on the validation sample. With this run commented out,
# the F1 cell that follows scored whatever `results_df` was left over from the
# zero-shot test-set run instead of the few-shot prompt.
results_df = test_range(
    eval_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_icl,
    examples=prompt_examples_icl,
    prompt_suffix=prompt_suffix_icl,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)
print(results_df)

In [None]:
f1_score(results_df['offense'], results_df['corrected_model_responses'])

In [None]:
test_df = get_test_set()
# Keyword arguments keep the task definition (prefix) ahead of the demonstrations
# (examples); the positional call had the two swapped.
results_df = test_range(
    test_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_icl,
    examples=prompt_examples_icl,
    prompt_suffix=prompt_suffix_icl,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)
results_df.to_csv('icl_test_results_50ex.csv', index=False)

In [None]:
print(classification_report(results_df['offense'], results_df['corrected_model_responses'], digits=4))

In [None]:
joined_df = results_df.set_index("tweet_idx").join(test_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
cm = joined_df.loc[joined_df.codemixed==1]
mono = joined_df.loc[joined_df.codemixed==0]

In [None]:
print(classification_report(cm['offense_caller'], cm['corrected_model_responses'], digits=4))

In [None]:
mono = joined_df.loc[joined_df.codemixed==0]

In [None]:
print(classification_report(mono['offense_caller'], mono['corrected_model_responses'], digits=4))

## Chain of Thought Prompting

In [None]:
# Hand-picked tweets used as "OFF" demonstrations in the chain-of-thought prompt.
# Paired by position with justification_off_tweets.
off_tweets = [
"Ho skta h kuch baudh to teri tarah dogle bhi nhi hote..balki bahut hmare sc ya dalit bhai hmare sath mandiro me jate h khub dharm ko mante hai snatan me viswas krte h..or kuch gaddar meem walo ka gana gate hai..wo acha h lekin jo tum kar rhe ho wo ghtiya h..jai meem bol ab",
"Kab aana hai ? # Gharwapasi",
"Haa...Ab Umpire Bhi IPL khelega Chutiya Sala Fixer."]

In [None]:
# One justification per entry of off_tweets, in the same order (they are zipped
# together when the chain-of-thought demonstrations are built).
justification_off_tweets = [
    "This tweet is offensive because it is insulting in nature",
    "This tweet is offensive due to the context of Ghar Wapasi which involves religious conversions",
    "This tweet is offensive as it insults the umpire and uses cuss words.",
]

In [None]:
# Hand-picked tweets used as "NOT" demonstrations in the chain-of-thought prompt.
# Paired by position with justification_non_off_tweets.
# NOTE(review): a backslash at the end of a line inside a string literal is a line
# continuation, so the pieces are joined with no newline (and no space unless one
# precedes the backslash) -- e.g. "hater.Dalit" and "demonetisation15". Confirm
# whether the original tweets' line breaks were meant to be kept.
non_off_tweets = ["I don't understand why ppl take law in hands...agar chor tha toh police complaint hone thee...bekar me moka diya jata h ke dalit exploitation ho raha h...", "I have of hindus crimes data agints dalit u hater.\
Dalit hater community.\
Do u have data to prove me otherwise and provide data for muslim crime against hindu", "#demonetisation  .. mention bigger scam than this 😂😂",
"Can he himself come to court room in an himachali outfit or in an pure old adivasi dress(no dress)...",
"Just like modi said corruption will end with demonetisation\
15 L will be in everyone's account \
2 million jobs every year \
USA will stand in line for visa etc \
Aap &amp; bjpeee are the two sides of lies &amp; deceit", "Romanticizing open defecation under heavy rain to enjoy the melancholy"]

In [None]:
# One justification per entry of non_off_tweets, in the same order.
justification_non_off_tweets = ["This tweet is not offensive as it is against lynching of individuals", "This tweet is not offensive as it just asks for more data", "This tweet is not offensive as it criticizes a government policy which is allowed in a healthy democracy",
                                "This tweet is not offensive as it is in fact showcasing cultural diversity and not containing offensive language",
                                "This tweet is not offensive as even though it is critical of political promises, it simply expresses a perspective on political issues.", "This tweet is not offensive despite the sarcasm, because it is satirical in nature without offending any group in particular"]

In [None]:
import random
random.seed(40)
def create_examples(off_tweets, justification_off_tweets, non_off_tweets, justification_non_off_tweets, seed=40):
    """Build the shuffled chain-of-thought demonstration block.

    Each demonstration has the form
    "Input Tweet: <tweet>\\nJustification: <justification>\\nOutput: <OFF|NOT>\\n".

    Args:
        off_tweets: Tweets labelled "OFF".
        justification_off_tweets: One justification per entry of `off_tweets`.
        non_off_tweets: Tweets labelled "NOT".
        justification_non_off_tweets: One justification per entry of `non_off_tweets`.
        seed: Seed for the private shuffle RNG. Using a private generator makes the
            order independent of the global `random` state, so re-running the calling
            cell always yields the same prompt.

    Returns:
        All demonstrations concatenated into a single string.

    Raises:
        ValueError: If a tweet list and its justification list differ in length
            (zip would otherwise silently drop the unmatched entries).
    """
    if len(off_tweets) != len(justification_off_tweets):
        raise ValueError("off_tweets and justification_off_tweets must have the same length")
    if len(non_off_tweets) != len(justification_non_off_tweets):
        raise ValueError("non_off_tweets and justification_non_off_tweets must have the same length")

    combined_tweets = [(tweet, why, 'OFF') for tweet, why in zip(off_tweets, justification_off_tweets)]
    combined_tweets += [(tweet, why, 'NOT') for tweet, why in zip(non_off_tweets, justification_non_off_tweets)]

    # Interleave the two classes so the demonstrations are not grouped by label.
    random.Random(seed).shuffle(combined_tweets)

    return ''.join(
        "Input Tweet: " + tweet + "\n" + "Justification: " + justification + "\n" + "Output: " + label + "\n"
        for tweet, justification, label in combined_tweets
    )

In [None]:
# def create_examples(off_tweets, justification_off_tweets, non_off_tweets, justification_non_off_tweets):
#     examples = ""
#     for i in range(len(off_tweets)):
#         line1 = "Input Tweet: " + off_tweets[i] + "\n"
#         justification_off = "Justification: " + justification_off_tweets[i] + "\n"
#         line2 = "Output: " + 'OFF' + "\n"
#         line3 = "Input Tweet: " + non_off_tweets[i] + "\n"
#         justification_not = "Justification: " + justification_non_off_tweets[i] + "\n"
#         line4 = "Output: " + 'NOT' + "\n"
#         set = line1 + justification_off + line2 + line3 + justification_not + line4
#         examples+=set
    
#     return examples


In [None]:
# Shuffled demonstrations followed by the marker that opens the tweet to classify.
prompt_examples_cot = create_examples(
    off_tweets,
    justification_off_tweets,
    non_off_tweets,
    justification_non_off_tweets,
) + "Input Tweet: "

In [None]:
print(prompt_examples_cot)

In [None]:
# Chain-of-thought prompt pieces: the task definition asks for a label plus a
# justification, mirroring the demonstrations that follow it in the prompt.
prompt_prefix_cot = \
'''Offensive speech focuses on the potentially hurtful effect of the tweet content on a given target. This category of text often contains offensive words such as sarcastic remarks, insults, slanders, and slurs.
Based on above definition, classify the following Hinglish tweet into offensive speech or not. Your output should only be either "OFF" for offensive, or "NOT" for not offensive along with a justification for your output like in the examples below.
'''



# The model is asked to write the justification first, as in the demonstrations.
prompt_suffix_cot = "Justification: "

In [None]:
# input_string: '@user tweet'
def your_pre_processing_cot(input_string):
    """Remove every '@user' placeholder and trim leading/trailing whitespace."""
    mention_free = re.sub(r"@user", "", input_string)
    return mention_free.strip()

def your_post_processing_cot(output_string):
    """Map a chain-of-thought reply (justification, optionally followed by a label) to 0/1.

    Resolution order, all case-insensitive:
      1. Only the text before the first "Input Tweet:" is considered, so a follow-on
         example the model may start generating cannot change the label.
      2. An explicit "Output: OFF" / "Output: NOT" label wins when present.
      3. Otherwise the phrase "not offensive" in the justification means 0.
      4. Anything else (including an empty reply) is treated as offensive (1).

    Args:
        output_string: Raw reply text (may be empty or None).

    Returns:
        0 for not offensive, 1 for offensive.
    """
    text = output_string or ''
    # Keep only the answer for the current tweet.
    answer = re.split(r'input tweet:', text, maxsplit=1, flags=re.IGNORECASE)[0]

    label = re.search(r'output:\s*["\']?(off|not)', answer, flags=re.IGNORECASE)
    if label:
        return 1 if label.group(1).lower() == 'off' else 0

    # The reply is often cut off before the label is written; fall back to the
    # wording of the justification.
    if 'not offensive' in answer.lower():
        return 0
    return 1

In [None]:
# Generation settings for the chain-of-thought prompt. Only the keys that
# call_groq_api actually reads ('max_tokens', 'temperature', 'top_p', 'stop') are
# listed; 'top_k' and 'repetition_penalty' were never forwarded to the API, so
# keeping them here only suggested settings that had no effect.
# NOTE(review): 30 tokens may cut the reply off before the "Output:" label -- confirm.
prompt_config_cot = {'max_tokens': 30,
                'temperature': 0.4,
                'top_p': 0.7,
                'stop': []}

# Model used for the chain-of-thought runs; printed so the choice is visible.
model = 'llama-3.1-8b-instant'
print(model)

# 50 validation rows for quick prompt iteration.
eval_df = get_eval_df(topn=50)

In [None]:
# Keyword arguments keep the task definition (prefix) ahead of the demonstrations
# (examples); the positional call had the two swapped.
results_df = test_range(
    eval_df,
    prompt_config_cot,
    prompt_prefix=prompt_prefix_cot,
    examples=prompt_examples_cot,
    prompt_suffix=prompt_suffix_cot,
    pre_processing=your_pre_processing_cot,
    post_processing=your_post_processing_cot,
    model=model,
    debug=False,
)
print(results_df)

In [None]:
results_df.to_csv('error_analysis.csv', index=False)

In [None]:
results_df['corrected_model_responses'] = results_df['model_responses'].apply(your_post_processing_cot)

In [None]:
f1_score(results_df['offense'], results_df['corrected_model_responses'])

In [None]:
print(classification_report(results_df['offense'], results_df['corrected_model_responses']))

## Test Set Inference

In [None]:
test_df = get_test_set()

In [None]:
# Keyword arguments keep the task definition (prefix) ahead of the demonstrations
# (examples); the positional call had the two swapped.
results_df = test_range(
    test_df,
    prompt_config_cot,
    prompt_prefix=prompt_prefix_cot,
    examples=prompt_examples_cot,
    prompt_suffix=prompt_suffix_cot,
    pre_processing=your_pre_processing_cot,
    post_processing=your_post_processing_cot,
    model=model,
    debug=False,
)

In [None]:
results_df['corrected_model_responses'] = results_df['model_responses'].apply(your_post_processing_cot)

In [None]:
print(classification_report(results_df['offense'], results_df['corrected_model_responses']))

In [None]:
results_df.to_csv('test_results.csv', index=False)

In [None]:
# NOTE(review): this repeats the full test-set inference run from a few cells above,
# and `results_df` is replaced by pd.read_csv('test_results.csv') shortly afterwards,
# so the output of this run is never reported. Consider deleting this cell.
# Keyword arguments keep the task definition (prefix) ahead of the demonstrations
# (examples); the positional call had the two swapped.
results_df = test_range(
    test_df,
    prompt_config_cot,
    prompt_prefix=prompt_prefix_cot,
    examples=prompt_examples_cot,
    prompt_suffix=prompt_suffix_cot,
    pre_processing=your_pre_processing_cot,
    post_processing=your_post_processing_cot,
    model=model,
    debug=False,
)

In [None]:
results_df['corrected_model_responses'] = results_df['model_responses'].apply(your_post_processing_cot)

In [None]:
results_df = pd.read_csv('test_results.csv')

In [None]:
print(classification_report(results_df['offense'], results_df['corrected_model_responses'], digits=4))

In [None]:
joined_df = results_df.set_index("tweet_idx").join(test_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
cm = joined_df.loc[joined_df.codemixed==1]
mono = joined_df.loc[joined_df.codemixed==0]

In [None]:
print(classification_report(mono['offense_caller'], mono['corrected_model_responses'], digits=4))

In [None]:
print(classification_report(cm['offense_caller'], cm['corrected_model_responses'], digits=4))

## OOD Set Inference

In [None]:
# NOTE(review): the original remark here was "There is No Data for This" --
# presumably 'cm_hate_combined.csv' is not distributed with the notebook; confirm.
def get_ood_set():
    """Read 'cm_hate_combined.csv' from the working directory and return it as a DataFrame."""
    return pd.read_csv('cm_hate_combined.csv')

In [None]:
ood_df = get_ood_set()

In [None]:
ood_df['tweet_text'] = ood_df['tweet']

In [None]:
model = 'llama-3.1-8b-instant'

In [None]:
# Keyword arguments keep the task definition (prefix) ahead of the demonstrations
# (examples); the positional call had the two swapped.
results_df = test_range(
    ood_df,
    prompt_config_cot,
    prompt_prefix=prompt_prefix_cot,
    examples=prompt_examples_cot,
    prompt_suffix=prompt_suffix_cot,
    pre_processing=your_pre_processing_cot,
    post_processing=your_post_processing_cot,
    model=model,
    debug=False,
)

In [None]:
results_df['corrected_model_responses'] = results_df['model_responses'].apply(your_post_processing_cot)

In [None]:
print(classification_report(results_df['offense'], results_df['corrected_model_responses']))

In [None]:
# Keyword arguments keep the task definition (prefix) ahead of the demonstrations
# (examples); the positional call had the two swapped.
results_df = test_range(
    ood_df,
    prompt_config_cot,
    prompt_prefix=prompt_prefix_cot,
    examples=prompt_examples_cot,
    prompt_suffix=prompt_suffix_cot,
    pre_processing=your_pre_processing_cot,
    post_processing=your_post_processing_cot,
    model=model,
    debug=False,
)

In [None]:
results_df['corrected_model_responses'] = results_df['model_responses'].apply(your_post_processing_cot)

In [None]:
results_df.to_csv('ood_results.csv', index=False)

In [None]:
results_df = pd.read_csv('ood_results.csv')

In [None]:
print(classification_report(results_df['offense'], results_df['corrected_model_responses'], digits=4))

In [None]:
joined_df = results_df.set_index('tweet_idx').join(ood_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
rgn = joined_df.loc[joined_df['domain']=='religion']
gen = joined_df.loc[joined_df['domain']=='gender']
ori = joined_df.loc[joined_df['domain']=='orientation']

In [None]:
print(classification_report(rgn['offense_caller'], rgn['corrected_model_responses'], digits=4))
print(classification_report(gen['offense_caller'], gen['corrected_model_responses'], digits=4))
print(classification_report(ori['offense_caller'], ori['corrected_model_responses'], digits=4))


## ZSL OOD

In [None]:
# Keyword arguments keep the task definition (prefix) ahead of the "Input Tweet: "
# marker (examples); the positional call had the two swapped.
results_df = test_range(
    ood_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_zs,
    examples=prompt_examples_zs,
    prompt_suffix=prompt_suffix_zs,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)


In [None]:
results_df.to_csv('zsl_ood_results.csv', index=False)

In [None]:
joined_df = results_df.set_index('tweet_idx').join(ood_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
rgn = joined_df.loc[joined_df['domain']=='religion']
gen = joined_df.loc[joined_df['domain']=='gender']
ori = joined_df.loc[joined_df['domain']=='orientation']

In [None]:
print(classification_report(rgn['offense_caller'], rgn['corrected_model_responses'], digits=4))
print(classification_report(gen['offense_caller'], gen['corrected_model_responses'], digits=4))
print(classification_report(ori['offense_caller'], ori['corrected_model_responses'], digits=4))
print(classification_report(joined_df['offense_caller'], joined_df['corrected_model_responses'], digits=4))

## ICL OOD

In [None]:
# Keyword arguments keep the task definition (prefix) ahead of the demonstrations
# (examples); the positional call had the two swapped.
results_df = test_range(
    ood_df,
    prompt_config_zs,
    prompt_prefix=prompt_prefix_icl,
    examples=prompt_examples_icl,
    prompt_suffix=prompt_suffix_icl,
    pre_processing=your_pre_processing_zs,
    post_processing=your_post_processing_zs,
    model=model,
    debug=False,
)
results_df.to_csv('icl_ood_results_50ex.csv', index=False)

In [None]:
joined_df = results_df.set_index('tweet_idx').join(ood_df.set_index('id'), lsuffix='_caller', rsuffix='_other')

In [None]:
rgn = joined_df.loc[joined_df['domain']=='religion']
gen = joined_df.loc[joined_df['domain']=='gender']
ori = joined_df.loc[joined_df['domain']=='orientation']

In [None]:
print(classification_report(rgn['offense_caller'], rgn['corrected_model_responses'], digits=4))
print(classification_report(gen['offense_caller'], gen['corrected_model_responses'], digits=4))
print(classification_report(ori['offense_caller'], ori['corrected_model_responses'], digits=4))
print(classification_report(joined_df['offense_caller'], joined_df['corrected_model_responses'], digits=4))