In [1]:
import pandas as pd
import openai
import json
from pydantic import BaseModel, Field

## Reading dataset

In [2]:
file_path= 'dataset_merged_ranlp.csv'
df= pd.read_csv(file_path)
# Filter by train dataset
train_df = df[df['source'] == 'train']

## Binary Classification

## Extracting random samples

In [3]:
# Number of examples per class
num_samples = 3 

# Get random examples from train for each combination of binary 
few_shot_examples = train_df.groupby(['binary']).apply(
    lambda x: x.sample(n=min(num_samples, len(x)), random_state=42)
)

# Reset index to avoid multi-index issues
few_shot_examples = few_shot_examples.reset_index(drop=True)

  few_shot_examples = train_df.groupby(['binary']).apply(


In [4]:
few_shot_examples.to_csv('binary_few_shot_examples.csv')

## Binary Samples for english

In [5]:
# Filter for english
filtered_examples = few_shot_examples[
    (few_shot_examples['language'] == 'english')
]

# Convert filtered examples to the desired format
few_shot_text = " ".join([
    f"Text {i}: {row['clean_text']} Label {i}: {row['binary']}"
    for i, (_, row) in enumerate(filtered_examples.iterrows(), 1)
])

few_shot_text

'Text 1:  perhaps i felt that way after checking my #btc wallet but was encouraged and hopeful with puss do yourself favor and good by buying some puss a community driven gemstonewhere every member is a priority #pussdao #puss #bnb web  Label 1: Hope Text 2: according to dhruv rathee  arrests should depend on the follower count of the person filing the fir  to think there are enough idiots out there who listen to his dumb youtube videos as the gospel truth what a joke this is  Label 2: Not Hope'

## Binary Samples for spanish

In [6]:
# Filter for spanish
filtered_examples = few_shot_examples[
    (few_shot_examples['language'] == 'spanish')
]

# Convert filtered examples to the desired format
few_shot_text = " ".join([
    f"Text {i}: {row['clean_text']} Label {i}: {row['binary']}"
    for i, (_, row) in enumerate(filtered_examples.iterrows(), 1)
])

few_shot_text

'Text 1: hoy desperte y ya no te extraño ahora solo siento un sentimiento de emocion y esperanza para ser la mejor version de mi que pueda existir hearthandslightskintone Label 1: Hope'

## Binary Samples for german

In [7]:
# Filter for german
filtered_examples = few_shot_examples[
    (few_shot_examples['language'] == 'german')
]

# Convert filtered examples to the desired format
few_shot_text = " ".join([
    f"Text {i}: {row['clean_text']} Label {i}: {row['binary']}"
    for i, (_, row) in enumerate(filtered_examples.iterrows(), 1)
])

few_shot_text

'Text 1:  positiv sicher aber sind älter und niemals modernisieworden müssten erst an die anderen zugesagten a4 angepasst werden Label 1: Not Hope Text 2:  jaja man erinnesich wie deutschland in den 1940ern frieden nach ganz europa gebracht hat mit immer mehr und schwereren waffen glauben diese trottel ihren schwachsinn eigentlich selbst? Label 2: Not Hope'

## Binary Samples for urdu

In [8]:
# Filter for urdu
filtered_examples = few_shot_examples[
    (few_shot_examples['language'] == 'urdu')
]

# Convert filtered examples to the desired format
few_shot_text = " ".join([
    f"Text {i}: {row['clean_text']} Label {i}: {row['binary']}"
    for i, (_, row) in enumerate(filtered_examples.iterrows(), 1)
])

few_shot_text

'Text 1:  انشا اللہبہت جلد ان شہدا کا بدلہ لیں گے Label 1: Hope'

## Multiclass Classification

## Extracting random samples

In [9]:
 # Number of examples per class
num_samples = 3 

# Get random examples from train for each combination of binary and language
few_shot_examples = train_df.groupby(['multiclass','language']).apply(
    lambda x: x.sample(n=min(num_samples, len(x)), random_state=42)
)

# Reset index to avoid multi-index issues
few_shot_examples = few_shot_examples.reset_index(drop=True)

few_shot_examples

  few_shot_examples = train_df.groupby(['multiclass','language']).apply(


Unnamed: 0,text,binary,multiclass,source,clean_text,language
0,#ForbiddenDoor thoughts so far today:\n\nThe p...,Hope,Generalized Hope,train,#forbiddendoor thoughts so far today the pop f...,english
1,#USER# I’m currently matching/exceeding expect...,Hope,Generalized Hope,train,im currently matchingexceeding expectations a...,english
2,#USER# Spokane is losing a good one! Appreciat...,Hope,Generalized Hope,train,spokane is losing a good one! appreciate all ...,english
3,"#USER# Nun ja, die Hoffnung besteht. Bei den m...",Hope,Generalized Hope,train,nun ja die hoffnung besteht bei den meisten,german
4,"#USER# Gute Nacht, J. und schöne Träume! ✨💚✨",Hope,Generalized Hope,train,gute nacht j und schöne träume! sparklesgreen...,german
5,"Wow, Futures tatsächlich ins Close positiv.\nM...",Hope,Generalized Hope,train,wow futures tatsächlich ins close positiv mal ...,german
6,me ven normal pero todos los días pienso en lo...,Hope,Generalized Hope,train,me ven normal pero todos los dias pienso en lo...,spanish
7,#USER# Viendo a varios de los que tengo alrede...,Hope,Generalized Hope,train,viendo a varios de los que tengo alrededor cr...,spanish
8,Disfruten mucho el tiempo de calidad que les b...,Hope,Generalized Hope,train,disfruten mucho el tiempo de calidad que les b...,spanish
9,جیسے علماء سودی نظام کیخلاف اکھٹے ہوئے ہیں کاش...,Hope,Generalized Hope,train,جیسے علماء سودی نظام کیخلاف اکھٹے ہوے ہیں کاش ...,urdu


In [10]:
few_shot_examples.to_csv('multiclass_few_shot_examples.csv')

## Multiclass Samples for english

In [11]:
# Filter for english
filtered_examples = few_shot_examples[
    (few_shot_examples['language'] == 'english')
]

# Convert filtered examples to the desired format
few_shot_text = " ".join([
    f"Text {i}: {row['clean_text']} Label {i}: {row['multiclass']}"
    for i, (_, row) in enumerate(filtered_examples.iterrows(), 1)
])

few_shot_text

'Text 1: #forbiddendoor thoughts so far today the pop for suzukis kazi ni nare entrancehim and eddie squaring offamp just the chaos of that match in general was sick and then the triple threat tag was amazing as well ftr finally being so over is beautiful and i really hope dax is ok Label 1: Generalized Hope Text 2:  im currently matchingexceeding expectations and in line with the club vision so im hoping ill be okay always a risk though Label 2: Generalized Hope Text 3:  spokane is losing a good one! appreciate all of your hard work youve earned this and i hope you enjoy the new position! Label 3: Generalized Hope Text 4: lmaoooo i pray no one has to go through thisfacewithtearsofjoy fck them other kids tho  Label 4: Not Hope Text 5: could he be the warg he was and maintain his place within the demon lords army? or was a chance meeting with a strange woman who desired to become a caladrius for a druid king meant to ruin him? megaphone  Label 5: Not Hope Text 6: chongyun you have to be

## Multiclass Samples for spanish

In [12]:
# Filter for spanish
filtered_examples = few_shot_examples[
    (few_shot_examples['language'] == 'spanish')
]

# Convert filtered examples to the desired format
few_shot_text = " ".join([
    f"Text {i}: {row['clean_text']} Label {i}: {row['multiclass']}"
    for i, (_, row) in enumerate(filtered_examples.iterrows(), 1)
])

few_shot_text

'Text 1: me ven normal pero todos los dias pienso en los futuros turnos de bomberos que voy a hacer y rezo para no cagarme toda Label 1: Generalized Hope Text 2:  viendo a varios de los que tengo alrededor creo que ese episodio dio demasiadas ideas pero son felices Label 2: Generalized Hope Text 3: disfruten mucho el tiempo de calidad que les brindan sus novios es lo que mas anhelo con el mio se que todo esfuerzo que hacemos tendra su recompensa crossedfingerslightskintonemendingheart Label 3: Generalized Hope Text 4:  sabes que eso fue en 2012 no? ese año el balon de oro se lo regalan a cristiano Label 4: Not Hope Text 5: hoy es de esos dias en los que anhelo y necesito comer fruta Label 5: Not Hope Text 6:  la de la clase de falo? jajaja y si creo que no soporto nailpolishlightskintone Label 6: Not Hope Text 7:  este video me da esperanza al menos ya tengo la cara de artesania oaxaqueña smilingfacewithtear  Label 7: Realistic Hope Text 8: angelus  a las 1200h te invitamos a rezar el 

## Multiclass Samples for german

In [13]:
# Filter for german
filtered_examples = few_shot_examples[
    (few_shot_examples['language'] == 'german')
]

# Convert filtered examples to the desired format
few_shot_text = " ".join([
    f"Text {i}: {row['clean_text']} Label {i}: {row['multiclass']}"
    for i, (_, row) in enumerate(filtered_examples.iterrows(), 1)
])

few_shot_text

'Text 1:  nun ja die hoffnung besteht bei den meisten Label 1: Generalized Hope Text 2:  gute nacht j und schöne träume! sparklesgreenheartsparkles Label 2: Generalized Hope Text 3: wow futures tatsächlich ins close positiv mal sehen was gleich tsla bringt  Label 3: Generalized Hope Text 4:  viele der aufklärer und ihrer nachfahren im 19 jahrhundewaren tatsächlich kinder ihrer zeit das heißt nicht dass sie in ihrem wunsch verblendet waren ob die zeit nach 1871 bis heute so eine größere erfolgsgeschichte war als die periode des hrr darf angezweifelt werden Label 4: Not Hope Text 5:  wunsch und wirklichkeit bleiben in meinem leben auch eher zwei dinge die kaum vereinbar sind Label 5: Not Hope Text 6:  hat da einer noch hoffnung auf vom tellerwäscher zum milliardär ? oder bist du es elon? facewithtearsofjoy Label 6: Not Hope Text 7: mmmh fluglärm  welch lieblicher klang ist mir 2 orte weiter nie wirklich so stark aufgefallen disguisedface aber vielleicht träume ich dann von urlaub?  Label

## Multiclass Samples for urdu

In [14]:
# Filter for urdu
filtered_examples = few_shot_examples[
    (few_shot_examples['language'] == 'urdu')
]

# Convert filtered examples to the desired format
few_shot_text = " ".join([
    f"Text {i}: {row['clean_text']} Label {i}: {row['multiclass']}"
    for i, (_, row) in enumerate(filtered_examples.iterrows(), 1)
])

few_shot_text

'Text 1: جیسے علماء سودی نظام کیخلاف اکھٹے ہوے ہیں کاش کہ سود کی حرمت کے بارے مطلع کرنے والے صحابہ کرام کے دفاع کیلے بھی اکھٹے ہوجاے brokenheart Label 1: Generalized Hope Text 2:  الفاظ نہیں ہیں اس درد کو لکھنے کے لیے loudlycryingfacepensiveface پ تو ماں باپ ہیں ہم ان معصوم بچوں کا دکھ ج تک نہیں بھول سکے اللہ پاک پ کو صبر عطا کرے اور سب بچوں کو جنت میں اعلی مقام عطا فرماے مین palmsuptogether Label 2: Generalized Hope Text 3:  مجھے بیٹھنے کی جگہ ملے میری رزو کا بھرم رہےتیری انجمن میں اگر نہیں تیری انجمن سے قریں سہیپیر سید نصیرالدین نصیر رح Label 3: Generalized Hope Text 4: حکم کس کا بھی تھا ساتھ کون کھڑا رہا اس وقت یہ بات زیادہ معنی رکھتیاسد عمر نے وہی کیا جو پی ٹی ا کا منشور تھا روز اول سے بعد میں بھی وہی کیا جب دوبارہ منسٹری ملی نہ وہ کسی کے گے جھکے نہ رکےاللہ جس سے جو کام لینا چاہےasadumar stay strong u will always be my favorite smilingfacewithsmilingeyes Label 4: Not Hope Text 5: ایک قصبے میں رہنے والے عجیب لوگ ان کی زندگی کے بارے میں جانا licker کی ہے سب کچھ licks ہے جو ایک دمی ای