# Test Experiments
This notebook runs exploratory tests of existing LLMs, datasets, and code for our emotion-detection use case (UC).

In [1]:
import numpy as np
import pandas as pd
import transformers
import nltk
#import torch
import tensorflow as tf

  from .autonotebook import tqdm as notebook_tqdm


### Test 1

Falcon model

Model information

|Falcon Version		|    Supported languages |
| ------------------|------------------------|
|Falcon 1B		    |    English            |
|Falcon 7B-instruct	|   English-French          |
|Falcon 7B		    |    English-French-Spanish-German|
|Falcon 40B		    |    English-French-Spanish-German|
|Falcon 180B		|        English, German, Spanish, French (and limited capabilities in Italian, Portuguese, Polish, Dutch,Romanian, Czech, Swedish)|

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Load the Falcon tokenizer and build a text-generation pipeline around the model.
# `torch` is imported in this cell because `torch.bfloat16` is needed below and
# the notebook's top-level `import torch` is commented out; without this import
# the cell fails with NameError on a fresh kernel.
import torch

MODEL_NAME = "tiiuae/falcon-rw-1b"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model_pipeline = transformers.pipeline(
    "text-generation",
    model=MODEL_NAME,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,  # load the weights in 16-bit bfloat precision
    device_map="auto",  # leave device placement to the library
)

In [None]:
# Sample one continuation of the prompt and print it.
# The call goes through `model_pipeline`, the pipeline object built in the
# model-loading cell; the bare name `pipeline` is never defined in this notebook.
sequences = model_pipeline(
    "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
# Each returned item is indexed by 'generated_text' to get the produced string.
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

### Test 2
JAIS model

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# `torch` must be in scope for the CUDA check below; this section only imports
# names from `transformers`, so import it here to keep the cell runnable on a
# fresh kernel.
import torch

# Use the GPU when torch reports CUDA as available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the JAIS tokenizer and causal language model.
model_name = "inception-mbzuai/jais-13b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint
# repository to run locally -- only keep it for sources you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,
)

In [None]:
def get_response(text, tokenizer=tokenizer, model=model, max_total_length=200):
    """Generate a sampled continuation for ``text`` and return it decoded.

    Args:
        text: Prompt string passed to the tokenizer.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
            Defaults to the notebook-level ``tokenizer``.
        model: Model whose ``generate`` method is called. Defaults to the
            notebook-level ``model``.
        max_total_length: Target upper bound on prompt tokens plus generated
            tokens. Defaults to 200, the constant used previously.

    Returns:
        The first decoded sequence produced by ``model.generate`` with special
        tokens stripped.
        NOTE(review): for decoder-only models the generated ids normally start
        with the prompt ids, so the prompt is presumably part of the returned
        string -- confirm against an actual run.
    """
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    # `device` is the notebook-level device string set in the model-loading cell.
    inputs = input_ids.to(device)
    input_len = inputs.shape[-1]

    # The previous code passed `max_length=200 - input_len`. `max_length` counts
    # the prompt tokens as well, so that cap shrank as the prompt grew and fell
    # below `min_length` once the prompt reached ~98 tokens. Express the budget
    # as a number of *new* tokens instead, and never go below the minimum.
    min_new_tokens = 4
    max_new_tokens = max(max_total_length - input_len, min_new_tokens)

    generate_ids = model.generate(
        inputs,
        top_p=0.9,
        temperature=0.3,
        max_new_tokens=max_new_tokens,
        min_length=input_len + min_new_tokens,
        repetition_penalty=1.2,
        do_sample=True,
    )
    response = tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0]
    return response

In [None]:
# Smoke test: send one Arabic prompt through get_response and print the result.
text = "عاصمة دولة الإمارات العربية المتحدة ه"
response = get_response(text)
print(response)

### Test 3

In [2]:
# Load the Arabic Sentiment Twitter Corpus and show its split summary.
from datasets import load_dataset

DATASET_ID = "asas-ai/Arabic_Sentiment_Twitter_Corpus"
dataset = load_dataset(DATASET_ID)
dataset

Downloading readme: 100%|██████████| 727/727 [00:00<?, ?B/s] 
Downloading data: 100%|██████████| 3.15M/3.15M [00:01<00:00, 2.67MB/s]
Downloading data: 100%|██████████| 789k/789k [00:00<00:00, 1.67MB/s]
Generating train split: 100%|██████████| 45275/45275 [00:00<00:00, 671375.13 examples/s]
Generating test split: 100%|██████████| 11520/11520 [00:00<00:00, 1438903.58 examples/s]


DatasetDict({
    train: Dataset({
        features: ['label', 'tweet'],
        num_rows: 45275
    })
    test: Dataset({
        features: ['label', 'tweet'],
        num_rows: 11520
    })
})

In [7]:
# Turn each dataset split into a DataFrame, then print its (rows, columns) shape.
train, test = (pd.DataFrame(dataset[split]) for split in ("train", "test"))
for frame in (train, test):
    print(frame.shape)

(45275, 2)
(11520, 2)


In [11]:
# Stack the test rows under the train rows; each frame keeps its own row labels.
data = pd.concat(objs=[train, test])

In [13]:
# Write the merged corpus to an Excel workbook, leaving the row index out.
twitter_corpus_xlsx = "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/arabic_sentiment_twitter_compus.xlsx"
data.to_excel(twitter_corpus_xlsx, index=False)

### Test 4

In [17]:
# Read the six Darija workbooks (three Algerian, three Moroccan) in a fixed order.
darija_paths = (
    "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/darija_algerian_1.xlsx",
    "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/darija_algerian_2.xlsx",
    "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/darija_algerian_3.xlsx",
    "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/darija_moroccan_1.xlsx",
    "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/darija_moroccan_2.xlsx",
    "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/darija_moroccan_3.xlsx",
)
# One frame per path, bound to the same names the later cells use.
darija_1, darija_2, darija_3, darija_4, darija_5, darija_6 = map(pd.read_excel, darija_paths)

In [25]:
# Combine the six Darija frames row-wise, in the order they were loaded.
darija_frames = [darija_1, darija_2, darija_3, darija_4, darija_5, darija_6]
darija = pd.concat(darija_frames)

In [26]:
# Swap the row labels carried over from the individual files for a fresh
# 0..n-1 index (old labels are discarded), then display the frame.
darija = darija.reset_index(drop=True)
darija

Unnamed: 0,tweet,label
0,@user على حسب موقعك يبدو أنك صاحب نظرة ثاقبة ....,negative
1,@user تبهليل هاذا,negative
2,@user هاذي تبهليل ماشي فهامة,negative
3,@user @user تخاف نجاوب يا ناصر ببلوك لانو طريق...,negative
4,@user مرنكة أقسم بالله 😂😂😂تبهليل ما بعد منتصف ...,negative
...,...,...
12056,مبروك الفوز مع اني مابعرف الفريق بس بما انك سع...,positive
12057,جنايات فاس رفضات تعطي السراح المؤقت للفايق ولب...,neutral
12058,ههههههه تا بنكيران كيتحكم فهدشي هههه شعب مكلخ,negative
12059,achemn adafir adafir li and rjala li andhom ke...,neutral


In [27]:
# Save the combined Darija frame as an Excel workbook without the row index.
darija_xlsx = "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/darija_written_in_arabic.xlsx"
darija.to_excel(darija_xlsx, index=False)

### Test 5

In [34]:
# Read the three Arabic sentiment CSV files in a fixed order.
arabic_paths = (
    "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/arabic sentiments.csv",
    "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/arabic sentiments 2.csv",
    "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/arabic sentiments 3.csv",
)
arabic_1, arabic_2, arabic_3 = map(pd.read_csv, arabic_paths)

In [43]:
# Stack the three sentiment frames and discard the "source" column.
# Row labels from the individual files are kept as-is.
arabic_frames = [arabic_1, arabic_2, arabic_3]
arabic = pd.concat(arabic_frames).drop(columns="source")
arabic

Unnamed: 0,text,label
0,نوال الزغبي (الشاب خالد ليس عالمي) هههههههه أت...,negative
1,تقول نوال الزغبي : http,neutral
2,نوال الزغبي لطيفه الفنانه الوحيده اللي كل الفي...,positive
3,لما قالت نوال الزغبي لابقلها هاللقب فرحوا فانز...,negative
4,الفنانة نوال الزغبي سنة 90 http,neutral
...,...,...
319,قال رسول الله ﷺ(إذا سمعتم الطاعون بأرض، فلا تد...,neutral
320,RT @user: ماركا | لم ينهزم ريال مدريد في أخر 2...,positive
321,RT @user: #مليارات_العمره_على_هوى_مصر سقوط بشا...,negative
322,حد معاه ويندوز 10 ؟,neutral


In [45]:
# Save the combined sentiment frame as an Excel workbook without the row index.
arabic_xlsx = "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/arabic_sentiments.xlsx"
arabic.to_excel(arabic_xlsx, index=False)

In [48]:
# Load the Arabic HateCheck cases file and display it.
hatecheck_csv = "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/hatecheck_cases_final_arabic.csv"
data = pd.read_csv(hatecheck_csv)
data

Unnamed: 0,mhc_case_id,functionality,test_case,label_gold,target_ident,ref_case_id,ref_templ_id,templ_id,case_templ,gender_male,gender_female,label_annotated,label_annotated_maj,disagreement_in_case,disagreement_in_template
0,arabic-1,derog_neg_emote_h,انا بكره النساء.,hateful,women,,,1,انا بكره ال[IDENT_P].,,,"['hateful', 'hateful', 'hateful']",hateful,False,False
1,arabic-2,derog_neg_emote_h,انا بكره المتحولين جنسيا.,hateful,trans people,,,1,انا بكره ال[IDENT_P].,,,"['hateful', 'hateful', 'hateful']",hateful,False,False
2,arabic-3,derog_neg_emote_h,انا بكره المثلين.,hateful,gay people,,,1,انا بكره ال[IDENT_P].,,,"['hateful', 'hateful', 'hateful']",hateful,False,False
3,arabic-4,derog_neg_emote_h,انا بكره السود.,hateful,black people,,,1,انا بكره ال[IDENT_P].,,,"['hateful', 'hateful', 'hateful']",hateful,False,False
4,arabic-5,derog_neg_emote_h,انا بكره اليهود.,hateful,Jews,,,1,انا بكره ال[IDENT_P].,,,"['hateful', 'hateful', 'hateful']",hateful,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3565,arabic-3573,spell_word_variation_h,المثلين كلهم أولاد متناكه.,hateful,gay people,1037.0,256.0,866,ال[IDENT_P] كلهم أولاد متناكه.,,,"['hateful', 'hateful', 'hateful']",hateful,False,False
3566,arabic-3574,spell_word_variation_h,السود كلهم أولاد متناكه.,hateful,black people,1038.0,256.0,866,ال[IDENT_P] كلهم أولاد متناكه.,,,"['hateful', 'hateful', 'hateful']",hateful,False,False
3567,arabic-3575,spell_word_variation_h,اليهود كلهم أولاد متناكه.,hateful,Jews,1039.0,256.0,866,ال[IDENT_P] كلهم أولاد متناكه.,,,"['hateful', 'hateful', 'hateful']",hateful,False,False
3568,arabic-3576,spell_word_variation_h,المعاقيين كلهم أولاد متناكه.,hateful,disabled people,1040.0,256.0,866,ال[IDENT_P] كلهم أولاد متناكه.,,,"['hateful', 'hateful', 'hateful']",hateful,False,False


In [55]:
# Keep only the test sentence and its gold label.
hatecheck_columns = ["test_case", "label_gold"]
data = data.loc[:, hatecheck_columns]

In [57]:
# Save the two-column hateful / not-hateful frame as an Excel workbook.
# index=False matches every other export in this notebook; without it the
# DataFrame's row labels are written as an extra leading column.
data.to_excel(
    "D:/Users/chatbot_emotions_detection/Datasets/Rows dataset (Arabic - Darija)/arabic_hatefull_not_hatefull.xlsx",
    index=False,
)

### Test 6

In [6]:
# Load the tab-separated emotion-annotated comments file.
emotions_txt = "D:/Users/chatbot_emotions_detection/Datasets/emtions_detection_datasets/arabic_comments_with_emotions.txt"
data = pd.read_csv(emotions_txt, sep="\t")

In [7]:
# Keep only the post text, its language tag and its emotion label, then display.
emotion_columns = ["Post", "lang", "Emotion"]
data = data.loc[:, emotion_columns]
data

Unnamed: 0,Post,lang,Emotion
0,@gamehacker080 @WaellAmeer @Kempa2010 @omeralb...,ar,neutral
1,@arar12332095361 صل الله عليه و سلم,ar,love
2,@SRKFC1 @iamsrk @gaurikhan 💖💖💖💖💖💖 https://t.co...,und,love
3,.@realDonaldTrump YOU did this you started it!...,en,disgust
4,"وقال: "" ... وعليهم *أَنْ يكونوا على يقظةٍ* مِن...",ar,fear
...,...,...,...
8867,@peoplebelarabi @joeekaram @najwakaram مبروووو...,ar,love
8868,@happy_chemistry ربحتي المعركة ... مازالت الحرب 😎,ar,trust
8869,@Neshan كف كفين!! ومن يضربك كف كفين لما انت تغ...,ar,anger
8870,افتح أي هاشتاغ ترند في الجزائر تحس روحك في جري...,ar,disgust


In [8]:
# Keep only the rows whose language tag is "ar", then display the result.
is_arabic = data["lang"] == "ar"
data = data[is_arabic]
data

Unnamed: 0,Post,lang,Emotion
0,@gamehacker080 @WaellAmeer @Kempa2010 @omeralb...,ar,neutral
1,@arar12332095361 صل الله عليه و سلم,ar,love
4,"وقال: "" ... وعليهم *أَنْ يكونوا على يقظةٍ* مِن...",ar,fear
5,🌴🌴 *إحذر من المندسين*🌴 ⬅ قال الشيخ العلامة محم...,ar,sadness
7,@Imad78317250 إيه يا أخي عماد صحيح من قال عند ...,ar,disgust
...,...,...,...
8867,@peoplebelarabi @joeekaram @najwakaram مبروووو...,ar,love
8868,@happy_chemistry ربحتي المعركة ... مازالت الحرب 😎,ar,trust
8869,@Neshan كف كفين!! ومن يضربك كف كفين لما انت تغ...,ar,anger
8870,افتح أي هاشتاغ ترند في الجزائر تحس روحك في جري...,ar,disgust


In [9]:
# Count how many posts carry each emotion label.
data["Emotion"].value_counts()

Emotion
hapiness        966
neutral         390
sadness         337
anger           281
trust           252
disgust         213
love            181
surprise        148
fear             56
anticipation     12
Name: count, dtype: int64

In [10]:
# Take the first ten post texts as an array.
texts = data["Post"].values[:10]

In [14]:
# Save the Arabic-only emotion dataset as an Excel workbook without the row index.
emotions_xlsx = "D:/Users/chatbot_emotions_detection/Datasets/emtions_detection_datasets/arabic_comments_with_emotions.xlsx"
data.to_excel(emotions_xlsx, index=False)