In [1]:
# --- Data source -------------------------------------------------------------
# Key used to look up this dataset's entry in the dataset configuration.
dataset_name = "EmoEvent (Raw)"
# "original" loads the raw dataset file; any other value is looked up in the
# LLM configuration (e.g. "Llama3.1 8B instruct-q8") and loads that model's
# synthetic parquet file instead.
llm_augmenter = "original"
# Column whose value counts are reported (alternative noted by the author: emotion).
labels_column = "labels"

# --- Display options ---------------------------------------------------------
# When True, only `limit_column_names` are shown instead of the whole frame.
limit_columns = False
# NOTE(review): the recorded output shows the loaded frame with columns
# "text" and "labels"; these names may not exist after the column remapping --
# confirm before enabling `limit_columns`.
limit_column_names = ["event", "tweet", "emotion"]

# When True, the frame is additionally shown once per distinct value of
# `subset_column`.
view_subsets = True
subset_column = "labels"

In [2]:
import pandas as pd

from config_files import dataset_config
from config_files import LLM_config

# Metadata entry for the dataset selected in the configuration cell.
dataset_metadata = dataset_config.dataset[dataset_name]

if llm_augmenter == "original":
    # Raw dataset: read the delimited file named by the metadata, then drop
    # and rename columns as the metadata prescribes.
    file_location = f"../{dataset_metadata['relpath']}"

    filetype = dataset_metadata["filetype"]
    if filetype == "csv":
        dataframe = pd.read_csv(file_location)
    elif filetype == "tsv":
        dataframe = pd.read_csv(file_location, sep="\t")
    else:
        # Fail here with a clear message; otherwise `dataframe` would be
        # undefined and the cell would die with a NameError further down.
        raise ValueError(
            f"Unsupported filetype {filetype!r} for dataset {dataset_name!r}; "
            "expected 'csv' or 'tsv'."
        )

    # Chained, non-mutating form: re-running the cell always starts from the
    # freshly read frame.
    dataframe = (
        dataframe
        .drop(columns=dataset_metadata["unused_columns"])
        .rename(columns=dataset_metadata["remap_columns"])
    )
else:
    # Synthetic dataset: one parquet file per (dataset id, LLM id) pair.
    llm_metadata = LLM_config.model[llm_augmenter]
    # Computed outside the f-string: reusing the enclosing quote character
    # inside a replacement field is only valid syntax from Python 3.12 on.
    llm_file_stem = llm_metadata["id"].replace(":", "_")
    file_location = f"../synthetic_datasets/{dataset_metadata['id']}/{llm_file_stem}.parquet"
    dataframe = pd.read_parquet(path=file_location)

if limit_columns:
    display(dataframe[limit_column_names])
else:
    display(dataframe)

# Count rows per label using the configured column rather than a hard-coded
# attribute, so changing `labels_column` is honoured everywhere.
label_count = dataframe[labels_column].value_counts()
print(label_count)

display(label_count)

Unnamed: 0,text,labels
0,I know that the Notre Dame is a very important...,others
1,#BREAKING: (USER) -- Trump threatens `full an...,others
2,#Barcelona will win La Liga with three games t...,others
3,HT: Decent half. A goal would've been good tho...,others
4,In the 20th century we had weeping statues of ...,others
...,...,...
7261,With regime change yet to take hold in #Venezu...,fear
7262,#NotreDameCathedralFire Fantastic the response...,others
7263,For those of you questioning the Arya thing. P...,others
7264,Respect for people in france who've lost a his...,sadness


labels
others      3283
joy         2034
disgust      760
sadness      414
anger        390
surprise     234
fear         151
Name: count, dtype: int64


labels
others      3283
joy         2034
disgust      760
sadness      414
anger        390
surprise     234
fear         151
Name: count, dtype: int64

In [3]:
# Show the frame once per distinct value of `subset_column`, each followed by
# the label counts inside that subset. Skipped entirely when `view_subsets`
# is off.
if view_subsets:
    subset_values = dataframe[subset_column].unique()
    for subset in subset_values:
        print(f"------------------- {subset} -------------------")

        # Rows whose subset column equals the current value.
        in_subset = dataframe[subset_column] == subset
        subset_df = dataframe[in_subset]

        # Optionally narrow the displayed columns; the counts below always
        # use the full subset.
        display(subset_df[limit_column_names] if limit_columns else subset_df)
        display(pd.Series(subset_df[labels_column]).value_counts())

------------------- others -------------------


Unnamed: 0,text,labels
0,I know that the Notre Dame is a very important...,others
1,#BREAKING: (USER) -- Trump threatens `full an...,others
2,#Barcelona will win La Liga with three games t...,others
3,HT: Decent half. A goal would've been good tho...,others
4,In the 20th century we had weeping statues of ...,others
...,...,...
7257,Today is #WorldBookDay &amp; we're still celeb...,others
7259,What is happening in #Venezuela should be used...,others
7260,I'm hyperventilating. this episode is too inte...,others
7262,#NotreDameCathedralFire Fantastic the response...,others


labels
others    3283
Name: count, dtype: int64

------------------- disgust -------------------


Unnamed: 0,text,labels
8,USER People of #Venezuela !! so hilarious movi...,disgust
22,👧 Who is #GretaThunberg? One of the world’s 10...,disgust
27,It would be nice if the international correspo...,disgust
39,"""The man who DOES NOT read good books has no a...",disgust
51,Well damn hiding in the crypts was a bad idea ...,disgust
...,...,...
7231,Today is #SpainElection whereas #Spain country...,disgust
7233,"USER USER #Maduro needs to go, period; After w...",disgust
7239,Guys please report this account. They’re a tro...,disgust
7247,Hot take: the Vatican had it's hand in #NotreD...,disgust


labels
disgust    760
Name: count, dtype: int64

------------------- joy -------------------


Unnamed: 0,text,labels
10,Remarkable match &amp; Incredible performance ...,joy
12,#AvengersEndGame may have had the visual and w...,joy
14,Lmao. Liverpool are done 😂. 🐐 Makes it 2-0 #U...,joy
23,Struggle is real folks... Happy #worldbookday ...,joy
26,USER It's here! Smaller than I expected but P...,joy
...,...,...
7228,"OK I'm done, all hail king Messi #messi #UEFAC...",joy
7235,Happy #WorldBookDay! What are some of your fav...,joy
7244,This #worldbookday we are celebrating all form...,joy
7255,#WorldBookDay in words of #IvoAndric #NobelPri...,joy


labels
joy    2034
Name: count, dtype: int64

------------------- anger -------------------


Unnamed: 0,text,labels
11,That episode of Game Of Thrones was not as gre...,anger
25,That damned Global warming strikes again.... #...,anger
28,I will only back the #CMPPowerline through #Ma...,anger
33,USER Do not doubt there they will be met by re...,anger
63,USER USER_Wheeler Mindless people can't tell s...,anger
...,...,...
7178,#NotreDameCathedralFire will be good to see it...,anger
7227,This has to end. #Venezuela people deserve to ...,anger
7243,USER have you and your followers been watching...,anger
7251,USER you and your party only offer lipservice...,anger


labels
anger    390
Name: count, dtype: int64

------------------- sadness -------------------


Unnamed: 0,text,labels
15,I can’t believe what happened to The Notre Dam...,sadness
44,I Wish I Could Cry Loudly For Notre Dame. It i...,sadness
56,"I don't feel the joy of winning #LaLiga, and I...",sadness
62,As sad as #NotreDameCathedralFire is and lets ...,sadness
77,Watching such a beatiful and historically impo...,sadness
...,...,...
7234,So sad to hear about the #NotreDameCathedralFi...,sadness
7236,Despite the score. I am proud of my team. We p...,sadness
7237,We were wasteful in the final third. It’s what...,sadness
7253,Bravery. Commitment. A firefighter on their...,sadness


labels
sadness    414
Name: count, dtype: int64

------------------- surprise -------------------


Unnamed: 0,text,labels
24,The outcome of #SpainElection is less chaotic ...,surprise
55,Best free kick I’ve ever seen. WOW. The guy is...,surprise
66,Gonna have to stay up with another can to calm...,surprise
76,French fan did this video. for some reason…so ...,surprise
106,Just wow! Look what’s happening in #Venezuela...,surprise
...,...,...
7018,Are players scared of him or is he just that f...,surprise
7067,"At the risk of beinbg another spoiler, just on...",surprise
7131,What a goal unbelievable! Damn Messi is incred...,surprise
7143,"As a Theon hater, I do have to say I actually ...",surprise


labels
surprise    234
Name: count, dtype: int64

------------------- fear -------------------


Unnamed: 0,text,labels
29,Reason #1 to own an AR-15 (or any other arms)....,fear
37,Biting my nails watching the Barca defense run...,fear
108,Playing against Barcelona in the semi final of...,fear
131,Topics: 1) #Venezuela 2) Rise in #Antisemitism...,fear
133,Secure your freedom and do it now. #Venezuela,fear
...,...,...
7035,#France: 'ISIS fanatics warn of a future attac...,fear
7065,“I want you to panic.” https://t.co/W9SnITIIyJ...,fear
7135,Ahhhh!! Whom do I root for today?? 😑😑 Its like...,fear
7242,"Oh, the horror on being atop a dragon during a...",fear


labels
fear    151
Name: count, dtype: int64

In [7]:
# Keep only the rows labelled "joy"; the result is displayed as the cell output.
search_df = dataframe.loc[dataframe["labels"] == "joy"]
search_df

Unnamed: 0,text,labels
10,Remarkable match &amp; Incredible performance ...,joy
12,#AvengersEndGame may have had the visual and w...,joy
14,Lmao. Liverpool are done 😂. 🐐 Makes it 2-0 #U...,joy
23,Struggle is real folks... Happy #worldbookday ...,joy
26,USER It's here! Smaller than I expected but P...,joy
...,...,...
7228,"OK I'm done, all hail king Messi #messi #UEFAC...",joy
7235,Happy #WorldBookDay! What are some of your fav...,joy
7244,This #worldbookday we are celebrating all form...,joy
7255,#WorldBookDay in words of #IvoAndric #NobelPri...,joy


In [9]:
# Rows of `search_df` whose text contains the literal substring "#Greta".
# regex=False matches the string verbatim instead of compiling it as a pattern;
# na=False treats missing text as a non-match so the mask is strictly boolean
# (a NaN in the mask would make the indexing raise).
search_df[search_df["text"].str.contains("#Greta", regex=False, na=False)]

Unnamed: 0,text,labels
95,🔴Breaking.... MPs vote unanimously to be the...,joy
102,This Friday celebrate #FridaysForFuture a bril...,joy
219,"USER 16 year old Swede #GretaThunberg, has mor...",joy
259,USER USER USER #ExtinctionRebelion #GretaThunb...,joy
269,I wish I was as bold &amp; brave as #GretaThun...,joy
...,...,...
7073,Listen to our 16-year-old #HERO speaking about...,joy
7078,We are so excited that young people are not si...,joy
7091,There really is something beautiful in hearing...,joy
7098,So much respect for #GretaThunberg not only be...,joy
