# The NRC Emotion Intensity Lexicon (NRC-EIL)

In [17]:
import csv
import json
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import re
%matplotlib inline

import nltk
from nltk import *
from nltk.text import Text
import string
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.tokenize import RegexpTokenizer
from nltk.collocations import BigramCollocationFinder
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from nltk import FreqDist

### wczytanie słownika

In [4]:
data_dict = {}

with open("anger-NRC-Emotion-Lexicon.txt", "r") as file:
    reader = csv.reader(file, delimiter='\t')
    
    for row in reader:
        word = row[0]
        label = int(row[1])
        data_dict[word] = label

In [8]:
word_to_find = "kicking"
if word_to_find in data_dict:
    label_for_word = data_dict[word_to_find]
    if label_for_word == 1:
        print(f"The word '{word_to_find}' is: negative")
    elif label_for_word == 0:
        print(f"The word '{word_to_find}' is: positive")
else:
    print(f"'{word_to_find}' not found in the data.")



The word 'kicking' is: negative


In [7]:
for word, label in data_dict.items():
    print(f"Word: {word}, Label: {label}")

Word: idiotic, Label: 1
Word: offend, Label: 1
Word: strained, Label: 1
Word: punishment, Label: 1
Word: kicking, Label: 1
Word: hardened, Label: 1
Word: slaughter, Label: 1
Word: unfulfilled, Label: 1
Word: disillusionment, Label: 1
Word: imprisoned, Label: 1
Word: cacophony, Label: 1
Word: payback, Label: 1
Word: trickery, Label: 1
Word: retaliation, Label: 1
Word: venomous, Label: 1
Word: encumbrance, Label: 1
Word: lying, Label: 1
Word: recession, Label: 1
Word: remiss, Label: 1
Word: stingy, Label: 1
Word: defense, Label: 1
Word: suicide, Label: 1
Word: diabolical, Label: 1
Word: blasphemy, Label: 1
Word: destroyer, Label: 1
Word: gnome, Label: 1
Word: fierce, Label: 1
Word: selfish, Label: 1
Word: stolen, Label: 1
Word: slander, Label: 1
Word: tripping, Label: 1
Word: unforgiving, Label: 1
Word: insurrection, Label: 1
Word: wrangling, Label: 1
Word: shaky, Label: 1
Word: grudge, Label: 1
Word: latent, Label: 1
Word: scandalous, Label: 1
Word: mob, Label: 1
Word: exaggerate, Label

Word: gouge, Label: 0
Word: church, Label: 0
Word: hospice, Label: 0
Word: tripartite, Label: 0
Word: composed, Label: 0
Word: savings, Label: 0
Word: inadvertent, Label: 0
Word: thermodynamics, Label: 0
Word: lustrous, Label: 0
Word: provisionally, Label: 0
Word: green, Label: 0
Word: amorphous, Label: 0
Word: slippers, Label: 0
Word: displacement, Label: 0
Word: virginity, Label: 0
Word: put, Label: 0
Word: inaugural, Label: 0
Word: tinker, Label: 0
Word: bureau, Label: 0
Word: reinstate, Label: 0
Word: acuity, Label: 0
Word: unpleasant, Label: 0
Word: guise, Label: 0
Word: treatment, Label: 0
Word: drowsiness, Label: 0
Word: german, Label: 0
Word: aloha, Label: 0
Word: jag, Label: 0
Word: caterer, Label: 0
Word: contour, Label: 0
Word: attendant, Label: 0
Word: calipers, Label: 0
Word: courtship, Label: 0
Word: hop, Label: 0
Word: smoothness, Label: 0
Word: peaked, Label: 0
Word: accessory, Label: 0
Word: halve, Label: 0
Word: normal, Label: 0
Word: enlarged, Label: 0
Word: raised, 

Word: manufacturer, Label: 0
Word: thunder, Label: 0
Word: hoy, Label: 0
Word: persistent, Label: 0
Word: kos, Label: 0
Word: deer, Label: 0
Word: jaws, Label: 0
Word: votive, Label: 0
Word: overflowing, Label: 0
Word: hexagon, Label: 0
Word: somber, Label: 0
Word: department, Label: 0
Word: purpose, Label: 0
Word: sovereignty, Label: 0
Word: immaculate, Label: 0
Word: invitation, Label: 0
Word: concealed, Label: 0
Word: usefully, Label: 0
Word: indice, Label: 0
Word: arithmetic, Label: 0
Word: patronage, Label: 0
Word: wight, Label: 0
Word: overestimate, Label: 0
Word: wan, Label: 0
Word: oaf, Label: 0
Word: doughnut, Label: 0
Word: timeliness, Label: 0
Word: colonel, Label: 0
Word: voluntary, Label: 0
Word: countess, Label: 0
Word: situated, Label: 0
Word: mottled, Label: 0
Word: dogma, Label: 0
Word: philosophy, Label: 0
Word: amidst, Label: 0
Word: dart, Label: 0
Word: moderate, Label: 0
Word: lopsided, Label: 0
Word: gritty, Label: 0
Word: unpublished, Label: 0
Word: divers, Label

Word: gauge, Label: 0
Word: broke, Label: 0
Word: grace, Label: 0
Word: numerical, Label: 0
Word: generality, Label: 0
Word: muck, Label: 0
Word: centrifugal, Label: 0
Word: firmly, Label: 0
Word: arbitrator, Label: 0
Word: point, Label: 0
Word: overshadow, Label: 0
Word: beautification, Label: 0
Word: deceased, Label: 0
Word: shabby, Label: 0
Word: insular, Label: 0
Word: puddle, Label: 0
Word: enema, Label: 0
Word: corporate, Label: 0
Word: deference, Label: 0
Word: ajar, Label: 0
Word: junk, Label: 0
Word: produce, Label: 0
Word: breach, Label: 0
Word: unbroken, Label: 0
Word: shanty, Label: 0
Word: aliquot, Label: 0
Word: communion, Label: 0
Word: swoop, Label: 0
Word: sith, Label: 0
Word: grantee, Label: 0
Word: superstar, Label: 0
Word: moiety, Label: 0
Word: sanctioned, Label: 0
Word: hale, Label: 0
Word: armory, Label: 0
Word: accentuate, Label: 0
Word: previous, Label: 0
Word: petrol, Label: 0
Word: thrilling, Label: 0
Word: rhythmical, Label: 0
Word: starvation, Label: 0
Word

Word: deduct, Label: 0
Word: mechanical, Label: 0
Word: block, Label: 0
Word: pas, Label: 0
Word: police, Label: 0
Word: eel, Label: 0
Word: cruiser, Label: 0
Word: dexterity, Label: 0
Word: slender, Label: 0
Word: salesman, Label: 0
Word: joker, Label: 0
Word: yearbook, Label: 0
Word: excerpt, Label: 0
Word: robust, Label: 0
Word: material, Label: 0
Word: show, Label: 0
Word: violin, Label: 0
Word: sir, Label: 0
Word: alter, Label: 0
Word: destined, Label: 0
Word: inspired, Label: 0
Word: tense, Label: 0
Word: exorbitant, Label: 0
Word: endowment, Label: 0
Word: grasping, Label: 0
Word: search, Label: 0
Word: vale, Label: 0
Word: expectation, Label: 0
Word: paternal, Label: 0
Word: completion, Label: 0
Word: null, Label: 0
Word: amplitude, Label: 0
Word: syntax, Label: 0
Word: foregoing, Label: 0
Word: unicorn, Label: 0
Word: bubbling, Label: 0
Word: endemic, Label: 0
Word: lin, Label: 0
Word: arid, Label: 0
Word: thrifty, Label: 0
Word: gaping, Label: 0
Word: endowed, Label: 0
Word: 

### wczytanie datasetu

In [11]:
file_path = "ESConv.json"

with open(file_path, "r", encoding="utf-8") as file:
    dataset = json.load(file)

In [15]:
corpus_data = []
for corpus in dataset:
    dialog = corpus['dialog']
    corpus_data.append(dialog)

df = pd.DataFrame({'Dialog': pd.Series(corpus_data)})

seeker_df = pd.DataFrame(columns=['ConversationID', 'Seeker Dialog'])
for index, row in df.iterrows():
    dialog_list = row['Dialog']
    for dialog in dialog_list:
        if dialog['speaker'] == 'seeker':
            seeker_df = seeker_df.append({'ConversationID': index, 'Seeker Dialog': dialog['content']}, ignore_index=True)            
seeker_df

Unnamed: 0,ConversationID,Seeker Dialog
0,0,Hello\n
1,0,I am having a lot of anxiety about quitting my...
2,0,I have to deal with many people in hard financ...
3,0,"I do, but often they are not going to get back..."
4,0,That is true but sometimes I feel like I shoul...
5,0,Probably not. I was with the same company for ...
6,0,I could try. It mostly gets to me at the end o...
7,0,That is also true. Sometimes I wonder if it re...
8,0,That is true. Maybe I just need to sit down an...
9,0,It really is a big decision \n


In [18]:
## Normalization process

# Removing unnecessary characters such as newlines and extra spaces
seeker_df['Seeker Dialog'] = seeker_df['Seeker Dialog'].str.replace('\n', ' ')  # Replacing newline characters with spaces

# Converting the text to lowercase for consistency
seeker_df['Seeker Dialog'] = seeker_df['Seeker Dialog'].str.lower()

# Tokenizing the text into individual words or sentences
seeker_df['Tokens'] = seeker_df['Seeker Dialog'].apply(word_tokenize)

# Removing any stop words if required
stop_words = set(stopwords.words('english'))
seeker_df['Tokens'] = seeker_df['Tokens'].apply(lambda tokens: [token for token in tokens if token not in stop_words])

# Lemmatization or stemming to reduce words to their base forms
lemmatizer = WordNetLemmatizer()
seeker_df['Tokens'] = seeker_df['Tokens'].apply(lambda tokens: [lemmatizer.lemmatize(token) for token in tokens])

print(seeker_df[['ConversationID', 'Tokens']])

      ConversationID                                             Tokens
0                  0                                            [hello]
1                  0  [lot, anxiety, quitting, current, job, ., stre...
2                  0  [deal, many, people, hard, financial, situatio...
3                  0  [,, often, going, get, back, want, ., many, pe...
4                  0  [true, sometimes, feel, like, put, feeling, he...
5                  0  [probably, ., company, long, time, consistentl...
6                  0             [could, try, ., mostly, get, end, day]
7                  0  [also, true, ., sometimes, wonder, really, tho...
8                  0         [true, ., maybe, need, sit, really, think]
9                  0                            [really, big, decision]
10                 0                    [thank, different, perspective]
11                 0                                  [true, ., thanks]
12                 0                                            

In [21]:
# by_conv_df = seeker_df.groupby('ConversationID')['Seeker Dialog'].apply(lambda x: lexical_diversity(x)).reset_index(name='Lexical Diversity')


Unnamed: 0,ConversationID,Seeker Dialog,Tokens
0,0,hello,[hello]
1,0,i am having a lot of anxiety about quitting my...,"[lot, anxiety, quitting, current, job, ., stre..."
2,0,i have to deal with many people in hard financ...,"[deal, many, people, hard, financial, situatio..."
3,0,"i do, but often they are not going to get back...","[,, often, going, get, back, want, ., many, pe..."
4,0,that is true but sometimes i feel like i shoul...,"[true, sometimes, feel, like, put, feeling, he..."
5,0,probably not. i was with the same company for ...,"[probably, ., company, long, time, consistentl..."
6,0,i could try. it mostly gets to me at the end o...,"[could, try, ., mostly, get, end, day]"
7,0,that is also true. sometimes i wonder if it re...,"[also, true, ., sometimes, wonder, really, tho..."
8,0,that is true. maybe i just need to sit down an...,"[true, ., maybe, need, sit, really, think]"
9,0,it really is a big decision,"[really, big, decision]"


In [36]:
def read_sentiment_file(sentiment):
    data_dict = {}
    file = "NRC_Emotion_Lexicon/NRC_Emotion_Lexicon/OneFilePerEmotion/" + sentiment + "-NRC-Emotion-Lexicon.txt"
    
    with open(file, "r") as file:
        reader = csv.reader(file, delimiter='\t')
        
        for row in reader:
            word = row[0]
            label = int(row[1])
            data_dict[word] = label
    
    return data_dict

In [42]:
def wordSentimentAssociations(sentiment, word_to_find): # sentiment = [positive, negative]
    data_dict = read_sentiment_file(sentiment)
    
    if word_to_find in data_dict:
        label_for_word = data_dict[word_to_find]
        if label_for_word == 1:
            sentiment += 1
        elif label_for_word == 0:
            sentiment += 0
    else:
        print(f"'{word_to_find}' not found in the data.")

    #for word, label in data_dict.items():
    #    print(f"Word: {word}, Sentiment: {label}")

        
    return sentiment

In [47]:
by_conv_df['Sentiment'] = seeker_df.groupby('ConversationID')['Seeker Dialog'].apply(wordSentimentAssociations)

TypeError: wordSentimentAssociations() missing 1 required positional argument: 'word_to_find'

In [None]:
def wordSentimentAssociations(sentiment, word_to_find): # sentiment = [positive, negative]
    data_dict = {}
    file = sentiment + "-NRC-Emotion-Lexicon.txt"
    
    sentiment = 0 # negative = 1, positive = 0

    with open(file, "r") as file:
        reader = csv.reader(file, delimiter='\t')
    
        for row in reader:
            word = row[0]
            label = int(row[1])
            data_dict[word] = label
            
         
    if word_to_find in data_dict:
        label_for_word = data_dict[word_to_find]
        if label_for_word == 1:
            sentiment += 1
        elif label_for_word == 0:
            sentiment += 0
    else:
        print(f"'{word_to_find}' not found in the data.")

    for word, label in data_dict.items():
        print(f"Word: {word}, Sentiment: {label}")

        
    return #