In [1]:
import nltk
from nltk.corpus import wordnet


In [2]:
# Download every NLTK resource the cells below rely on (already-present
# packages are reported as up-to-date and skipped).
# 'punkt' backs nltk.word_tokenize and 'stopwords' backs
# nltk.corpus.stopwords.words; without them the tokenize/filter cell raises
# LookupError on a fresh environment.
# NOTE(review): recent NLTK releases may look for 'punkt_tab' and
# 'averaged_perceptron_tagger_eng' instead -- confirm against the installed version.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\abhis\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\abhis\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
# Input sentence for the analysis; the tokenize/POS-tag cell that follows
# reads it through the module-level name `text`.
text = "I feel so happy and excited to be here with you today."

In [4]:
# Tokenize the text and attach a Penn Treebank POS tag to every token.
tokens = nltk.word_tokenize(text)
pos_tags = nltk.pos_tag(tokens)

# Penn Treebank tags that make up the NAVA classes
# (Noun, Adjective, Verb, Adverb).
nava_categories = ['NN', 'NNS', 'NNP', 'NNPS', 'JJ', 'JJR', 'JJS', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'RB', 'RBR', 'RBS']

# First letter of a NAVA Penn tag -> WordNet POS constant. Every tag in
# nava_categories starts with one of these four letters.
penn_initial_to_wordnet_pos = {
    'N': wordnet.NOUN,
    'J': wordnet.ADJ,
    'V': wordnet.VERB,
    'R': wordnet.ADV,
}

lemmatizer = nltk.WordNetLemmatizer()
stopwords = set(nltk.corpus.stopwords.words('english'))

# Single pass over the tagged tokens. Stopwords and non-alphabetic tokens are
# rejected BEFORE lemmatization and case-insensitively; lemmatization then
# uses the token's own POS. Lemmatizing first with the default noun POS turns
# e.g. "was" into "wa" and "has" into "ha", which then slip past the
# stopword filter.
affect_bearing_words = []
for word, tag in pos_tags:
    if tag not in nava_categories:
        continue
    if not word.isalpha() or word.lower() in stopwords:
        continue
    lemma = lemmatizer.lemmatize(word, pos=penn_initial_to_wordnet_pos[tag[0]])
    # The lemma itself may still be a stopword; drop it as well.
    if lemma in stopwords:
        continue
    # Keep only lemmas that WordNet knows about.
    if wordnet.synsets(lemma):
        affect_bearing_words.append(lemma)

# print the affect-bearing words
print(affect_bearing_words)


['feel', 'happy', 'excited', 'today']


In [5]:
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import wordnet as wn
from nltk.corpus import opinion_lexicon


In [10]:
# Fetch the SentiWordNet corpus, the data behind the `sentiwordnet` reader
# imported as `swn` in the preceding cell.
# nltk is already imported in the first cell; this re-import is redundant
# but harmless.
import nltk
nltk.download('sentiwordnet')

[nltk_data] Downloading package sentiwordnet to
[nltk_data]     C:\Users\abhis\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\sentiwordnet.zip.


True

In [35]:
# NOTE(review): defaultdict is not used in any cell visible here.
from collections import defaultdict
from nltk.sentiment import SentimentIntensityAnalyzer
# Fetch the VADER lexicon first; the analyzer built on the next line is
# presumably backed by it -- confirm against the NLTK documentation.
nltk.download('vader_lexicon')
# Module-level analyzer instance; get_emotion_vector() reads it as a global.
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\abhis\AppData\Roaming\nltk_data...


In [36]:
# The eight emotion labels. Their order fixes the slot order of the vectors
# produced by get_emotion_vector().
emotions = [
    "anger",
    "anticipation",
    "disgust",
    "fear",
    "joy",
    "sadness",
    "surprise",
    "trust",
]
# Every emotion label is paired with the WordNet noun POS constant.
pos_mapping = dict.fromkeys(emotions, wn.NOUN)


In [40]:
def get_emotion_vector(word):
    """Build a binary emotion vector for ``word`` from its VADER polarity.

    The vector has one slot per entry of the module-level ``emotions`` list,
    in that order. When the word's ``pos`` score exceeds its ``neg`` score the
    joy, trust and anticipation slots are set to 1; when ``neg`` exceeds
    ``pos`` the sadness, fear, anger and disgust slots are set to 1. If the two
    scores are equal every slot stays 0. The "surprise" slot is never set by
    this function.

    Parameters
    ----------
    word : str
        Word to score. Looked up through the module-level ``wn`` (WordNet)
        and ``sia`` (SentimentIntensityAnalyzer) objects.

    Returns
    -------
    list[int] or None
        The 0/1 vector, or ``None`` when WordNet has no synset for ``word``.
    """
    positive_emotions = ('joy', 'trust', 'anticipation')
    negative_emotions = ('sadness', 'fear', 'anger', 'disgust')

    # Words unknown to WordNet get no vector at all.
    if not wn.synsets(word):
        return None

    # The polarity comparison does not depend on the emotion, so decide once
    # which group (if any) is active instead of re-testing it per slot.
    scores = sia.polarity_scores(word)
    if scores['pos'] > scores['neg']:
        active_emotions = positive_emotions
    elif scores['neg'] > scores['pos']:
        active_emotions = negative_emotions
    else:
        active_emotions = ()

    return [1 if emotion in active_emotions else 0 for emotion in emotions]


In [41]:
# Report the emotion vector of each affect-bearing word; words for which
# get_emotion_vector() returns None are skipped.
for word in affect_bearing_words:
    emotion_vector = get_emotion_vector(word)
    if emotion_vector is None:
        continue
    print(f"{word.capitalize()} vector: {emotion_vector}")

Feel vector: [0, 0, 0, 0, 0, 0, 0, 0]
Happy vector: [0, 1, 0, 0, 1, 0, 0, 1]
Excited vector: [0, 1, 0, 0, 1, 0, 0, 1]
Today vector: [0, 0, 0, 0, 0, 0, 0, 0]
