In [1]:
from nrc import *

# --- Configuration -----------------------------------------------------------
# Relative path of the text file that is analysed below.
FILENAME = 'data/gutenberg/dorian_gray.txt'

# Filter set handed to get_token_vectors_from_text: English stopwords, every
# single punctuation character, and the literal '--'.
# (`stopwords` and `string` are presumably provided by `from nrc import *`.)
FILTERS = set(stopwords.words('english')).union(string.punctuation, {'--'})

# Column labels used later when the vectors are turned into DataFrames.
sentiments = ['negative', 'positive']
emotions = [
    'anger', 'anticipation', 'disgust', 'fear',
    'joy', 'sadness', 'surprise', 'trust',
]

# --- Load lexicon and build per-token vectors ---------------------------------
lexicon = load_lexicon()

# NOTE(review): the output saved with this cell ends in
#   TypeError: get_token_vectors_from_sentences() missing 1 required
#   positional argument: 'lexicon'
# That function is not called here, so the failure appears to come from inside
# the nrc module -- confirm this cell still runs on a fresh kernel.
token_vectors = get_token_vectors_from_text(FILENAME, lexicon, FILTERS)

...Loading NRC lexicon.
...Loading data from data/gutenberg/dorian_gray.txt.


TypeError: get_token_vectors_from_sentences() missing 1 required positional argument: 'lexicon'

In [86]:
# Keep only the tokens whose vector sums to strictly more than 50; tokens
# with a sum of 50 or less are dropped.
token_vectors = dict(
    (token, vector)
    for token, vector in token_vectors.items()
    if vector.sum() > 50
)

In [87]:
# Split every token vector into an emotion part and a sentiment part, then
# normalize each part separately.
#
# Positions 5 and 6 of a token vector are taken as the sentiment scores and the
# remaining positions (0-4 and 7 onwards) as the emotion scores.
# NOTE(review): this assumes the lexicon columns are ordered so that these
# positions line up with the `sentiments` / `emotions` label lists -- confirm
# against the lexicon loader.

# create emotion vectors (np.concatenate allocates a new array per token)
emotion_vectors = {
    t: np.concatenate([v[:5], v[7:]]) for (t, v) in token_vectors.items()
}

# normalize_token_vectors is called for its effect on the dict passed in; its
# return value is not used.
normalize_token_vectors(emotion_vectors)

# create sentiment vectors
# A plain slice of a NumPy array is a view onto the same memory, so the slice
# is copied here: whatever normalize_token_vectors does to these arrays can
# then never write back into `token_vectors`, and re-running earlier cells
# keeps seeing the original values.
sentiment_vectors = {
    t: v[5:7].copy() for (t, v) in token_vectors.items()
}

normalize_token_vectors(sentiment_vectors)

...Normalizing token vectors.
...Normalizing token vectors.


In [88]:
# One row per token, one column per label; rows follow the dict's own order.
emotion_df = pd.DataFrame(
    list(emotion_vectors.values()),
    index=list(emotion_vectors),
    columns=emotions,
)
sentiment_df = pd.DataFrame(
    list(sentiment_vectors.values()),
    index=list(sentiment_vectors),
    columns=sentiments,
)

In [89]:
# Tokens ordered from the highest to the lowest 'trust' share.
emotion_df.sort_values(by='trust', ascending=False)

Unnamed: 0,anger,anticipation,disgust,fear,joy,sadness,surprise,trust
servant,0.056604,0.113208,0.075472,0.056604,0.075472,0.094340,0.018868,0.509434
sir,0.057325,0.095541,0.044586,0.082803,0.082803,0.089172,0.063694,0.484076
taught,0.041667,0.020833,0.125000,0.104167,0.145833,0.062500,0.020833,0.479167
gentleman,0.078652,0.067416,0.067416,0.089888,0.101124,0.112360,0.022472,0.460674
fellow,0.033333,0.100000,0.066667,0.083333,0.116667,0.050000,0.100000,0.450000
...,...,...,...,...,...,...,...,...
horrid,0.216495,0.051546,0.226804,0.195876,0.051546,0.216495,0.020619,0.020619
grinning,0.142857,0.102041,0.142857,0.204082,0.081633,0.204082,0.102041,0.020408
horrible,0.240143,0.053763,0.247312,0.275986,0.032258,0.100358,0.032258,0.017921
shot,0.204819,0.036145,0.108434,0.228916,0.000000,0.228916,0.180723,0.012048


In [94]:
# Emotion profiles of four selected tokens, in the order listed.
emotion_df.loc[['man', 'woman', 'boy', 'girl'], :]

Unnamed: 0,anger,anticipation,disgust,fear,joy,sadness,surprise,trust
man,0.100147,0.147275,0.097202,0.12813,0.131075,0.14433,0.092784,0.159057
woman,0.065789,0.131579,0.118421,0.138158,0.157895,0.144737,0.092105,0.151316
boy,0.074803,0.133858,0.23622,0.094488,0.145669,0.102362,0.062992,0.149606
girl,0.092683,0.15122,0.082927,0.092683,0.195122,0.121951,0.092683,0.170732


In [95]:
# Sentiment split of the same four selected tokens, in the order listed.
sentiment_df.loc[['man', 'woman', 'boy', 'girl'], :]

Unnamed: 0,negative,positive
man,0.416667,0.583333
woman,0.45977,0.54023
boy,0.588235,0.411765
girl,0.382022,0.617978


negative    0.45977
positive    0.54023
Name: woman, dtype: float64

In [62]:
# Tokens ordered from the highest to the lowest 'negative' share.
sentiment_df.sort_values(by='negative', ascending=False)

Unnamed: 0,negative,positive
shot,0.961538,0.038462
agony,0.933333,0.066667
destroyed,0.928571,0.071429
dens,0.916667,0.083333
omen,0.888889,0.111111
...,...,...
pray,0.100000,0.900000
service,0.066667,0.933333
birth,0.058824,0.941176
prince,0.055556,0.944444
