# Coverage of Lexicons

Determine the coverage of each lexicon used in V3 and V4 in percentage of the vocabulary generated using the method from the Gao CNN.

In [61]:
# Import packages.
from io import BytesIO
from tensorflow.python.lib.io import file_io
import msgpack
import numpy as np
import pandas as pd

## Read in embeddings and lexicons.

In [62]:
# Read in the word embedding.
f = BytesIO(file_io.read_file_to_string('wikimedia-personal-attacks-200-embeddings.npy', binary_mode=True))
vocab = np.load(f)

# Later we can use this as a check that we haven't added any rows or
# changed any indexes, ensuring the integrity of the embeddings has
# been preserved.
print('vocab size:', len(vocab))

# The first row is padding so the vocab indexes start at 1.
word_embeddings_df = pd.DataFrame(data=vocab)
word_embeddings_df.head()

vocab size: 36995


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.443647,0.128096,0.704819,-0.413601,-0.615326,0.510453,0.081257,-0.65561,-0.357054,-0.387023,...,0.600914,-0.826428,-0.043708,-0.699184,-0.719986,0.412833,0.103456,-1.462709,0.466236,0.383801
2,-0.429288,-0.016554,0.384819,0.113181,-1.462845,0.261847,-0.875737,-0.451588,0.548763,-0.459796,...,0.000568,0.446239,0.302111,-0.397178,-0.675476,0.05389,0.523819,0.075026,0.311938,-0.055834
3,0.674015,0.280386,0.416335,-0.870266,0.490471,-0.532294,0.146954,-0.918494,0.201321,-0.624855,...,-0.092502,0.504301,0.192321,-0.295859,0.08463,-0.107387,0.118977,0.602776,-0.274779,-0.051925
4,-0.007329,-0.039159,0.599992,-0.76129,-0.340942,-0.756368,-0.926788,0.02548,0.299273,-0.697997,...,-0.180548,-0.031146,-0.675957,-0.666593,0.342779,0.510544,0.873011,0.061182,0.068458,0.256529


In [63]:
# Read in the python dictionaries of the lexicons
# from which to build the lexicon embeddings.
f = BytesIO(file_io.read_file_to_string('AFINN-96-lex.bin', binary_mode=True))
lex_afinn = msgpack.unpack(f, raw=False)

f = BytesIO(file_io.read_file_to_string('MSOL-June15-09-numeric.bin', binary_mode=True))
lex_msol = msgpack.unpack(f, raw=False)

f = BytesIO(file_io.read_file_to_string('bing-liu-opinion-lex.bin', binary_mode=True))
lex_bing = msgpack.unpack(f, raw=False)

f = BytesIO(file_io.read_file_to_string('NRC-EmoLex-polarity.bin', binary_mode=True))
lex_emolex = msgpack.unpack(f, raw=False)

f = BytesIO(file_io.read_file_to_string('abusive-words-lex-first-occ.bin', binary_mode=True))
lex_abusive = msgpack.unpack(f, raw=False)

In [64]:
# Convert AFINN to pandas dataframe.
lex_afinn_df = pd.DataFrame.from_dict(lex_afinn, orient='index', columns=['afinn_polarity']) 
lex_afinn_df.head()

Unnamed: 0,afinn_polarity
abandon,-0.4
abandons,-0.4
abandoned,-0.4
absentee,-0.2
absentees,-0.2


In [65]:
# Convert MSOL to pandas dataframe.
lex_msol_df = pd.DataFrame.from_dict(lex_msol, orient='index', columns=['msol_polarity']) 
lex_msol_df.head()

Unnamed: 0,msol_polarity
10cc,1.0
12-16-18-foot_skiff,1.0
"2,4,5-t",-1.0
"2,4-d",-1.0
3-d,1.0


In [66]:
# Convert bing to pandas dataframe.
lex_bing_df = pd.DataFrame.from_dict(lex_bing, orient='index', columns=['bing_polarity']) 
lex_bing_df.head()

Unnamed: 0,bing_polarity
a+,1.0
abound,1.0
abounds,1.0
abundance,1.0
abundant,1.0


In [67]:
# Convert emolex to pandas dataframe.
lex_emolex_df = pd.DataFrame.from_dict(lex_emolex, orient='index', columns=['emolex_polarity']) 
lex_emolex_df.head()

Unnamed: 0,emolex_polarity
abandon,-1.0
abandoned,-1.0
abandonment,-1.0
abba,1.0
abduction,-1.0


In [68]:
# Convert abusive words to pandas dataframe.
lex_abusive_df = pd.DataFrame.from_dict(lex_abusive) 
lex_abusive_df.head()

Unnamed: 0,abusive_lex
187,0.328716
1984,-0.043314
419,0.247129
86,0.082784
abandon,-0.097071


## V3 Lexicons (AFINN polarity, MSOL polarity, Bing polarity, EmoLex polarity)

In [69]:
# Join the dataframes on the words.
merged_lex_df = pd.merge(lex_afinn_df, lex_msol_df, left_index=True, right_index=True, how='outer')
merged_lex_df = pd.merge(merged_lex_df, lex_bing_df, left_index=True, right_index=True, how='outer')
merged_lex_df = pd.merge(merged_lex_df, lex_emolex_df, left_index=True, right_index=True, how='outer')
merged_lex_df.head(10)

Unnamed: 0,afinn_polarity,msol_polarity,bing_polarity,emolex_polarity
10cc,,1.0,,
12-16-18-foot_skiff,,1.0,,
"2,4,5-t",,-1.0,,
"2,4-d",,-1.0,,
2-faced,,,-1.0,
2-faces,,,-1.0,
3-d,,1.0,,
420,,1.0,,
505,,1.0,,
a,,-1.0,,


## Prep lexicons to be joined with the word embeddings.

In [70]:
# Since words exists in each lexicon that are not in others, 
# treating both as sparse data.
# Replacing NaN with zeros.
merged_lex_df.fillna(0, inplace=True)
merged_lex_df.head()

Unnamed: 0,afinn_polarity,msol_polarity,bing_polarity,emolex_polarity
10cc,0.0,1.0,0.0,0.0
12-16-18-foot_skiff,0.0,1.0,0.0,0.0
"2,4,5-t",0.0,-1.0,0.0,0.0
"2,4-d",0.0,-1.0,0.0,0.0
2-faced,0.0,0.0,-1.0,0.0


In [71]:
# Now merge the lexicon dimensions with the word embeddings using the vocab.

# Get id-to-word dict previously created while generating word embeddings.
f = BytesIO(file_io.read_file_to_string('id2word.bin', binary_mode=True))
id2word = msgpack.unpack(f, raw=False)
id2word_df = pd.DataFrame.from_dict(id2word, orient='index', columns=['id'])
id2word_df.head()

Unnamed: 0,id
14,this
9,is
16,not
3450,creative
1,.


In [72]:
# Merge the id-to-word dict with word embeddings, then set words as index, 
# which allows the lexicons to be easily merged since both have the words 
# as the index already.
word_emb_merged = pd.merge(word_embeddings_df, id2word_df, left_index=True, right_index=True, how='outer')
word_emb_merged = word_emb_merged.set_index('id')
word_emb_merged.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.,0.443647,0.128096,0.704819,-0.413601,-0.615326,0.510453,0.081257,-0.65561,-0.357054,-0.387023,...,0.600914,-0.826428,-0.043708,-0.699184,-0.719986,0.412833,0.103456,-1.462709,0.466236,0.383801
the,-0.429288,-0.016554,0.384819,0.113181,-1.462845,0.261847,-0.875737,-0.451588,0.548763,-0.459796,...,0.000568,0.446239,0.302111,-0.397178,-0.675476,0.05389,0.523819,0.075026,0.311938,-0.055834
to,0.674015,0.280386,0.416335,-0.870266,0.490471,-0.532294,0.146954,-0.918494,0.201321,-0.624855,...,-0.092502,0.504301,0.192321,-0.295859,0.08463,-0.107387,0.118977,0.602776,-0.274779,-0.051925
and,-0.007329,-0.039159,0.599992,-0.76129,-0.340942,-0.756368,-0.926788,0.02548,0.299273,-0.697997,...,-0.180548,-0.031146,-0.675957,-0.666593,0.342779,0.510544,0.873011,0.061182,0.068458,0.256529


In [73]:
# Now merge lexicon dimensions with word embedding dimensions.
word_lex_emb_merged = pd.merge(word_emb_merged, merged_lex_df, left_index=True, right_index=True, how='outer')
word_lex_emb_merged.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,194,195,196,197,198,199,afinn_polarity,msol_polarity,bing_polarity,emolex_polarity
!,0.719402,0.289279,-0.273394,-0.528267,-0.297721,-0.127302,-0.276557,0.444393,-0.759141,-0.210949,...,0.162053,0.286156,-0.064141,-0.999607,-0.819493,0.76035,,,,
.,0.443647,0.128096,0.704819,-0.413601,-0.615326,0.510453,0.081257,-0.65561,-0.357054,-0.387023,...,-0.719986,0.412833,0.103456,-1.462709,0.466236,0.383801,,,,
0,-0.356113,-0.015441,0.185449,0.24063,-0.112532,0.395168,0.261011,-0.261993,0.226032,0.540223,...,0.619772,-0.195389,0.21292,0.67014,-0.460036,0.082508,,,,
00,-0.69056,0.259574,0.428379,0.403884,-0.752359,0.95652,0.459817,0.117626,0.170718,0.654727,...,0.392507,-0.209626,-0.147662,0.033319,-0.259417,0.417896,,,,
000,-0.134573,-0.890869,0.339105,0.84503,-0.100801,0.964447,0.199437,-0.270517,0.339413,1.024041,...,0.062363,-0.216607,0.817551,-0.624438,0.471237,0.194961,,,,


In [74]:
# Replace NaN's in only the lexicon embedding columns with 0,
# (so rows with NaN's in the word embeddings can be dropped after).
word_lex_emb_merged['afinn_polarity'].fillna(0.0, inplace=True)
word_lex_emb_merged['msol_polarity'].fillna(0.0, inplace=True)
word_lex_emb_merged['bing_polarity'].fillna(0.0, inplace=True)
word_lex_emb_merged['emolex_polarity'].fillna(0.0, inplace=True)
word_lex_emb_merged.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,194,195,196,197,198,199,afinn_polarity,msol_polarity,bing_polarity,emolex_polarity
!,0.719402,0.289279,-0.273394,-0.528267,-0.297721,-0.127302,-0.276557,0.444393,-0.759141,-0.210949,...,0.162053,0.286156,-0.064141,-0.999607,-0.819493,0.76035,0.0,0.0,0.0,0.0
.,0.443647,0.128096,0.704819,-0.413601,-0.615326,0.510453,0.081257,-0.65561,-0.357054,-0.387023,...,-0.719986,0.412833,0.103456,-1.462709,0.466236,0.383801,0.0,0.0,0.0,0.0
0,-0.356113,-0.015441,0.185449,0.24063,-0.112532,0.395168,0.261011,-0.261993,0.226032,0.540223,...,0.619772,-0.195389,0.21292,0.67014,-0.460036,0.082508,0.0,0.0,0.0,0.0
00,-0.69056,0.259574,0.428379,0.403884,-0.752359,0.95652,0.459817,0.117626,0.170718,0.654727,...,0.392507,-0.209626,-0.147662,0.033319,-0.259417,0.417896,0.0,0.0,0.0,0.0
000,-0.134573,-0.890869,0.339105,0.84503,-0.100801,0.964447,0.199437,-0.270517,0.339413,1.024041,...,0.062363,-0.216607,0.817551,-0.624438,0.471237,0.194961,0.0,0.0,0.0,0.0


In [75]:
# Now drop the rows that have NaN for word embedding values 
# to get back to original vocabulary.
word_lex_emb_merged.dropna(inplace=True)
word_lex_emb_merged.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,194,195,196,197,198,199,afinn_polarity,msol_polarity,bing_polarity,emolex_polarity
!,0.719402,0.289279,-0.273394,-0.528267,-0.297721,-0.127302,-0.276557,0.444393,-0.759141,-0.210949,...,0.162053,0.286156,-0.064141,-0.999607,-0.819493,0.76035,0.0,0.0,0.0,0.0
.,0.443647,0.128096,0.704819,-0.413601,-0.615326,0.510453,0.081257,-0.65561,-0.357054,-0.387023,...,-0.719986,0.412833,0.103456,-1.462709,0.466236,0.383801,0.0,0.0,0.0,0.0
0,-0.356113,-0.015441,0.185449,0.24063,-0.112532,0.395168,0.261011,-0.261993,0.226032,0.540223,...,0.619772,-0.195389,0.21292,0.67014,-0.460036,0.082508,0.0,0.0,0.0,0.0
00,-0.69056,0.259574,0.428379,0.403884,-0.752359,0.95652,0.459817,0.117626,0.170718,0.654727,...,0.392507,-0.209626,-0.147662,0.033319,-0.259417,0.417896,0.0,0.0,0.0,0.0
000,-0.134573,-0.890869,0.339105,0.84503,-0.100801,0.964447,0.199437,-0.270517,0.339413,1.024041,...,0.062363,-0.216607,0.817551,-0.624438,0.471237,0.194961,0.0,0.0,0.0,0.0


In [76]:
# Confirm we're back to original vocab size.
print('vocab size:', len(word_lex_emb_merged))

vocab size: 36995


In [77]:
# Determine coverage for each lexicon by summing number of 
# non-zero values for each of the lexicon columns.
word_lex_emb_merged[['afinn_polarity', 'msol_polarity', 
                     'bing_polarity', 'emolex_polarity']].astype(bool).sum(axis=0)

afinn_polarity      1237
msol_polarity      14365
bing_polarity       3760
emolex_polarity     4091
dtype: int64

In [78]:
# Calculate coverage (%) for each separately.
vocab_size = len(word_lex_emb_merged) - 1 # Subtract the zero-th padding row.
print('afinn_polarity coverage: ', 1237.0 / vocab_size * 100)
print('msol_polarity coverage: ', 14365.0 / vocab_size * 100)
print('bing_polarity coverage: ', 3760.0 / vocab_size * 100)
print('emolex_polarity coverage: ', 4091.0 / vocab_size * 100)

afinn_polarity coverage:  3.34378547872628
msol_polarity coverage:  38.83062118181327
bing_polarity coverage:  10.163810347623938
emolex_polarity coverage:  11.058550035140833


In [79]:
# Determine coverage by all.
afinn_cov = word_lex_emb_merged['afinn_polarity'] != 0
msol_polarity_cov = word_lex_emb_merged['msol_polarity'] != 0
bing_polarity_cov = word_lex_emb_merged['bing_polarity'] != 0
emolex_polarity_cov = word_lex_emb_merged['emolex_polarity'] != 0

In [80]:
print("Coverage by all four: ", len(word_lex_emb_merged[afinn_cov & msol_polarity_cov & bing_polarity_cov & emolex_polarity_cov]))
print("Coverage by all four (%)", 420/len(word_lex_emb_merged) * 100)

Coverage by all four:  420
Coverage by all four (%) 1.1352885525070955


## V4 Lexicons (AFINN polarity, MSOL polarity, Bing polarity, EmoLex polarity, Abusive Words first occurance)

In [81]:
# Merge abusive words with V3 set.
word_lex_emb_merged = pd.merge(word_lex_emb_merged, lex_abusive_df, left_index=True, right_index=True, how='outer')
word_lex_emb_merged.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,195,196,197,198,199,afinn_polarity,msol_polarity,bing_polarity,emolex_polarity,abusive_lex
!,0.719402,0.289279,-0.273394,-0.528267,-0.297721,-0.127302,-0.276557,0.444393,-0.759141,-0.210949,...,0.286156,-0.064141,-0.999607,-0.819493,0.76035,0.0,0.0,0.0,0.0,
.,0.443647,0.128096,0.704819,-0.413601,-0.615326,0.510453,0.081257,-0.65561,-0.357054,-0.387023,...,0.412833,0.103456,-1.462709,0.466236,0.383801,0.0,0.0,0.0,0.0,
0,-0.356113,-0.015441,0.185449,0.24063,-0.112532,0.395168,0.261011,-0.261993,0.226032,0.540223,...,-0.195389,0.21292,0.67014,-0.460036,0.082508,0.0,0.0,0.0,0.0,
00,-0.69056,0.259574,0.428379,0.403884,-0.752359,0.95652,0.459817,0.117626,0.170718,0.654727,...,-0.209626,-0.147662,0.033319,-0.259417,0.417896,0.0,0.0,0.0,0.0,
000,-0.134573,-0.890869,0.339105,0.84503,-0.100801,0.964447,0.199437,-0.270517,0.339413,1.024041,...,-0.216607,0.817551,-0.624438,0.471237,0.194961,0.0,0.0,0.0,0.0,
0000,-0.160366,-0.175099,0.359133,0.386547,-0.32512,0.939776,0.330288,0.503121,-0.303726,0.825554,...,0.021648,0.067934,0.276323,-0.36432,0.391017,0.0,0.0,0.0,0.0,
000000,-0.703432,-0.410802,0.131273,-0.344313,-0.346428,0.036551,-0.467358,0.712626,-0.082894,0.086276,...,-0.571037,-0.473111,0.571625,-0.480657,0.707137,0.0,0.0,0.0,0.0,
000ft,-0.294269,-0.666662,0.602464,0.705479,-0.657206,1.220048,0.116402,-0.723018,0.271957,0.781907,...,-0.570813,0.11004,0.413911,-0.468938,0.386412,0.0,0.0,0.0,0.0,
000s,-0.324249,-0.851926,0.090279,0.258359,0.048292,0.779359,0.198883,-0.329399,0.053351,0.290034,...,0.177721,0.572717,-0.592665,-0.219068,-0.055364,0.0,0.0,0.0,0.0,
000|,-0.189656,-0.034421,-0.273435,-0.152859,0.495937,-0.301283,-0.801648,0.025831,-0.342452,-0.7477,...,-0.548357,-0.612839,0.098037,-0.047212,-0.267147,0.0,0.0,0.0,0.0,


In [82]:
# Fill NaNs with zeros.
word_lex_emb_merged['abusive_lex'].fillna(0.0, inplace=True)
word_lex_emb_merged.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,195,196,197,198,199,afinn_polarity,msol_polarity,bing_polarity,emolex_polarity,abusive_lex
!,0.719402,0.289279,-0.273394,-0.528267,-0.297721,-0.127302,-0.276557,0.444393,-0.759141,-0.210949,...,0.286156,-0.064141,-0.999607,-0.819493,0.76035,0.0,0.0,0.0,0.0,0.0
.,0.443647,0.128096,0.704819,-0.413601,-0.615326,0.510453,0.081257,-0.65561,-0.357054,-0.387023,...,0.412833,0.103456,-1.462709,0.466236,0.383801,0.0,0.0,0.0,0.0,0.0
0,-0.356113,-0.015441,0.185449,0.24063,-0.112532,0.395168,0.261011,-0.261993,0.226032,0.540223,...,-0.195389,0.21292,0.67014,-0.460036,0.082508,0.0,0.0,0.0,0.0,0.0
00,-0.69056,0.259574,0.428379,0.403884,-0.752359,0.95652,0.459817,0.117626,0.170718,0.654727,...,-0.209626,-0.147662,0.033319,-0.259417,0.417896,0.0,0.0,0.0,0.0,0.0
000,-0.134573,-0.890869,0.339105,0.84503,-0.100801,0.964447,0.199437,-0.270517,0.339413,1.024041,...,-0.216607,0.817551,-0.624438,0.471237,0.194961,0.0,0.0,0.0,0.0,0.0


In [89]:
# Now drop the rows that have NaN for word embedding values 
# to get back to original vocabulary.
word_lex_emb_merged.dropna(inplace=True)
word_lex_emb_merged.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,195,196,197,198,199,afinn_polarity,msol_polarity,bing_polarity,emolex_polarity,abusive_lex
!,0.719402,0.289279,-0.273394,-0.528267,-0.297721,-0.127302,-0.276557,0.444393,-0.759141,-0.210949,...,0.286156,-0.064141,-0.999607,-0.819493,0.76035,0.0,0.0,0.0,0.0,0.0
.,0.443647,0.128096,0.704819,-0.413601,-0.615326,0.510453,0.081257,-0.65561,-0.357054,-0.387023,...,0.412833,0.103456,-1.462709,0.466236,0.383801,0.0,0.0,0.0,0.0,0.0
0,-0.356113,-0.015441,0.185449,0.24063,-0.112532,0.395168,0.261011,-0.261993,0.226032,0.540223,...,-0.195389,0.21292,0.67014,-0.460036,0.082508,0.0,0.0,0.0,0.0,0.0
00,-0.69056,0.259574,0.428379,0.403884,-0.752359,0.95652,0.459817,0.117626,0.170718,0.654727,...,-0.209626,-0.147662,0.033319,-0.259417,0.417896,0.0,0.0,0.0,0.0,0.0
000,-0.134573,-0.890869,0.339105,0.84503,-0.100801,0.964447,0.199437,-0.270517,0.339413,1.024041,...,-0.216607,0.817551,-0.624438,0.471237,0.194961,0.0,0.0,0.0,0.0,0.0


In [90]:
# Determine coverage for each lexicon by summing number of 
# non-zero values for each of the lexicon columns.
word_lex_emb_merged[['afinn_polarity', 'msol_polarity', 
                     'bing_polarity', 'emolex_polarity', 
                     'abusive_lex']].astype(bool).sum(axis=0)

afinn_polarity      1237
msol_polarity      14365
bing_polarity       3760
emolex_polarity     4091
abusive_lex         3231
dtype: int64

In [95]:
# Calculate coverage (%) for each separately.
vocab_size = len(word_lex_emb_merged) - 1 # Subtract the zero-th padding row.
print('afinn_polarity coverage: ', 1237.0 / vocab_size * 100)
print('msol_polarity coverage: ', 14365.0 / vocab_size * 100)
print('bing_polarity coverage: ', 3760.0 / vocab_size * 100)
print('emolex_polarity coverage: ', 4091.0 / vocab_size * 100)
print('abusive_lex coverage: ', 3231.0 / vocab_size * 100)

afinn_polarity coverage:  3.34378547872628
msol_polarity coverage:  38.83062118181327
bing_polarity coverage:  10.163810347623938
emolex_polarity coverage:  11.058550035140833
abusive_lex coverage:  8.733848732226848


In [92]:
# Determine coverage by all.
afinn_cov = word_lex_emb_merged['afinn_polarity'] != 0
msol_polarity_cov = word_lex_emb_merged['msol_polarity'] != 0
bing_polarity_cov = word_lex_emb_merged['bing_polarity'] != 0
emolex_polarity_cov = word_lex_emb_merged['emolex_polarity'] != 0
abusive_lex_cov = word_lex_emb_merged['abusive_lex'] != 0

print("Coverage by all five: ", len(word_lex_emb_merged[afinn_cov & msol_polarity_cov & bing_polarity_cov & emolex_polarity_cov & abusive_lex_cov]))
print("Coverage by all five (%)", 258/len(word_lex_emb_merged) * 100)

Coverage by all five:  258
Coverage by all five (%) 0.6973915393972159


In [94]:
word_lex_emb_merged[afinn_cov & msol_polarity_cov & bing_polarity_cov & emolex_polarity_cov & abusive_lex_cov].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,195,196,197,198,199,afinn_polarity,msol_polarity,bing_polarity,emolex_polarity,abusive_lex
accidental,0.033746,-0.102887,0.238122,0.060379,0.30669,0.729172,0.016707,0.066349,-0.404524,-0.51194,...,-0.673757,-0.888718,0.269389,-0.048764,0.020526,-0.4,-1.0,-1.0,-1.0,-0.105053
accusation,0.469207,-0.45921,0.049328,-0.399464,0.102734,0.695916,0.878991,-0.276687,-0.957246,-0.744902,...,-0.06271,0.205403,-0.064159,-0.849825,-0.133795,-0.4,-1.0,-1.0,-1.0,0.001577
afraid,0.541328,-0.467239,-0.021896,-0.4742,-0.843039,-0.034977,0.833632,0.605724,0.611852,-0.588616,...,-0.238192,0.415488,-0.259801,-0.759163,0.157438,-0.4,-1.0,-1.0,-1.0,-0.167514
aggression,-0.472514,-0.066919,0.712302,-0.005263,-0.675342,0.989755,0.296975,0.142779,-0.483076,-0.184087,...,-0.726967,-0.557026,0.331551,-0.510336,0.543412,-0.4,-1.0,-1.0,-1.0,-0.09644
aggressive,-0.061554,0.482369,0.406225,-0.678482,-0.055096,0.536343,0.329181,-0.085694,-0.701427,0.005902,...,-0.421925,-0.583272,0.589014,-0.869469,-0.151321,-0.4,-1.0,-1.0,-1.0,-0.138924
