## Set up local environment
Import libraries

In [68]:
import pandas as pd
from scipy import stats
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

Read comments as data frame

In [2]:
df = pd.read_csv('../../dat/scored_comments.csv')
df.head()

Unnamed: 0,comment,author,date,score
0,1:11 amazing how missing features that previou...,Ivan Galvagno,2022-05-12T13:06:58Z,0.129
1,How tf they managed to screw this up?,Rbarzevedo,2022-05-12T12:35:43Z,0.167
2,Still a weird choice to put a remix of kicksta...,Some Turkey Nuts,2022-05-11T21:27:56Z,0.09
3,Im still waiting for the full game to come out.,Dozer,2022-05-11T13:57:38Z,0.0
4,My dads generation had Bad Company. \nMy gener...,Leadkiss,2022-05-11T12:21:19Z,0.28


Initialize VADER

In [69]:
m = SentimentIntensityAnalyzer()

## Example for a single comment
Extract a single comment from the entire comment section and manually repeat the process carried out by LIME.

In [82]:
# Extract one comment from `df`
c = df.iloc[1]
comment = c['comment']
print('Selected comment:', comment, sep='\n  ')

# Comment into list
words = [word for word in comment.split(' ')]
print('Comment as list of words:', words, sep='\n  ')

Selected comment:
  How tf they managed to screw this up?
Comment as list of words:
  ['How', 'tf', 'they', 'managed', 'to', 'screw', 'this', 'up?']


Randomly take out words from comment

In [83]:
# Initialize bernoulli distribution
B = stats.bernoulli(p=0.8)

# Select words based on Bernoulli random variable (20 times)
d = []
for i in range(20):
    d.append(list(B.rvs(len(words), random_state=42+i)))
    
# Modifications to data frame
t = pd.DataFrame(data=d, columns=words)

# Remove duplicates
t = t.drop_duplicates()

# Set weight based on closeness to original text
t['weight'] = t.sum(axis=1) / len(words)

# Drop entries where closeness is 1
t = t[t['weight'] < 1].reset_index(drop=True)

# Get original score
t['score0'] = c['score']

# Visualize
t

Unnamed: 0,How,tf,they,managed,to,screw,this,up?,weight,score0
0,1,0,1,1,1,1,1,0,0.75,0.167
1,1,1,1,1,1,0,1,1,0.875,0.167
2,0,1,1,1,1,1,1,1,0.875,0.167
3,1,0,1,1,1,1,1,1,0.875,0.167
4,1,0,1,1,1,1,0,1,0.75,0.167
5,1,1,0,0,1,1,1,1,0.75,0.167
6,0,1,1,1,1,1,1,0,0.75,0.167
7,0,1,1,1,1,1,0,1,0.75,0.167
8,1,1,1,1,1,0,0,1,0.75,0.167
9,0,1,1,1,1,0,1,1,0.75,0.167


In [86]:
# Initialize empty list
scores = []

for i in t.index.values:
    # Extract modified version of text
    mod = t.iloc[i, :-1]
    mod = ' '.join(list(mod[mod > 0].index.values))

    # Append score to list
    scores.append(m.polarity_scores(mod)['neg'])

# Add new scores to `t`
t['score1'] = scores

# Visualize
t

Unnamed: 0,How,tf,they,managed,to,screw,this,up?,weight,score0,score1
0,1,0,1,1,1,1,1,0,0.75,0.167,0.167
1,1,1,1,1,1,0,1,1,0.875,0.167,0.0
2,0,1,1,1,1,1,1,1,0.875,0.167,0.149
3,1,0,1,1,1,1,1,1,0.875,0.167,0.149
4,1,0,1,1,1,1,0,1,0.75,0.167,0.167
5,1,1,0,0,1,1,1,1,0.75,0.167,0.167
6,0,1,1,1,1,1,1,0,0.75,0.167,0.167
7,0,1,1,1,1,1,0,1,0.75,0.167,0.167
8,1,1,1,1,1,0,0,1,0.75,0.167,0.0
9,0,1,1,1,1,0,1,1,0.75,0.167,0.0
