In [1]:
import os
import re
import random
import numpy as np 
import pandas as pd 
from textblob import TextBlob
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import pipeline

In [2]:
data = pd.read_csv("data/reddit_wsb.csv")

In [3]:
data.head()

Unnamed: 0,title,score,id,url,comms_num,created,body,timestamp
0,"It's not about the money, it's about sending a...",55,l6ulcx,https://v.redd.it/6j75regs72e61,6,1611863000.0,,2021-01-28 21:37:41
1,Math Professor Scott Steiner says the numbers ...,110,l6uibd,https://v.redd.it/ah50lyny62e61,23,1611862000.0,,2021-01-28 21:32:10
2,Exit the system,0,l6uhhn,https://www.reddit.com/r/wallstreetbets/commen...,47,1611862000.0,The CEO of NASDAQ pushed to halt trading “to g...,2021-01-28 21:30:35
3,NEW SEC FILING FOR GME! CAN SOMEONE LESS RETAR...,29,l6ugk6,https://sec.report/Document/0001193125-21-019848/,74,1611862000.0,,2021-01-28 21:28:57
4,"Not to distract from GME, just thought our AMC...",71,l6ufgy,https://i.redd.it/4h2sukb662e61.jpg,156,1611862000.0,,2021-01-28 21:26:56


In [4]:
sia = SentimentIntensityAnalyzer()
# create a sentiment analysis pipeline using BERT
classifier = pipeline('sentiment-analysis', model='nlptown/bert-base-multilingual-uncased-sentiment')

In [5]:
def get_sentiment_nltk_compound(sia, text):
    if sia.polarity_scores(text)["compound"] > 0:
        return "Positive"
    elif sia.polarity_scores(text)["compound"] < 0:
        return "Negative"
    else:
        return "Neutral"

def get_sentiment_nltk(sia, text):
    senti = sia.polarity_scores(text)
    if senti["neg"] <= 0.05:
        if senti["pos"]-senti["neg"] > 0:
            return "Postive"
        else:
            return "Neutral"
    elif senti["pos"] <= 0.05:
        if senti["pos"]-senti["neg"] <= 0:
            return "Negative"
        else:
            return "Neutral"
    else:
        return "Neutral"
    
def get_sentiment_textblob(text):
    blob = TextBlob(text)
    if blob.sentiment.polarity > 0:
        return "Postive"
    elif blob.sentiment.polarity < 0:
        return "Negative"
    else:
        return "Neutral"
    
def get_sentiment_bert(text):
    return classifier(text)

In [7]:
data['NLTK_compound'] = data['title'].apply(lambda x: get_sentiment_nltk_compound(sia,x))
data['NLTK'] = data['title'].apply(lambda x: get_sentiment_nltk(sia,x))
data['Textblob'] = data['title'].apply(lambda x: get_sentiment_textblob(x))
#data['BERT'] = data['title'].apply(lambda x: get_sentiment_bert(x))

In [8]:
data.head()

Unnamed: 0,title,score,id,url,comms_num,created,body,timestamp,NLTK_compound,NLTK,Textblob
0,"It's not about the money, it's about sending a...",55,l6ulcx,https://v.redd.it/6j75regs72e61,6,1611863000.0,,2021-01-28 21:37:41,Neutral,Neutral,Neutral
1,Math Professor Scott Steiner says the numbers ...,110,l6uibd,https://v.redd.it/ah50lyny62e61,23,1611862000.0,,2021-01-28 21:32:10,Negative,Negative,Neutral
2,Exit the system,0,l6uhhn,https://www.reddit.com/r/wallstreetbets/commen...,47,1611862000.0,The CEO of NASDAQ pushed to halt trading “to g...,2021-01-28 21:30:35,Neutral,Neutral,Neutral
3,NEW SEC FILING FOR GME! CAN SOMEONE LESS RETAR...,29,l6ugk6,https://sec.report/Document/0001193125-21-019848/,74,1611862000.0,,2021-01-28 21:28:57,Negative,Neutral,Negative
4,"Not to distract from GME, just thought our AMC...",71,l6ufgy,https://i.redd.it/4h2sukb662e61.jpg,156,1611862000.0,,2021-01-28 21:26:56,Positive,Postive,Postive


In [9]:
data['title'][3]

'NEW SEC FILING FOR GME! CAN SOMEONE LESS RETARDED THAN ME PLEASE INTERPRET?'

In [10]:
data[0:20]

Unnamed: 0,title,score,id,url,comms_num,created,body,timestamp,NLTK_compound,NLTK,Textblob
0,"It's not about the money, it's about sending a...",55,l6ulcx,https://v.redd.it/6j75regs72e61,6,1611863000.0,,2021-01-28 21:37:41,Neutral,Neutral,Neutral
1,Math Professor Scott Steiner says the numbers ...,110,l6uibd,https://v.redd.it/ah50lyny62e61,23,1611862000.0,,2021-01-28 21:32:10,Negative,Negative,Neutral
2,Exit the system,0,l6uhhn,https://www.reddit.com/r/wallstreetbets/commen...,47,1611862000.0,The CEO of NASDAQ pushed to halt trading “to g...,2021-01-28 21:30:35,Neutral,Neutral,Neutral
3,NEW SEC FILING FOR GME! CAN SOMEONE LESS RETAR...,29,l6ugk6,https://sec.report/Document/0001193125-21-019848/,74,1611862000.0,,2021-01-28 21:28:57,Negative,Neutral,Negative
4,"Not to distract from GME, just thought our AMC...",71,l6ufgy,https://i.redd.it/4h2sukb662e61.jpg,156,1611862000.0,,2021-01-28 21:26:56,Positive,Postive,Postive
5,WE BREAKING THROUGH,405,l6uf7d,https://i.redd.it/2wef8tc062e61.png,84,1611862000.0,,2021-01-28 21:26:30,Neutral,Neutral,Neutral
6,SHORT STOCK DOESN'T HAVE AN EXPIRATION DATE,317,l6uf6d,https://www.reddit.com/r/wallstreetbets/commen...,53,1611862000.0,Hedgefund whales are spreading disinfo saying ...,2021-01-28 21:26:27,Neutral,Neutral,Neutral
7,THIS IS THE MOMENT,405,l6ub9l,https://www.reddit.com/r/wallstreetbets/commen...,178,1611862000.0,Life isn't fair. My mother always told me that...,2021-01-28 21:19:31,Neutral,Neutral,Neutral
8,Currently Holding AMC and NOK - Is it retarded...,200,l6ub4i,https://i.redd.it/6k2z7ouo42e61.png,161,1611862000.0,,2021-01-28 21:19:16,Negative,Negative,Negative
9,I have nothing to say but BRUH I am speechless...,291,l6uas9,https://i.redd.it/bfzzw2yo42e61.jpg,27,1611862000.0,,2021-01-28 21:18:37,Neutral,Neutral,Neutral


In [None]:
data.to_csv("result.csv")

In [13]:
data.to_csv('./result.csv', header=True, index=False, columns=list(data.axes[1]))