# Research on posts in Blizzard subreddit

In [149]:
import praw
import pandas as pd
import datetime as dt
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

In [150]:
# Function for later formatting
def get_date(date):
    return dt.datetime.fromtimestamp(date).date()

In [151]:
# Reddit API(Need to type in your own code to make it work)
reddit = praw.Reddit(client_id='*******',
                    client_secret='**************',
                    user_agent='**************')

In [152]:
# Parse data from posts that include keyword "mobile" and store it in data dictionary
blizzard = reddit.subreddit("Blizzard")
mobile = blizzard.search('mobile', limit = 1000)
titles = set()
data = {
    "title":[],
    "date":[],
    "body":[]
}
for article in mobile:
    data['title'].append(article.title)
    data['date'].append(article.created)
    data['body'].append(article.selftext)
post_mobile = pd.DataFrame(data)

In [153]:
# Cleaning date column and limit posts created after 2018 Blizzcon. Also, I want to merge post title and content. 
post_mobile['date'] = post_mobile['date'].apply(get_date)
post_mobile = post_mobile.loc[post_mobile['date'] > dt.date(2018,11,1)]
post_mobile['text'] = post_mobile["title"].map(str) + post_mobile["body"]

In [154]:
# Use NLTK package to get sentiment score for each posts
sia = SIA()
results = []

for line in post_mobile['text']:
    pol_score = sia.polarity_scores(line)
    results.append(pol_score)

In [156]:
# This is how the sentiment score looks like
results

[{'neg': 0.0, 'neu': 0.905, 'pos': 0.095, 'compound': 0.25},
 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0},
 {'neg': 0.0, 'neu': 0.8, 'pos': 0.2, 'compound': 0.3612},
 {'neg': 0.166, 'neu': 0.688, 'pos': 0.147, 'compound': -0.9436},
 {'neg': 0.069, 'neu': 0.817, 'pos': 0.114, 'compound': 0.8336},
 {'neg': 0.133, 'neu': 0.845, 'pos': 0.022, 'compound': -0.954},
 {'neg': 0.225, 'neu': 0.775, 'pos': 0.0, 'compound': -0.4404},
 {'neg': 0.0, 'neu': 0.703, 'pos': 0.297, 'compound': 0.5859},
 {'neg': 0.096, 'neu': 0.802, 'pos': 0.102, 'compound': 0.2521},
 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0},
 {'neg': 0.095, 'neu': 0.692, 'pos': 0.213, 'compound': 0.6704},
 {'neg': 0.0, 'neu': 0.872, 'pos': 0.128, 'compound': 0.3612},
 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0},
 {'neg': 0.0, 'neu': 0.838, 'pos': 0.162, 'compound': 0.4404},
 {'neg': 0.0, 'neu': 0.757, 'pos': 0.243, 'compound': 0.9828},
 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0},
 {'neg': 0.0, 

In [165]:
# Store sentiment score in dataframe with post created date
df = pd.DataFrame.from_records(results)
df['date'] = post_mobile['date']
df.head()

Unnamed: 0,compound,neg,neu,pos,date
0,0.25,0.0,0.905,0.095,2019-07-08
1,0.0,0.0,1.0,0.0,2019-06-06
2,0.3612,0.0,0.8,0.2,2019-07-11
3,-0.9436,0.166,0.688,0.147,2019-06-29
4,0.8336,0.069,0.817,0.114,2019-06-18


In [185]:
# label 1(positive) if sentiment score is above 0, label 0(negative) if sentiment score is below 0
df['label'] = 0
df.loc[df['compound'] > 0, 'label'] = 1
df.loc[df['compound'] < 0, 'label'] = -1
df.head()

Unnamed: 0,compound,neg,neu,pos,date,label
0,0.25,0.0,0.905,0.095,2019-07-08,1
1,0.0,0.0,1.0,0.0,2019-06-06,0
2,0.3612,0.0,0.8,0.2,2019-07-11,1
3,-0.9436,0.166,0.688,0.147,2019-06-29,-1
4,0.8336,0.069,0.817,0.114,2019-06-18,1


In [186]:
print(df.label.value_counts(normalize=True))

 1    0.465116
-1    0.274419
 0    0.260465
Name: label, dtype: float64


In [187]:
# create list for everydays date from 2018 Blizzcon(11/2/2018) to now
base = dt.datetime.today().date()
numdays = base - dt.date(2018,11,2)
date_list = [base - dt.timedelta(days=x) for x in range(numdays.days)]

In [188]:
# Store cumulative percentage of posts expressing the sentiment in dictionary
PN = {
    'positive':[],
    'negative':[],
    'neutral':[],
    'untillthedate':[]
}
for date in date_list:
    score = df.loc[df['date'] < date,'label'].value_counts(normalize = True)
    PN['positive'].append(score[1])
    PN['negative'].append(score[-1])
    PN['neutral'].append(score[0])
    PN['untillthedate'].append(date)

In [191]:
# Store in dataframe and export a csv file
PNRovertime = pd.DataFrame(PN)
PNRovertime.to_csv('PNN_rate_reddit.csv')

In [190]:
# This is how it looks like
PNRovertime

Unnamed: 0,positive,negative,neutral,untillthedate
0,0.466981,0.268868,0.264151,2019-08-23
1,0.466981,0.268868,0.264151,2019-08-22
2,0.466981,0.268868,0.264151,2019-08-21
3,0.466981,0.268868,0.264151,2019-08-20
4,0.466981,0.268868,0.264151,2019-08-19
5,0.466981,0.268868,0.264151,2019-08-18
6,0.466981,0.268868,0.264151,2019-08-17
7,0.466981,0.268868,0.264151,2019-08-16
8,0.466981,0.268868,0.264151,2019-08-15
9,0.466981,0.268868,0.264151,2019-08-14


In [184]:
# reference: Bird, Steven, Edward Loper and Ewan Klein (2009), Natural Language Processing with Python. O’Reilly Media Inc.