# RedditTitle analysis

In [1]:
import requests
import json
import time
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
import nltk
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as FF

# Show the installed plotly version; this notebook requires version >= 1.9.0.
print(__version__)

# Fetch the lexicon used by the VADER sentiment analyzer.
nltk.download("vader_lexicon")

# Switch plotly's offline mode to notebook rendering.
init_notebook_mode(connected=True)

2.3.0
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\nicol\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Download the first page of the subreddit listing as parsed JSON.
reddit_thread = "The_Donald"

# Custom User-Agent header sent with every request made by this notebook.
hdr = {'User-Agent': 'windows:r/' + reddit_thread + '.single.result:v1.0' +
       '/u/MrBabiole'}
url = 'https://www.reddit.com/r/' + reddit_thread + '/.json'

# Bound the wait so a stalled connection cannot hang the kernel forever.
req = requests.get(url, headers=hdr, timeout=30)
# Fail here with an explicit HTTP error (e.g. when rate-limited) instead of
# later with a KeyError when the error payload has no 'data' entry.
req.raise_for_status()
json_data = req.json()

In [3]:
# Pretty-print the raw post entries of the first page to inspect their fields.
posts = json.dumps(
    json_data["data"]["children"],
    sort_keys=True,
    indent=4,
)
print(posts)

[
    {
        "data": {
            "approved_at_utc": null,
            "approved_by": null,
            "archived": false,
            "author": "Chisesi",
            "author_flair_css_class": null,
            "author_flair_text": null,
            "banned_at_utc": null,
            "banned_by": null,
            "brand_safe": false,
            "can_gild": false,
            "can_mod_post": false,
            "clicked": false,
            "contest_mode": false,
            "created": 1518816359.0,
            "created_utc": 1518787559.0,
            "distinguished": null,
            "domain": "self.The_Donald",
            "downs": 0,
            "edited": false,
            "gilded": 0,
            "hidden": false,
            "hide_score": true,
            "id": "7xyp6g",
            "is_crosspostable": false,
            "is_reddit_media_domain": false,
            "is_self": true,
            "is_video": false,
            "likes": null,
            "link_flair_css_class":

In [4]:
# Keep requesting follow-up pages until more than 100 posts have been
# collected or a page brings no new posts.

# Work on a copy: extending data_all must not mutate json_data in place,
# otherwise re-running earlier cells would see the extended list.
data_all = list(json_data['data']['children'])
# Start from the current size so a page without new posts is detected on the
# first iteration instead of after one extra, redundant request.
num_of_posts = len(data_all)

# The `0 <` guard avoids an IndexError on data_all[-1] when the first page
# came back empty.
while 0 < len(data_all) <= 100:
    # Pause before each request (presumably to stay under Reddit's rate
    # limits; confirm against the API rules).
    time.sleep(2)
    # The name of the last collected post is the cursor for the next page.
    last = data_all[-1]['data']['name']
    url = 'https://www.reddit.com/r/' + reddit_thread + '/.json?after=' + str(last)
    req = requests.get(url, headers=hdr, timeout=30)
    # Surface HTTP failures explicitly rather than as a KeyError below.
    req.raise_for_status()
    data = req.json()
    data_all += data['data']['children']
    if num_of_posts == len(data_all):
        # Nothing new was returned: the listing is exhausted.
        break
    num_of_posts = len(data_all)

## Labeling data

In [5]:
# Score every collected title with VADER and sort it into a sentiment bucket
# according to its compound score.
sia = SIA()
pos_list, neg_list, neu_list = [], [], []
for entry in data_all:
    title = entry['data']['title']
    res = sia.polarity_scores(title)
    print(res)

    # compound > 0.2 -> positive, compound < -0.2 -> negative, else neutral
    if res['compound'] > 0.2:
        bucket = pos_list
    elif res['compound'] < -0.2:
        bucket = neg_list
    else:
        bucket = neu_list
    bucket.append(title)

# Write the positive and the negative titles to their own text files, one
# title per line. Neutral titles are kept in memory only.
for path, titles in (("pos_news_titles.txt", pos_list),
                     ("neg_news_titles.txt", neg_list)):
    with open(path, "w", encoding='utf-8', errors='ignore') as out_file:
        out_file.writelines(title + "\n" for title in titles)

{'neu': 0.756, 'compound': -0.3818, 'neg': 0.138, 'pos': 0.107}
{'neu': 0.821, 'compound': -0.533, 'neg': 0.122, 'pos': 0.057}
{'neu': 0.769, 'compound': 0.4019, 'neg': 0.0, 'pos': 0.231}
{'neu': 0.854, 'compound': -0.34, 'neg': 0.146, 'pos': 0.0}
{'neu': 0.517, 'compound': 0.4215, 'neg': 0.0, 'pos': 0.483}
{'neu': 1.0, 'compound': 0.0, 'neg': 0.0, 'pos': 0.0}
{'neu': 0.722, 'compound': 0.7322, 'neg': 0.0, 'pos': 0.278}
{'neu': 0.693, 'compound': 0.6115, 'neg': 0.0, 'pos': 0.307}
{'neu': 0.83, 'compound': 0.4335, 'neg': 0.0, 'pos': 0.17}
{'neu': 0.578, 'compound': 0.7154, 'neg': 0.125, 'pos': 0.297}
{'neu': 0.767, 'compound': 0.7165, 'neg': 0.0, 'pos': 0.233}
{'neu': 0.87, 'compound': 0.128, 'neg': 0.0, 'pos': 0.13}
{'neu': 0.654, 'compound': -0.6904, 'neg': 0.269, 'pos': 0.077}
{'neu': 0.584, 'compound': 0.69, 'neg': 0.139, 'pos': 0.277}
{'neu': 0.945, 'compound': 0.1027, 'neg': 0.0, 'pos': 0.055}
{'neu': 0.244, 'compound': 0.4753, 'neg': 0.0, 'pos': 0.756}
{'neu': 0.909, 'compound': 

## Displaying result

In [6]:
# Bar chart of how many titles fell into each sentiment bucket.
categories = ['neutral', 'positive', 'negative']
counts = [len(neu_list), len(pos_list), len(neg_list)]
data = [go.Bar(x=categories, y=counts)]

chart_title = "Sentiment type distribution in Reddit " + reddit_thread + " thread"
layout = go.Layout(title=chart_title, plot_bgcolor='rgb(230, 230,230)')

fig = go.Figure(data=data, layout=layout)

# Render the figure inline in the notebook.
iplot(fig, filename='sentiment analysis title')