# Reddit title sentiment analysis

In [1]:
import requests
import json
import time
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
import nltk
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as FF

# Show the installed plotly version so a reader can check it against the
# minimum stated below.
print (__version__) # requires version >= 1.9.0
# Make sure the VADER lexicon used by SentimentIntensityAnalyzer is available;
# nltk reports "already up-to-date" when it has been downloaded before.
nltk.download('vader_lexicon')
# Initialise plotly's offline notebook mode so figures can render inline.
# NOTE(review): connected=True presumably loads plotly.js from the CDN rather
# than embedding it in the notebook — confirm against the plotly docs.
init_notebook_mode(connected=True)

2.3.0
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\nicol\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Fetch the first listing page of the chosen subreddit as parsed JSON.
# Defines: reddit_thread, hdr, url, req (the HTTP response), json_data.
reddit_thread = "litecoin"
# Custom User-Agent identifying this script; sent with every request below.
hdr = {'User-Agent': 'windows:r/' + reddit_thread + '.single.result:v1.0' +
       '/u/MrBabiole'}
url = 'https://www.reddit.com/r/' + reddit_thread + '/.json'
# A timeout keeps the cell from hanging forever on a stalled connection.
req = requests.get(url, headers=hdr, timeout=10)
# Fail loudly on HTTP errors (e.g. rate limiting) instead of hitting a
# confusing KeyError later when the error payload has no 'data' key.
req.raise_for_status()
json_data = req.json()

In [3]:
# Pretty-print the raw post records of the first page for inspection.
first_page_children = json_data['data']['children']
posts = json.dumps(first_page_children, sort_keys=True, indent=4)
print(posts)

[
    {
        "data": {
            "approved_at_utc": null,
            "approved_by": null,
            "archived": false,
            "author": "andonevris",
            "author_flair_css_class": "1337",
            "author_flair_text": "1337",
            "banned_at_utc": null,
            "banned_by": null,
            "brand_safe": true,
            "can_gild": false,
            "can_mod_post": false,
            "clicked": false,
            "contest_mode": false,
            "created": 1518709762.0,
            "created_utc": 1518680962.0,
            "distinguished": null,
            "domain": "litecoin-foundation.org",
            "downs": 0,
            "edited": false,
            "gilded": 0,
            "hidden": false,
            "hide_score": false,
            "id": "7xoyix",
            "is_crosspostable": false,
            "is_reddit_media_domain": false,
            "is_self": false,
            "is_video": false,
            "likes": null,
            "link_f

In [4]:
# Page through the subreddit listing until we hold more than POST_TARGET posts
# or the API stops returning new ones.
POST_TARGET = 100      # keep paging while we have this many posts or fewer
REQUEST_DELAY_S = 2    # pause between requests so we do not hammer the API
REQUEST_TIMEOUT_S = 10

# Copy the list: extending data_all must not silently grow
# json_data['data']['children'], which earlier cells have already displayed.
data_all = list(json_data['data']['children'])
# Start from the current count so the "no new posts" check below can fire on
# the very first page instead of costing one extra, useless request.
num_of_posts = len(data_all)
listing_url = 'https://www.reddit.com/r/' + reddit_thread + '/.json'

# `0 <` guards the data_all[-1] lookup when the subreddit has no posts at all.
while 0 < len(data_all) <= POST_TARGET:
    time.sleep(REQUEST_DELAY_S)
    # The 'name' of the last post is used as the `after` cursor for the next page.
    last = data_all[-1]['data']['name']
    page_resp = requests.get(listing_url, headers=hdr,
                             params={'after': str(last)},
                             timeout=REQUEST_TIMEOUT_S)
    # Surface rate limiting / server errors here rather than as a KeyError.
    page_resp.raise_for_status()
    data_all += page_resp.json()['data']['children']
    if num_of_posts == len(data_all):
        # The page added nothing: we reached the end of the listing.
        break
    num_of_posts = len(data_all)

## Labeling data

In [5]:
# Score every collected title with VADER and sort it into a positive,
# negative or neutral bucket based on the compound score.
sia = SIA()
pos_list, neg_list, neu_list = [], [], []
for post in data_all:
    title = post['data']['title']
    res = sia.polarity_scores(title)
    print(res)

    compound = res['compound']
    if compound > 0.2:
        bucket = pos_list
    elif compound < -0.2:
        bucket = neg_list
    else:
        # Scores within [-0.2, 0.2] count as neutral.
        bucket = neu_list
    bucket.append(title)

# Persist the positive and negative titles, one title per line.
# Neutral titles are kept in memory only.
for out_path, titles in (("pos_news_titles.txt", pos_list),
                         ("neg_news_titles.txt", neg_list)):
    with open(out_path, "w", encoding='utf-8', errors='ignore') as f_out:
        f_out.writelines(title + "\n" for title in titles)

{'neu': 1.0, 'neg': 0.0, 'pos': 0.0, 'compound': 0.0}
{'neu': 1.0, 'neg': 0.0, 'pos': 0.0, 'compound': 0.0}
{'neu': 0.63, 'neg': 0.0, 'pos': 0.37, 'compound': 0.5719}
{'neu': 1.0, 'neg': 0.0, 'pos': 0.0, 'compound': 0.0}
{'neu': 1.0, 'neg': 0.0, 'pos': 0.0, 'compound': 0.0}
{'neu': 0.85, 'neg': 0.0, 'pos': 0.15, 'compound': 0.4588}
{'neu': 0.706, 'neg': 0.0, 'pos': 0.294, 'compound': 0.5147}
{'neu': 0.4, 'neg': 0.0, 'pos': 0.6, 'compound': 0.8176}
{'neu': 0.811, 'neg': 0.0, 'pos': 0.189, 'compound': 0.2732}
{'neu': 1.0, 'neg': 0.0, 'pos': 0.0, 'compound': 0.0}
{'neu': 0.513, 'neg': 0.0, 'pos': 0.487, 'compound': 0.5859}
{'neu': 0.755, 'neg': 0.0, 'pos': 0.245, 'compound': 0.3818}
{'neu': 0.575, 'neg': 0.425, 'pos': 0.0, 'compound': -0.4019}
{'neu': 0.656, 'neg': 0.0, 'pos': 0.344, 'compound': 0.6369}
{'neu': 0.658, 'neg': 0.0, 'pos': 0.342, 'compound': 0.3818}
{'neu': 1.0, 'neg': 0.0, 'pos': 0.0, 'compound': 0.0}
{'neu': 1.0, 'neg': 0.0, 'pos': 0.0, 'compound': 0.0}
{'neu': 1.0, 'neg':

## Displaying result

In [6]:
# Bar chart of how many titles fell into each sentiment bucket.
data = [go.Bar(
            x=['neutral', 'positive', 'negative'],
            y=[len(neu_list), len(pos_list), len(neg_list)]
    )]
# Title and axis labels so the figure can be read on its own.
layout = go.Layout(
    title='Sentiment of r/' + reddit_thread + ' post titles',
    xaxis=dict(title='Sentiment'),
    yaxis=dict(title='Number of titles'),
)
fig = go.Figure(data=data, layout=layout)

# Render with the offline iplot imported at the top of the notebook (offline
# mode is initialised by init_notebook_mode). The previous py.iplot call
# targets the Plotly cloud service, which needs account credentials and
# network access and therefore fails on a fresh machine.
iplot(fig, filename='basic-bar')