In [5]:
import requests
import json
from config import n_key
from domains import domains
import os
import pandas as pd
# Install nltk first if needed: run `pip install nltk` in a terminal with your PythonData environment activated.
import nltk
# SentimentIntensityAnalyzer() needs the VADER lexicon on disk; fetch it only when
# it is missing so the notebook also runs from a fresh kernel on a new machine.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Shared analyzer instance used by getNewsAPIData to score article titles.
SID = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\scotw\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [6]:
# Edit these two values before running: your initials and the search term.
# Both strings are embedded in the names of the CSV files written below.
initials: str = 'SAW'
keyword: str = 'refugees'

In [7]:
# n_key is your API key for NewsAPI
# keyword is the word you are interested in searching
# domains is the list of US news source domains available in NewsAPI (one request is made per domain)
# initials is a string of your initials

def getNewsAPIData(n_key, keyword, domains, initials):
    """Download NewsAPI articles for a keyword, score title sentiment, and save CSVs.

    One request is sent to the NewsAPI ``/v2/everything`` endpoint for every
    entry in ``domains`` (English articles, up to 100 per request, sorted by
    relevance). The collected articles are written to
    ``initial{keyword}NewsAPIdata{initials}.csv``; the same rows plus four
    VADER scores computed from each title are written to
    ``sentiment{keyword}NewsAPIdata{initials}.csv``.

    Parameters
    ----------
    n_key : str
        NewsAPI key, sent as the ``apiKey`` query parameter.
    keyword : str
        Search term, sent as ``q`` and stored in the ``Keyword`` column.
    domains : iterable of str
        Values passed one at a time as the ``domains`` query parameter.
    initials : str
        Suffix used in both output file names.

    Returns
    -------
    None
        Results are only written to disk.

    Raises
    ------
    RuntimeError
        If NewsAPI answers a request with a status other than ``"ok"``.
    """
    # HTTPS keeps the API key (sent in the query string) off the wire in clear text.
    base_url = 'https://newsapi.org/v2/everything'
    base_params = {
        'language': 'en',
        'pageSize': 100,
        'sortBy': 'relevance',
        'apiKey': n_key,
        'q': keyword,
    }
    columns = ['Keyword', 'Source', 'Author', 'Title', 'URL', 'Text', 'Published']

    data_master = []

    for domain in domains:
        # A timeout prevents one stalled connection from hanging the whole run.
        response = requests.get(
            base_url,
            params={**base_params, 'domains': domain},
            timeout=30,
        )
        data = response.json()

        # Error payloads carry no 'articles' key; report the API's own
        # explanation instead of failing later with a bare KeyError.
        if data.get('status') != 'ok':
            raise RuntimeError(
                f"NewsAPI request for domain {domain!r} failed: "
                f"{data.get('code')}: {data.get('message')}"
            )

        for article in data.get('articles', []):
            data_master.append({
                'Keyword': keyword,
                'Source': article['source']['name'],
                'Author': article['author'],
                'Title': article['title'],
                'URL': article['url'],
                'Text': article['content'],
                'Published': article['publishedAt'],
            })

    # Explicit columns keep the frame well-formed even when no article matched.
    data_df = pd.DataFrame(data_master, columns=columns)
    data_df.to_csv(f'initial{keyword}NewsAPIdata{initials}.csv', index=False)

    # Score each title once; a missing (non-string) title is scored as empty text.
    title_scores = [
        SID.polarity_scores(title if isinstance(title, str) else '')
        for title in data_df['Title']
    ]
    score_columns = {
        'compound score': 'compound',
        'negative score': 'neg',
        'positive score': 'pos',
        'neutral score': 'neu',
    }
    for column, score_key in score_columns.items():
        data_df[column] = [scores[score_key] for scores in title_scores]

    data_df.to_csv(f'sentiment{keyword}NewsAPIdata{initials}.csv', index=False)

In [8]:
# Fetch the articles for the configured keyword and write both CSV files.
getNewsAPIData(n_key=n_key, keyword=keyword, domains=domains, initials=initials)

In [9]:
# Reload the scored articles from disk and preview the first five rows.
sentiment_data = pd.read_csv(
    filepath_or_buffer=f'sentiment{keyword}NewsAPIdata{initials}.csv'
)
sentiment_data.head(n=5)

Unnamed: 0,Keyword,Source,Author,Title,URL,Text,Published,compound score,negative score,positive score,neutral score
0,refugees,The Washington Post,Nick Miroff,"Trump cuts refugee cap to lowest level ever, d...",https://www.washingtonpost.com/immigration/tru...,"The 15,000 figure, the lowest since the 1980 R...",2020-10-01T18:07:00Z,-0.8779,0.41,0.0,0.59
1,refugees,The Washington Post,Max Boot,Two new polls show the damage Trump has done t...,https://www.washingtonpost.com/opinions/2020/0...,Part of the decline is due to Trumps egregious...,2020-09-17T14:00:00Z,-0.4939,0.176,0.0,0.824
2,refugees,The Washington Post,Aaron Blake,Trump’s rally rhetoric is becoming uglier,https://www.washingtonpost.com/politics/2020/0...,"Over the course of the rallies, Trump has ampe...",2020-09-24T10:00:41Z,-0.4939,0.39,0.0,0.61
3,refugees,The Washington Post,Greg Sargent,Trump’s ugly rant in Minnesota shows why Repub...,https://www.washingtonpost.com/opinions/2020/1...,But Trumps latest rant at a rally in Minnesota...,2020-10-01T14:41:10Z,-0.891,0.624,0.0,0.376
4,refugees,The Washington Post,John Wagner,Election live updates: 9/11 events dominate sc...,https://www.washingtonpost.com/elections/2020/...,Trump again sought to paint a dark image of Am...,2020-09-11T11:27:32Z,-0.128,0.143,0.0,0.857
