In [7]:
import requests
import json
from config import n_key
from domains import domains
import os
import pandas as pd
#run pip install nltk in your terminal with your PythonData environment activated if you have not already installed nltk
import nltk 
#nltk.download('vader_lexicon') #<- you will need to run this the first time you run this code 
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Shared VADER analyzer instance; used below to score article titles.
SID = SentimentIntensityAnalyzer()

In [8]:
# Change these variables to your desired search term and your initials.
# Both values are also used in the names of the CSV files written below.
keyword = 'immigrant'
initials = 'WMS'

In [9]:
# n_key is your API key for NewsAPI
# keyword is the word you are interested in searching
# domains is the list of US news source domains to search in NewsAPI (one request is made per domain)
# initials is a string of your initials

def getNewsAPIData(n_key, keyword, domains, initials):
    """Download NewsAPI articles for a keyword and score each title with VADER.

    One request is sent to the NewsAPI ``everything`` endpoint for each entry
    in ``domains``. The collected articles are written to
    ``initial{keyword}NewsAPIdata{initials}.csv``; the same rows plus four
    VADER score columns computed from the article title are then written to
    ``sentiment{keyword}NewsAPIdata{initials}.csv``. Both paths are relative,
    so the files land in the current working directory.

    Parameters
    ----------
    n_key
        API key for NewsAPI.
    keyword
        Search term sent as the ``q`` parameter; also used in the file names.
    domains
        Iterable of values sent, one per request, as the ``domains`` parameter.
    initials
        String used as a suffix in the output file names.

    Raises
    ------
    RuntimeError
        If NewsAPI answers a request with a status other than ``"ok"``
        (for example an invalid key or an exhausted rate limit).
    """
    # HTTPS so the API key in the query string is not sent in clear text.
    base_url = 'https://newsapi.org/v2/everything'
    base_params = {
        'language': 'en',
        'pageSize': 100,
        'sortBy': 'relevance',
        'apiKey': n_key,
        'q': keyword,
    }
    # Fixed column list so an empty result still yields a frame with a
    # 'Title' column instead of failing later with a KeyError.
    columns = ['Keyword', 'Source', 'Author', 'Title', 'URL', 'Text', 'Published']

    data_master = []

    for domain in domains:
        # Build a fresh dict per request rather than mutating a shared one.
        response = requests.get(
            base_url,
            params={**base_params, 'domains': domain},
            timeout=30,  # do not hang the notebook on a stalled connection
        )
        data = response.json()

        # NewsAPI reports failures in the JSON body; surface them clearly
        # instead of hitting a KeyError on the missing 'articles' key.
        if data.get('status') != 'ok':
            raise RuntimeError(
                f"NewsAPI request for domain {domain!r} failed: "
                f"{data.get('code')} - {data.get('message')}"
            )

        for article in data['articles']:
            data_master.append({
                'Keyword': keyword,
                'Source': article['source']['name'],
                'Author': article['author'],
                'Title': article['title'],
                'URL': article['url'],
                'Text': article['content'],
                'Published': article['publishedAt'],
            })

    data_df = pd.DataFrame(data_master, columns=columns)
    data_df.to_csv(f'initial{keyword}NewsAPIdata{initials}.csv', index=False)

    # Score each title once and reuse the result for all four columns.
    # Non-string titles (e.g. null in the API response) get an empty result
    # so their scores end up as NaN instead of crashing the analyzer.
    scores = [
        SID.polarity_scores(title) if isinstance(title, str) else {}
        for title in data_df['Title']
    ]
    missing = float('nan')
    score_columns = (
        ('compound score', 'compound'),
        ('negative score', 'neg'),
        ('positive score', 'pos'),
        ('neutral score', 'neu'),
    )
    for column, key in score_columns:
        data_df[column] = [score.get(key, missing) for score in scores]

    data_df.to_csv(f'sentiment{keyword}NewsAPIdata{initials}.csv', index=False)

In [10]:
# Fetch the articles and write both CSV files (raw and sentiment-scored).
getNewsAPIData(
    n_key=n_key,
    keyword=keyword,
    domains=domains,
    initials=initials,
)

In [11]:
# Reload the sentiment-scored CSV from disk and preview the first rows.
sentiment_csv = f'sentiment{keyword}NewsAPIdata{initials}.csv'
sentiment_data = pd.read_csv(sentiment_csv)
sentiment_data.head()

Unnamed: 0,Keyword,Source,Author,Title,URL,Text,Published,compound score,negative score,positive score,neutral score
0,immigrant,The Washington Post,Nick Miroff,Hospital where activists say ICE detainees wer...,https://www.washingtonpost.com/immigration/ice...,According to a complaint filed last week by im...,2020-09-22T20:10:00Z,0.0,0.0,0.0,1.0
1,immigrant,The Washington Post,"Nick Miroff, Devlin Barrett",ICE preparing targeted arrests in ‘sanctuary c...,https://www.washingtonpost.com/immigration/tru...,"Chad Wolf, acting secretary of the Department ...",2020-09-29T22:37:00Z,-0.4404,0.182,0.0,0.818
2,immigrant,The Washington Post,Tim Elfrink,‘They’re still not taking this seriously’: Whi...,https://www.washingtonpost.com/nation/2020/09/...,Ms. Wootens whistleblowing disclosures confirm...,2020-09-15T10:39:00Z,-0.1531,0.198,0.126,0.676
3,immigrant,The Washington Post,"James A. McCann, Michael Jones-Correa",Trump’s attacks prompted Latino immigrants to ...,https://www.washingtonpost.com/politics/2020/1...,"How has all this affected Latino immigrants, t...",2020-10-01T11:00:12Z,-0.4404,0.195,0.0,0.805
4,immigrant,The Washington Post,Steven Moore,ICE is accused of sterilizing detainees. That ...,https://www.washingtonpost.com/politics/2020/0...,Forced sterilization in the U.S.\r\nSince the ...,2020-09-25T11:00:00Z,-0.6369,0.286,0.0,0.714
