In [2]:
!pip install newsapi

Collecting newsapi
  Downloading newsapi-0.1.1-py2.py3-none-any.whl (4.1 kB)
Installing collected packages: newsapi
Successfully installed newsapi-0.1.1


In [10]:
!pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-0.21.0-py3-none-any.whl (18 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-0.21.0


In [23]:
import numpy as np
import pandas as pd
import requests
#from newsapi import NewsApiClient
from datetime import datetime
from dotenv import load_dotenv, find_dotenv
import os

In [24]:
def get_newskey():
    """
    This function will fetch your NEWS_API from the .env file in the root folder.
    Your .env file should contain a like like: NEWS_API='asdfasdfasdfsadf'
    """
    env_path = find_dotenv()
    file = load_dotenv(env_path)
    return os.getenv('NEWS_API')

In [25]:
def get_news(keyword):
    """
    This function will fetch data from NEWS API based on the keyword entered. 
    The API Key required is taken from the function 'get_newskey'.
    """
    #api_key taken from the get_newskey function
    api_key = get_newskey()
    
    #base url of the API forming the basis for the request
    base_url = "https://newsapi.org/v2/everything?"
    
    #parts of the news articles that the search shall refer to. It is possible to choose between "content", "title", "content"
    search_in = "content"
    
    #criteria for sorting the output of the API
    sort = "popularity" #relevancy, popularity, publishedAt
    
    #web sources to be used
    sources = "cnn" #domains where we would like to search
    
    #date where the search shall start, default via datetime.today: the current date when the API request is made
    date = datetime.today().strftime('%Y-%m-%d')
    
    # line of code to make the actual request based on the variables defined before
    source_url = f'{base_url}q={keyword}&from="{date}"&sortBy={sort}&sources={sources}&searchIn={search_in}&apiKey={api_key}'
    
    news = requests.get(source_url, allow_redirects=True).json()

    return(news)

In [26]:
def get_urls(keyword):
    """
    This function will filter the output of the API resulting in a list 
    of the URL's of the articles included in that output from NEWS API 
    based on the keyword entered. 
    """
    
    api_result = get_news(keyword)
    
    list_of_urls = []
    
    for i in range(len(api_result['articles'])):
        
        list_of_urls.append(api_result['articles'][i]['url'])
    
    return list_of_urls

In [27]:
get_urls("house")

['https://www.cnn.com/2022/11/15/politics/house-republican-vote-kevin-mccarthy/index.html',
 'https://www.cnn.com/2022/11/16/opinions/biden-roadblocks-house-under-gop-control-zelizer/index.html',
 'https://www.cnn.com/2022/11/16/politics/virginia-mclaurin-dies/index.html',
 'https://www.cnn.com/2022/10/29/politics/nancy-pelosi-paul-pelosi-attack-statement/index.html',
 'https://www.cnn.com/2022/10/27/politics/new-york-red-wave-biden-maloney/index.html',
 'https://www.cnn.com/2022/11/14/politics/takeaways-midterm-election-analysis/index.html',
 'https://www.cnn.com/2022/10/26/politics/paul-ryan-donald-trump-president-2024/index.html',
 'https://www.cnn.com/2022/10/26/politics/kevin-mccarthy-house-gop-ukraine-funding-blank-check/index.html',
 'https://www.cnn.com/2022/11/10/politics/what-to-know-vote-counting-arizona-nevada/index.html',
 'https://www.cnn.com/2022/11/05/politics/gallery/road-to-2022-midterms/index.html',
 'https://www.cnn.com/2022/10/26/uk/king-charles-bagpiper-royal-trad