In [2]:
import os
from newsapi import NewsApiClient
# Function to fetch news for a single API key
def fetch_news_newsapi(api_keys, keywords, from_date, to_date, language):
    newsapi = NewsApiClient(api_key=api_keys[0])
    final_articles = {keyword: [] for keyword in keywords}
    for keyword in keywords:
        page = 1
        while True:
            try:
                response = newsapi.get_everything(q=keyword,
                                                from_param=from_date,
                                                to=to_date,
                                                language=language,
                                                page=page)
                if response['status'] == 'ok':
                    final_articles[keyword] += response['articles']
                    page += 1
            except Exception as e:
                e = dict(e.args[0])
                if e['code'] == 'rateLimited':
                    print('Rate limited, switching to next API key')
                    api_keys.append(api_keys.pop(0))
                    newsapi = NewsApiClient(api_key=api_keys[0])

                elif e['code'] == 'maximumResultsReached':
                    print('Maximum results reached, changing keyword')
                    break
                else:
                    print('Unknown error: ', e)
                    break
    
    return final_articles


            




In [None]:
import datetime
from eventregistry import EventRegistry, QueryArticles, RequestArticlesInfo

def fetch_news_newasai(api_keys, keywords, from_date, to_date, language):
    """
    Fetch articles based on keywords, date range, and language using multiple API keys.

    :param api_keys: List of EventRegistry API keys
    :param keywords: List of keywords to search for
    :param start_date: Start date for the article search (YYYY-MM-DD format)
    :param end_date: End date for the article search (YYYY-MM-DD format)
    :param language: Language code for the articles (e.g., 'eng' for English)
    :return: A list of articles
    """
    if language == 'en':
        language = 'eng'
    articles = []
    start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d")
    
    for key in api_keys:
        er = EventRegistry(apiKey=key)
        
        # Combine keywords into a single query string
        keyword_query = " OR ".join(keywords)
        
        # Create a query for articles
        q = QueryArticles(keywords=keyword_query, lang=language)
        q.setDateLimit(start_date, end_date)
        q.addRequestedResult(RequestArticlesInfo(count=100))  # Adjust count as needed
        
        # Execute the query
        response = er.execQuery(q)
        
        if 'articles' in response:
            articles.extend(response['articles']['results'])
    
    return articles

In [15]:
import requests
import json
import time
from datetime import datetime, timedelta

def fetch_news_gnews(api_keys, keywords, from_date, to_date, language):
    final_articles = []
    used_keys = []
    # Split date range into smaller segments (e.g., daily)
    start_date = datetime.strptime(from_date, "%Y-%m-%d")
    end_date = datetime.strptime(to_date, "%Y-%m-%d")
    delta = timedelta(days=1)  # Adjust as needed
    
    while start_date <= end_date:
        from_date_segment = start_date.strftime("%Y-%m-%d") + 'T00:00:00Z'
        to_date_segment = start_date.strftime("%Y-%m-%d") + 'T23:59:59Z'
        
        for keyword in keywords:  # Iterating over individual keywords
            if len(used_keys) == len(api_keys):
                print('All API keys used')
                return final_articles
            
            response = requests.get(f'https://gnews.io/api/v4/search?q={keyword}&from={from_date_segment}&to={to_date_segment}&lang={language}&token={api_keys[0]}')
            code = response.status_code
            response = json.loads(response.text)
            
            if code == 200:
                final_articles += response['articles']
            elif code == 403:
                print('Rate limited, switching to next API key')
                api_keys.append(api_keys.pop(0))  # Rotate API keys
                used_keys.append(api_keys[-1])
            else:
                print('Unknown error: ', response)
                return final_articles
            
            time.sleep(1)  # Be respectful in your request pacing
        
        start_date += delta  # Move to the next segment
    
    return final_articles

            

arts = fetch_news_gnews(['eb6d8b1c0cf1c2daf6d6710d120352b5', '76357433dca2f5a22351ae5ecb681ce6'], ['easter', 'islam', 'christian', 'palestine', 'israel', 'pope', 'toriyama', 'apple', 'x', 'jesus', 'resurrection', 'helldivers', 'ukraine', 'putin', 'trump'], '2024-03-20', '2024-03-27', 'en')

Rate limited, switching to next API key


In [17]:
arts

[{'title': 'Orkney shop owner mistakenly orders 720 Easter eggs - more than entire population of the island',
  'description': 'After spreading word about the comical overorder around the community, Dan Dafydd came up with the idea to raffle off 100 of the Easter eggs to one lucky winner, and raise money for charity.',
  'content': 'For the Orkney island of Sanday, Easter has come early.\nFollowing a slight technical mishap on an order of festive treats, shop owner, Dan Dafydd, was delivered a whopping 720 chocolate eggs instead of the 80 he meant to buy.\nAnd with a population of... [2577 chars]',
  'url': 'https://news.sky.com/story/orkney-shop-owner-mistakenly-orders-720-easter-eggs-more-than-entire-population-of-the-island-13098797',
  'image': 'https://e3.365dm.com/24/03/1600x900/skynews-easter-eggs-orkney_6496194.jpg?20240320183913',
  'publishedAt': '2024-03-20T23:21:00Z',
  'source': {'name': 'Sky News', 'url': 'https://news.sky.com'}},
 {'title': 'Rwanda bill likely to be stal

In [20]:
len(arts)

771

In [19]:
len(set([art['url'] for art in arts]))

744

In [53]:
arts = fetch_news_newsapi(['c14142ee09054d3bbbb550eb004d90ed', 'a75bf646c7d44e7c9b6ccf12a90baf9c'], ['easter', 'islam', 'christian', 'palestine', 'israel', 'pope', 'toriyama', 'apple', 'x', 'jesus', 'resurrection', 'helldivers', 'ukraine', 'putin', 'trump'], '2024-03-20', '2024-03-27', 'en')

Maximum results reached, changing keyword
Maximum results reached, changing keyword
Maximum results reached, changing keyword
Maximum results reached, changing keyword
Maximum results reached, changing keyword
Maximum results reached, changing keyword
Maximum results reached, changing keyword
Maximum results reached, changing keyword
Maximum results reached, changing keyword
Maximum results reached, changing keyword
Rate limited, switching to next API key
Maximum results reached, changing keyword
Maximum results reached, changing keyword
Maximum results reached, changing keyword
Maximum results reached, changing keyword
Maximum results reached, changing keyword


In [54]:
len(set([article['url'] for keyword in arts for article in arts[keyword]]))

5616

In [57]:
print(arts['easter'][0])

{'source': {'id': 'bbc-news', 'name': 'BBC News'}, 'author': 'https://www.facebook.com/bbcnews', 'title': 'Cadbury store criticised for gesture eggs rebrand', 'description': 'Christian Concern says the store in Lincolnshire is trying to "erase" the Easter connection.', 'url': 'https://www.bbc.co.uk/news/uk-england-lincolnshire-68673505', 'urlToImage': 'https://ichef.bbci.co.uk/news/1024/branded_news/14CC8/production/_133029158_mediaitem133029157.jpg', 'publishedAt': '2024-03-27T12:51:45Z', 'content': 'A Cadbury store has been criticised by a Christian campaign group for advertising chocolate Easter eggs as "gesture" eggs.\r\nAccording to the Daily Telegraph, the shop in Springfields Outlet in Spaldi… [+2363 chars]'}


In [35]:
api_key = 'c14142ee09054d3bbbb550eb004d90ed'
keywords = ['easter']
from_date = '2024-03-20'
to_date = '2024-03-27'
language = 'en'
newsapi = NewsApiClient(api_key=api_key)

responses = []
for i in range(1, 6):
    response = newsapi.get_everything(q=keywords[0],
                                              from_param=from_date,
                                              to=to_date,
                                              language=language,
                                              page=i)
    print(response)
    responses += response['articles']
# {'status': 'error', 'code': 'maximumResultsReached', 'message': 'You have requested too many results. Developer accounts are limited to a max of 100 results. You are trying to request results 19980 to 20000. Please upgrade to a paid plan if you need more results.'}

{'status': 'ok', 'totalResults': 2318, 'articles': [{'source': {'id': 'bbc-news', 'name': 'BBC News'}, 'author': 'https://www.facebook.com/bbcnews', 'title': 'Cadbury store criticised for gesture eggs rebrand', 'description': 'Christian Concern says the store in Lincolnshire is trying to "erase" the Easter connection.', 'url': 'https://www.bbc.co.uk/news/uk-england-lincolnshire-68673505', 'urlToImage': 'https://ichef.bbci.co.uk/news/1024/branded_news/14CC8/production/_133029158_mediaitem133029157.jpg', 'publishedAt': '2024-03-27T12:51:45Z', 'content': 'A Cadbury store has been criticised by a Christian campaign group for advertising chocolate Easter eggs as "gesture" eggs.\r\nAccording to the Daily Telegraph, the shop in Springfields Outlet in Spaldi… [+2363 chars]'}, {'source': {'id': 'bbc-news', 'name': 'BBC News'}, 'author': 'https://www.facebook.com/bbcnews', 'title': 'King will attend Easter service at Windsor Castle', 'description': 'The King will attend with the Queen and other 

In [37]:
urls = [response['url'] for response in responses]
len(set(urls))

458

In [15]:
articles2

[{'source': {'id': None, 'name': 'The New Republic'},
  'author': 'Michael Tomasky',
  'title': 'It’s the Biden vs. Trump Economy—and Hell No, It’s Not Even Close',
  'description': 'I’m going to tell you something that I’m pretty sure you don’t know—and that you probably won’t even believe. Ready? Real wages are now growing in the United States at a pace faster than the spike in the cost of living since the pandemic. More than that: For …',
  'url': 'https://newrepublic.com/article/180093/biden-trump-economy-election-not-even-close',
  'urlToImage': 'https://images.newrepublic.com/79446c8effd0f3772a98c78a0dd612ecc22e96a5.jpeg?w=1200&h=630&crop=faces&fit=crop&fm=jpg',
  'publishedAt': '2024-03-25T10:00:00Z',
  'content': 'Biden took a pretty good swing at Trump last week when Trump raised the old Reagan question, Are you better off than you were four years ago? It was astonishingly tone-deaf timing on Trumps part, sin… [+1111 chars]'},
 {'source': {'id': None, 'name': 'Phys.Org'},
  'a