In [1]:
import os

from eventregistry import *
import pandas as pd
# Shared Event Registry client used by the query helpers in this notebook.
# The API key is read from the environment so the secret is never stored in the
# notebook; set EVENTREGISTRY_API_KEY before starting the kernel (a missing
# variable raises KeyError here instead of failing later on the first query).
er = EventRegistry(apiKey=os.environ["EVENTREGISTRY_API_KEY"], allowUseOfArchive=False)

In [2]:
# Columns of the DataFrame returned by get_articles, in output order.
_ARTICLE_COLUMNS = [
    'uri', 'lang', 'isDuplicate', 'date', 'time', 'dateTime', 'dateTimePub',
    'dataType', 'sim', 'url', 'title', 'body',
    'source_uri', 'source_dataType', 'source_title',
    'authors', 'sentiment', 'wgt', 'relevance', 'image', 'eventUri',
]

# Columns copied unchanged from the top level of an article dict.
_DIRECT_FIELDS = [
    'uri', 'lang', 'isDuplicate', 'date', 'time', 'dateTime', 'dateTimePub',
    'dataType', 'sim', 'url', 'title', 'body',
    'sentiment', 'wgt', 'relevance', 'image', 'eventUri',
]


def _article_to_record(article):
    """Flatten one article dict into a flat record keyed by `_ARTICLE_COLUMNS`."""
    # The `or` fallbacks also cover keys that are present but set to None.
    source = article.get('source') or {}
    authors = article.get('authors') or []

    record = {field: article.get(field) for field in _DIRECT_FIELDS}
    record['source_uri'] = source.get('uri')
    record['source_dataType'] = source.get('dataType')
    record['source_title'] = source.get('title')
    # Authors are collapsed to one comma-separated string ('' when there are none).
    record['authors'] = ', '.join(author['name'] for author in authors if author.get('name'))
    return record


def get_articles(keywords=None,
                 keywordsLoc="body",
                 andOrKW="and",
                 ignoreKeywords=None,
                 andOrIKW="and",
                 dataType="news",
                 lang="eng",
                 dateStart=None,
                 dateEnd=None,
                 maxRows=100):
    """
    Description:
    Find articles based on specified criteria, using the module-level `er` client.

    Parameters:
    - keywords (str or list): Keywords to search for in articles. Can be a single string or a list of strings.
    - keywordsLoc (str): Location in the article to search for keywords. Options: "body" (default), "title", or "body,title".
    - andOrKW (str): Operator to use for combining keywords. Options: "and" (default) for all keywords, "or" for any keyword.
    - ignoreKeywords (str or list): Keywords to ignore in articles.
    - andOrIKW (str): Operator to use for combining ignored keywords. Options: "and" (default) for all ignored keywords, "or" for any ignored keyword.
    - dataType (str or list): Types of data to search. Options: "news" (default) for news content, "pr" for press releases, or "blog".
                              If multiple data types are desired, provide them in a list (e.g., ["news", "pr"]).
    - lang (str or list): Language(s) of articles to search for. If more than one language is specified, resulting articles can be written in any of the languages.
    - dateStart (str): Start date of the time interval to search for articles (format: "YYYY-MM-DD").
    - dateEnd (str): End date of the time interval to search for articles (format: "YYYY-MM-DD").
    - maxRows (int): Maximum number of articles to retrieve.

    Returns:
    pandas.DataFrame: One row per article, sorted by date as returned by the API.
    The columns are always the 21 names in `_ARTICLE_COLUMNS`, even when no
    article matches (the frame is then empty).
    None: if `andOrKW` / `andOrIKW` is neither "and" nor "or" while the matching
    keyword argument is given (an error message is printed).
    """

    if keywords is not None:
        if andOrKW == "and":
            keywords = QueryItems.AND(keywords)
        elif andOrKW == "or":
            keywords = QueryItems.OR(keywords)
        else:
            print(f"Error, andOrKW must be 'and' or 'or' not {andOrKW}.")
            return None
    if ignoreKeywords is not None:
        if andOrIKW == "and":
            ignoreKeywords = QueryItems.AND(ignoreKeywords)
        elif andOrIKW == "or":
            ignoreKeywords = QueryItems.OR(ignoreKeywords)
        else:
            print(f"Error, andOrIKW must be 'and' or 'or' not {andOrIKW}.")
            return None

    q = QueryArticlesIter(
        keywords=keywords,
        keywordsLoc=keywordsLoc,
        ignoreKeywords=ignoreKeywords,
        dataType=dataType,
        lang=lang,
        dateStart=dateStart,
        dateEnd=dateEnd)

    # Concepts and categories are requested from the API but are not flattened
    # into the output columns.
    returnInfo = ReturnInfo(articleInfo=ArticleInfoFlags(concepts=True, categories=True))

    # Collect plain records and build the frame once: concatenating a one-row
    # DataFrame per article re-copies the whole frame on every iteration.
    records = [
        _article_to_record(article)
        for article in q.execQuery(er, sortBy="date", returnInfo=returnInfo, maxItems=maxRows)
    ]
    return pd.DataFrame(records, columns=_ARTICLE_COLUMNS)

In [3]:
# Query articles for "Barack Obama" over 2024-03-01 .. 2024-03-20; every other
# filter (keyword location, language, data type, row cap) uses get_articles' defaults.
res = get_articles(
    keywords="Barack Obama",
    dateStart='2024-03-01',
    dateEnd='2024-03-20',
)

In [4]:
# Bare last expression: the notebook renders the value returned by get_articles
# (a DataFrame of articles, or None if an invalid and/or operator was passed).
res

Unnamed: 0,uri,lang,isDuplicate,date,time,dateTime,dateTimePub,dataType,sim,url,...,body,source_uri,source_dataType,source_title,authors,sentiment,wgt,relevance,image,eventUri
0,8038634821,eng,True,2024-03-20,14:41:53,2024-03-20T14:41:53Z,2024-03-20T14:40:47Z,news,0.000000,https://cbs12.com/news/nation-world/go-f-yours...,...,WASHINGTON (TND) -- An angry exchange between ...,cbs12.com,news,WPEC,Jackson Walker,0.027451,448641713,1,https://cbs12.com/resources/media/7a7b6797-fb8...,
1,8038621745,eng,True,2024-03-20,14:33:24,2024-03-20T14:33:24Z,2024-03-20T14:32:44Z,news,0.000000,https://fox11online.com/news/nation-world/go-f...,...,WASHINGTON (TND) -- An angry exchange between ...,fox11online.com,news,WLUK,Jackson Walker,0.027451,448641204,1,https://fox11online.com/resources/media/7a7b67...,
2,8038620507,eng,True,2024-03-20,14:32:26,2024-03-20T14:32:26Z,2024-03-20T14:31:56Z,news,0.000000,https://wsbt.com/news/nation-world/go-f-yourse...,...,WASHINGTON (TND) -- An angry exchange between ...,wsbt.com,news,WSBT,Jackson Walker,0.027451,448641146,1,https://wsbt.com/resources/media/7a7b6797-fb8d...,
3,8038616424,eng,True,2024-03-20,14:29:47,2024-03-20T14:29:47Z,2024-03-20T14:29:04Z,news,0.000000,https://cbs6albany.com/news/nation-world/go-f-...,...,WASHINGTON (TND) -- An angry exchange between ...,cbs6albany.com,news,WRGB,Jackson Walker,0.027451,448640987,1,https://cbs6albany.com/resources/media/7a7b679...,
4,8038615089,eng,True,2024-03-20,14:28:54,2024-03-20T14:28:54Z,2024-03-20T14:28:09Z,news,0.000000,https://krcrtv.com/news/nation-world/go-f-your...,...,WASHINGTON (TND) -- An angry exchange between ...,krcrtv.com,news,KRCR,Jackson Walker,0.027451,448640934,1,https://krcrtv.com/resources/media/7a7b6797-fb...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,8038262683,eng,False,2024-03-20,11:07:07,2024-03-20T11:07:07Z,2024-03-20T11:04:59Z,news,0.466667,https://ca.news.yahoo.com/did-last-top-basketb...,...,Entering the 2023-24 men's college basketball ...,ca.news.yahoo.com,news,Yahoo,,0.050980,448628827,1,https://media.zenfs.com/en/lexington_herald_le...,eng-9406756
96,8038263752,eng,False,2024-03-20,11:05:50,2024-03-20T11:05:50Z,2024-03-20T11:05:30Z,news,0.831373,https://www.voanews.com/a/former-home-of-aung-...,...,The house where deposed Myanmar leader Aung Sa...,voanews.com,news,VOA Voice of America,,0.027451,448628750,1,https://gdb.voanews.com/4B746A7E-D159-45E5-B4A...,eng-9409638
97,8038259685,eng,True,2024-03-20,11:04:08,2024-03-20T11:04:08Z,2024-03-20T11:03:28Z,news,0.000000,https://www.dailymaverick.co.za/article/2024-0...,...,"The family property on Yangon's Inye Lake, mea...",dailymaverick.co.za,news,Daily Maverick,Reuters,-0.074510,448628648,1,https://www.dailymaverick.co.za/wp-content/upl...,
98,8038250211,eng,True,2024-03-20,10:58:40,2024-03-20T10:58:40Z,2024-03-20T10:58:08Z,news,0.000000,https://www.sunjournal.com/2024/03/19/david-mi...,...,"David Mixner, a political strategist who helpe...",sunjournal.com,news,Lewiston Sun Journal,,0.043137,448628320,1,https://www.sunjournal.com/wp-content/uploads/...,
