In [2]:
#!pip install eventregistry

In [3]:
from eventregistry import *
import pandas as pd
import json

# Load the API key from the JSON file
with open("config.json", "r") as file:
    config = json.load(file)
api_key = config["api_key"]

# Initialize EventRegistry with the API key
er = EventRegistry(apiKey=api_key, allowUseOfArchive=False)

# Define topics to search for
topics = [
    "Donald Trump", "Kamala Harris", "Israel", "Palestine", "Palestinians", "Hamas", "FEMA", "Abortion",
    "Inflation", "Unemployment", "Economy", "Dockworkers", "ILA Port"
]

# Get URIs for specified sources
source_names = ["NPR", "MSNBC", "CNN", "FOX", "Forbes"]
source_uris = [er.getNewsSourceUri(source) for source in source_names]

# List to store the names of all generated DataFrames
dataframe_names = []

# Loop through each topic, fetch articles, create a DataFrame, and save to CSV
for topic in topics:
    # Get the URI for the concept
    concept_uri = er.getConceptUri(topic)
    
    # Define the query for each topic, limited to specific sources
    q = QueryArticlesIter(
        conceptUri=concept_uri,
        sourceUri=QueryItems.OR(source_uris),
        sourceLocationUri=er.getLocationUri("United States"),  # Only US sources
    )

    # List to hold each article's data as a dictionary
    articles_data = []

    # Fetch and process articles
    for art in q.execQuery(er, sortBy="date", maxItems=500):
        articles_data.append({
            "title": art.get("title"),
            "source": art.get("source", {}).get("title"),
            "author": art.get("author"),
            "url": art.get("url"),
            "publishedAt": art.get("dateTime"),
            "content": art.get("body")
        })

    # Create a DataFrame from the list of dictionaries
    articles_df = pd.DataFrame(articles_data)
    
    # Save the DataFrame to a CSV file
    file_name = f"{topic.replace(' ', '_')}_articles.csv"
    articles_df.to_csv(file_name, index=False)

    # Dynamically set the DataFrame name based on the topic, replacing spaces with underscores
    df_name = f"{topic.replace(' ', '_')}_df"
    globals()[df_name] = articles_df

    # Append the DataFrame name to the list
    dataframe_names.append(df_name)

# Print the list of all generated DataFrame names
print("Generated DataFrames:", dataframe_names)

Generated DataFrames: ['Donald_Trump_df', 'Kamala_Harris_df', 'Israel_df', 'Palestine_df', 'Palestinians_df', 'Hamas_df', 'FEMA_df', 'Abortion_df', 'Inflation_df', 'Unemployment_df', 'Economy_df', 'Dockworkers_df', 'ILA_Port_df']


In [4]:
# Example: Access the "Donald Trump" DataFrame
Donald_Trump_df.head() # type: ignore

Unnamed: 0,title,source,author,url,publishedAt,content
0,Howard Stern tells Harris sun will 'go out' if...,Fox News,,https://www.foxnews.com/media/howard-stern-tel...,2024-10-08T22:11:42Z,"By entering your email and pushing continue, y..."
1,Some Indian American Democrats see themselves ...,NPR,,https://www.npr.org/2024/10/08/nx-s1-5131223/s...,2024-10-08T21:57:10Z,"In the warm Georgia heat, Kannan Udayarajan is..."
2,Fox News Politics: Harris on '60 minutes': Joe...,Fox News,,https://www.foxnews.com/politics/fox-news-poli...,2024-10-08T21:51:34Z,Welcome to Fox News' Politics newsletter with ...
3,Will Israel Attack Iran's Nuclear Program? Her...,Forbes,,https://www.forbes.com/sites/stephenpastis/202...,2024-10-08T21:39:57Z,"While Iran does not yet have nuclear arms, ana..."
4,Biden undermines Harris claim that Ron DeSanti...,Fox News,,https://www.foxnews.com/politics/biden-undermi...,2024-10-08T21:36:12Z,"By entering your email and pushing continue, y..."
