In [1]:
import json
import os
from datetime import datetime, timedelta

import requests

# NewsAPI credential and endpoint used by fetch_articles().
# The key is taken from the NEWSAPI_KEY environment variable when that is set and
# non-empty, so the secret does not have to live in the notebook. The literal is
# kept only as a backward-compatible fallback.
# NOTE(review): this key is stored in plain text in the notebook — rotate it and
# remove the fallback once the environment variable is in place.
API_KEY = os.environ.get("NEWSAPI_KEY") or "9c56272ec3ef4a73a5bfe06c8bc1a4e9"
URL = "https://newsapi.org/v2/everything"

In [2]:
def fetch_articles(query, from_date, to_date, page_size=100, timeout=30):
    """Fetch English-language articles matching ``query`` from the NewsAPI endpoint.

    The search is restricted to article titles and descriptions. Exactly one
    request is issued (no pagination), so only the first page of results for the
    requested window is returned.

    Args:
        query: Search phrase, sent as the ``q`` parameter.
        from_date: Value for the ``from`` parameter (callers in this notebook
            pass ``YYYY-MM-DD`` strings).
        to_date: Value for the ``to`` parameter, same format as ``from_date``.
        page_size: Value for the ``pageSize`` parameter.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive endpoint cannot hang the notebook indefinitely.

    Returns:
        The list stored under the response's ``articles`` key. An empty list is
        returned (after printing an ``Error: ...`` line) when the body is not
        JSON or when its ``status`` field is not ``"ok"``.

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    params = {
        "q": query,
        "from": from_date,
        "to": to_date,
        "language": "en",
        "pageSize": page_size,
        "searchIn": "title,description",  # Limit search to title and description
        "apiKey": API_KEY,
    }
    response = requests.get(URL, params=params, timeout=timeout)

    try:
        data = response.json()
    except ValueError:
        # requests signals an undecodable body with a ValueError subclass.
        print(f"Error: non-JSON response (HTTP {response.status_code})")
        return []

    # Guard against payloads that are not the expected JSON object, or that
    # lack the "status"/"message" fields, instead of failing with KeyError.
    if not isinstance(data, dict) or data.get("status") != "ok":
        fallback = f"unexpected response (HTTP {response.status_code})"
        message = data.get("message", fallback) if isinstance(data, dict) else fallback
        print(f"Error: {message}")
        return []

    return data.get("articles", [])


def save_articles_to_json(articles, filename):
    """Write ``articles`` to ``filename`` as JSON indented by four spaces.

    Args:
        articles: JSON-serialisable object (here, a list of article dicts).
        filename: Destination path. Any missing parent directories are created
            first, so saving into a folder such as ``dataset/`` works on a
            fresh checkout. An existing file is overwritten.
    """
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Explicit encoding keeps the output independent of the platform default.
    with open(filename, "w", encoding="utf-8") as file:
        json.dump(articles, file, indent=4)


def date_range_splitter(start_date, end_date, delta=7):
    """Split the span from ``start_date`` to ``end_date`` into ``delta``-day windows.

    Windows are generated back to back: each one starts where the previous one
    ended, so neighbouring windows share their boundary date. The final window
    is always a full ``delta`` days long and may therefore end after
    ``end_date``.

    Args:
        start_date: ``datetime`` at which the first window begins.
        end_date: ``datetime`` bound; windows are produced while their start
            is strictly before it.
        delta: Window length in days. Must be positive.

    Returns:
        A list of ``(from, to)`` tuples of ``YYYY-MM-DD`` strings. Empty when
        ``start_date`` is not before ``end_date``.

    Raises:
        ValueError: If ``delta`` is zero or negative, which would otherwise
            make the loop run forever.
    """
    if delta <= 0:
        raise ValueError(f"delta must be a positive number of days, got {delta!r}")

    step = timedelta(days=delta)
    date_ranges = []
    current_date = start_date
    while current_date < end_date:
        next_date = current_date + step
        date_ranges.append(
            (current_date.strftime("%Y-%m-%d"), next_date.strftime("%Y-%m-%d"))
        )
        current_date = next_date
    return date_ranges

In [3]:
# Cover the period from the fixed start date (2024-09-16) up to the moment this
# cell runs, querying one 7-day window at a time.
start_date = datetime(2024, 9, 16)
today = datetime.now()
date_ranges = date_range_splitter(start_date, today, delta=7)

# One accumulator per search term; each window's results are appended in order.
all_trump_articles, all_harris_articles = [], []

for from_date, to_date in date_ranges:
    print(f"Fetching Trump articles from {from_date} to {to_date}")
    trump_articles = fetch_articles("Donald Trump", from_date, to_date)
    all_trump_articles += trump_articles

    print(f"Fetching Harris articles from {from_date} to {to_date}")
    harris_articles = fetch_articles("Kamala Harris", from_date, to_date)
    all_harris_articles += harris_articles

# Persist both collections under the dataset/ folder.
save_articles_to_json(all_trump_articles, "dataset/trump_articles.json")
save_articles_to_json(all_harris_articles, "dataset/harris_articles.json")

# Report how many articles were collected for each search term.
print(
    f"Saved {len(all_trump_articles)} Trump articles and {len(all_harris_articles)} Harris articles."
)

Fetching Trump articles from 2024-09-16 to 2024-09-23
Fetching Harris articles from 2024-09-16 to 2024-09-23
Fetching Trump articles from 2024-09-23 to 2024-09-30
Fetching Harris articles from 2024-09-23 to 2024-09-30
Fetching Trump articles from 2024-09-30 to 2024-10-07
Fetching Harris articles from 2024-09-30 to 2024-10-07
Fetching Trump articles from 2024-10-07 to 2024-10-14
Fetching Harris articles from 2024-10-07 to 2024-10-14
Fetching Trump articles from 2024-10-14 to 2024-10-21
Fetching Harris articles from 2024-10-14 to 2024-10-21
Saved 500 Trump articles and 500 Harris articles.
