In [30]:
from newsplease import NewsPlease
article = NewsPlease.from_url('https://www.nytimes.com/2017/02/23/us/politics/cpac-stephen-bannon-reince-priebus.html?hp')
print(article.title)

Stephen Bannon Reassures Conservatives Uneasy About Trump


In [None]:
import requests
import csv
from datetime import datetime, timedelta
import os

# Set env variables
APCA_API_KEY_ID = os.getenv("APCA_API_KEY_ID")
APCA_API_SECRET_KEY = os.getenv("APCA_API_SECRET_KEY")

# Keywords related to macroeconomic indicators
keywords = [
    "Gross Domestic Product", "GDP", "Unemployment Rate", "Inflation Rate",
    "Consumer Price Index", "CPI", "Producer Price Index", "PPI", "Interest Rates",
    "Balance of Trade", "Government Debt", "Budget Deficit", "Surplus", "Exchange Rates",
    "Money Supply", "Industrial Production", "Retail Sales", "Housing Starts"
]

# Prepare headers for the HTTP request
headers = {
    'APCA-API-KEY-ID': APCA_API_KEY_ID,
    'APCA-API-SECRET-KEY': APCA_API_SECRET_KEY,
}

# Fetch and filter news and write to csv
def fetch_and_filter_news(start_date, end_date, batch_size=30):
    next_page_token = None
    has_more = True
    with open('macroeconomic_news.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Date", "Headline", "Content"])

        while start_date < end_date:
            batch_end_date = start_date + timedelta(days=30)
            while has_more:
                params = {
                    'start': start_date.strftime('%Y-%m-%d'),
                    'end': batch_end_date.strftime('%Y-%m-%d'),
                    'include_content': 'true',
                    'limit': batch_size,
                }

                if next_page_token:
                    params['page_token'] = next_page_token

                response = requests.get('https://data.alpaca.markets/v1beta1/news', headers=headers, params=params)

                if response.status_code == 200:
                    news_batch = response.json()
                    for article in news_batch['news']:
                        # Keyword check
                        if any(keyword in article['headline'] or keyword in article['summary'] for keyword in keywords):
                            date = datetime.fromisoformat(article['created_at']).strftime('%m/%d/%Y')
                            headline = article['headline']
                            content = article['summary']
                            writer.writerow([date, headline, content])
                            print(date, headline, content)

                    next_page_token = news_batch.get('next_page_token')
                    has_more = next_page_token is not None
                    if not has_more:  # Reset for the next batch
                        start_date += timedelta(days=30)  # Next 30 days
                        next_page_token = None  # Reset pagination token
                        has_more = True  # Reset has_more
                else:
                    print("Failed to fetch news articles", response.status_code)
                    break

start_date = datetime.now() - timedelta(days=365*3)
end_date = datetime.now()

fetch_and_filter_news(start_date, end_date, batch_size=30)

In [2]:
import requests
import csv
from datetime import datetime, timedelta
import os
import time

# Set env variables
APCA_API_KEY_ID = os.getenv("APCA_API_KEY_ID")
APCA_API_SECRET_KEY = os.getenv("APCA_API_SECRET_KEY")

# Keywords related to macroeconomic indicators
keywords = [
    "Gross Domestic Product", "GDP", "Unemployment Rate", "Inflation Rate",
    "Consumer Price Index", "CPI", "Producer Price Index", "PPI", "Interest Rates",
    "Balance of Trade", "Government Debt", "Budget Deficit", "Surplus", "Exchange Rates",
    "Money Supply", "Industrial Production", "Retail Sales", "Housing Starts"
]

# Prepare headers for the HTTP request
headers = {
    'APCA-API-KEY-ID': APCA_API_KEY_ID,
    'APCA-API-SECRET-KEY': APCA_API_SECRET_KEY,
}

# Fetch and filter news and write to csv
def fetch_and_filter_news(end_date, batch_size=30):
    next_page_token = None
    has_more = True
    request_count = 0
    max_requests_per_minute = 200
    minute_start_time = time.time()
    retry_attempts = 3  # Maximum number of retry attempts for a failed request

    with open('macroeconomic_news.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Date", "Headline", "Content"])

        while end_date > start_date:
            batch_start_date = end_date - timedelta(days=30)
            while has_more:
                # Check rate limit
                if request_count >= max_requests_per_minute:
                    elapsed_time = time.time() - minute_start_time
                    if elapsed_time < 60:
                        time.sleep(60 - elapsed_time)
                    request_count = 0
                    minute_start_time = time.time()

                params = {
                    'start': batch_start_date.strftime('%Y-%m-%d'),
                    'end': end_date.strftime('%Y-%m-%d'),
                    'include_content': 'true',
                    'limit': batch_size,
                }

                if next_page_token:
                    params['page_token'] = next_page_token

                for attempt in range(retry_attempts):
                    response = requests.get('https://data.alpaca.markets/v1beta1/news', headers=headers, params=params)
                    request_count += 1

                    if response.status_code == 200:
                        news_batch = response.json()
                        for article in news_batch['news']:
                            # Keyword check
                            if any(keyword in article['headline'] or keyword in article['summary'] for keyword in keywords):
                                date = datetime.fromisoformat(article['created_at']).strftime('%m/%d/%Y')
                                headline = article['headline']
                                content = article['summary']
                                writer.writerow([date, headline, content])
                                print(date, headline, content)

                        next_page_token = news_batch.get('next_page_token')
                        has_more = next_page_token is not None
                        if not has_more:  # Reset for the next batch
                            end_date -= timedelta(days=30)  # Previous 30 days
                            next_page_token = None  # Reset pagination token
                            has_more = True  # Reset has_more
                        break  # Break out of retry loop on success
                    else:
                        print(f"Failed to fetch news articles {response.status_code} on attempt {attempt + 1}")
                        if response.status_code == 400:
                            print("Bad request, skipping this batch.")
                            break
                        time.sleep(2 ** attempt)  # Exponential backoff
                else:
                    # No more retries
                    end_date -= timedelta(days=30)
                    has_more = False
                    next_page_token = None

start_date = datetime.now() - timedelta(days=365*5)
end_date = datetime.now()

fetch_and_filter_news(end_date, batch_size=30)

05/28/2024 Bank of Japan Core Consumer Price Index (YoY) 1.8% Vs. 2.2% Est.; 2.2% Prior 
05/28/2024 Reported Earlier, Australia Retail Sales (MoM) Preliminary For April 0.1% Vs. 0.3% Est.; -0.4% Prior 
05/24/2024 NewEdge Wealth's Cameron Dawson Talks Prolonged High-Interest Rates On PreMarket Prep Cameron Dawson, chief Investment Officer of NewEdge Wealth, predicts 2-year Treasury yield to be stuck between 4.5% and 5%. Higher rates benefit large tech companies but hurt smaller cap companies with high debt. Non-farm payrolls are key to signaling economic shift.
05/24/2024 When Will The Fed Cut Interest Rates? We're In 'The Middle-Of-The-Road Path Of Cutting Gradually': Goldman Sachs Goldman Sachs pushes back forecast for Fed&#39;s first rate cut due to stronger-than-expected economic data and diverse FOMC opinions.
05/24/2024 'Wow:' Elon Musk Reacts As Nvidia's Eye-Popping Market Cap Leaves Entire German Stock Market, GDPs Of 96% World Nations Biting The Dust Nvidia has positioned itsel

ConnectTimeout: HTTPSConnectionPool(host='data.alpaca.markets', port=443): Max retries exceeded with url: /v1beta1/news?start=2024-04-29&end=2024-05-29&include_content=true&limit=30&page_token=MTcxNTY5MTUyODAwMDAwMDAwMHwzODgxNTYxMA%3D%3D (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x0000023455B986E0>, 'Connection to data.alpaca.markets timed out. (connect timeout=None)'))