In [9]:
import requests
import pandas as pd

API_KEY = "87b4335861464062a4e65e9601b89c99"
query = "Tesla"

url = (
    f"https://newsapi.org/v2/everything?"
    f"q={query}&"
    f"from=2025-03-21&to=2025-04-20&"
    f"sortBy=publishedAt&"
    f"language=en&"
    f"pageSize=100&"
    f"apiKey={API_KEY}"
)

response = requests.get(url)
print(1)
articles = response.json()['articles']
# articles = response.json()
# print(articles)

df = pd.DataFrame([{
    'title': a['title'],
    'description': a['description'],
    'publishedAt': a['publishedAt'],
    'source': a['source']['name'],
    'url': a['url']
} for a in articles])

df['parsed_datetime'] = pd.to_datetime(df['publishedAt'])
print(df[['title', 'parsed_datetime']].head())

1
                                               title           parsed_datetime
0  PHOTOS: Anti-Trump protesters rally in cities ... 2025-04-20 22:41:44+00:00
1  Earnings preview: Alphabet, Tesla, Nvidia, and... 2025-04-20 22:25:31+00:00
2  After lobbying US Congress to shelve the Senat... 2025-04-20 22:14:37+00:00
3  More Homes, Few Buyers, Even Price Cuts. How t... 2025-04-20 22:14:33+00:00
4                   Stock market today: Live updates 2025-04-20 22:07:35+00:00


In [13]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# === Configuration ===
API_KEY = "87b4335861464062a4e65e9601b89c99"  # Replace with your actual key
query = "Tesla"
language = "en"
chunk_days = 5  # Split by 5-day windows
start_date = datetime(2025, 3, 21)
end_date = datetime(2025, 4, 20)

# === Collect articles ===
all_articles = []

while start_date < end_date:
    next_date = start_date + timedelta(days=chunk_days)
    from_str = start_date.strftime("%Y-%m-%d")
    to_str = next_date.strftime("%Y-%m-%d")

    url = (
        f"https://newsapi.org/v2/everything?"
        f"q={query}&"
        f"from={from_str}&to={to_str}&"
        f"sortBy=publishedAt&"
        f"language={language}&"
        f"pageSize=100&"
        f"apiKey={API_KEY}"
    )

    response = requests.get(url)
    data = response.json()

    if 'articles' in data:
        print(f"✔️  Fetched {len(data['articles'])} articles from {from_str} to {to_str}")
        all_articles.extend(data['articles'])
    else:
        print(f"❌ Error from {from_str} to {to_str}: {data.get('message', 'Unknown error')}")

    start_date = next_date

# === Convert to DataFrame ===
df = pd.DataFrame([{
    'title': a['title'],
    'description': a['description'],
    'publishedAt': a['publishedAt'],
    'source': a['source']['name'],
    'url': a['url']
} for a in all_articles])

# === Clean and deduplicate ===
df['parsed_datetime'] = pd.to_datetime(df['publishedAt'])
df.drop_duplicates(subset='title', inplace=True)
df = df.sort_values('parsed_datetime').reset_index(drop=True)

# === Final preview ===
print(f"\n📄 Total articles fetched: {len(df)}")
print(df[['title', 'parsed_datetime']].head())

✔️  Fetched 95 articles from 2025-03-21 to 2025-03-26
✔️  Fetched 96 articles from 2025-03-26 to 2025-03-31
✔️  Fetched 98 articles from 2025-03-31 to 2025-04-05
✔️  Fetched 95 articles from 2025-04-05 to 2025-04-10
✔️  Fetched 93 articles from 2025-04-10 to 2025-04-15
✔️  Fetched 97 articles from 2025-04-15 to 2025-04-20

📄 Total articles fetched: 534
                                               title           parsed_datetime
0  Ubisoft Roasts Elon Musk Over His Dubious PC G... 2025-03-26 17:27:15+00:00
1  Trump’s deregulation is one of many boons for ... 2025-03-26 17:32:14+00:00
2  Canada investigates whether Tesla wrongfully h... 2025-03-26 17:42:15+00:00
3  Tesla launching in Saudi Arabia as Musk and th... 2025-03-26 17:42:23+00:00
4  X to report first annual ad revenue growth sin... 2025-03-26 17:43:09+00:00


In [30]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import pytz


API_KEY = "87b4335861464062a4e65e9601b89c99"
query = "Tesla and Tesla Stock"
language = "en"
start_date = datetime(2025, 3, 21)
end_date = datetime(2025, 4, 20)
ny_tz = pytz.timezone("US/Eastern")

# === Store results ===
all_articles = []

# === Loop through days ===
while start_date <= end_date:
    from_str = start_date.strftime("%Y-%m-%d")
    to_str = (start_date + timedelta(days=1)).strftime("%Y-%m-%d")

    url = (
        f"https://newsapi.org/v2/everything?"
        f"q={query}&"
        f"from={from_str}&to={to_str}&"
        f"sortBy=publishedAt&"
        f"language={language}&"
        f"pageSize=100&"
        f"apiKey={API_KEY}"
    )

    response = requests.get(url)
    data = response.json()

    if 'articles' in data:
        print(f"✔️  {len(data['articles'])} articles on {from_str}")
        all_articles.extend(data['articles'])
    else:
        print(f"❌ Error on {from_str}: {data.get('message', 'Unknown error')}")

    start_date += timedelta(days=1)

# === Convert to DataFrame ===
df = pd.DataFrame([{
    'title': a['title'],
    'description': a['description'],
    'publishedAt': a['publishedAt'],
    'source': a['source']['name'],
    'url': a['url']
} for a in all_articles])

df['publishedAt_utc'] = pd.to_datetime(df['publishedAt'], utc=True)
df['publishedAt_ny'] = df['publishedAt_utc'].dt.tz_convert(ny_tz)

# === Filter to NYSE trading hours (9:30 AM to 4:00 PM EST) and weekdays ===
def is_trading_hour(ts):
    return ts.weekday() < 5 and ((ts.hour > 9 or (ts.hour == 9 and ts.minute >= 30)) and ts.hour < 16)

df = df[df['publishedAt_ny'].apply(is_trading_hour)]

# === Deduplicate and clean ===
df = df.drop_duplicates(subset='title').sort_values('publishedAt_ny').reset_index(drop=True)

# === Final preview ===
print(f"\n📄 Total NYSE-time articles: {len(df)}")
print(df[['title', 'publishedAt_ny']].head())

# === Optional: Save to CSV ===
# df.to_csv("tesla_news_trading_hours_only.csv", index=False)


❌ Error on 2025-03-21: You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.
❌ Error on 2025-03-22: You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.
❌ Error on 2025-03-23: You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.
❌ Error on 2025-03-24: You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.
❌ Error on 2025-03-25: You have made too many requests recently. Developer a

KeyboardInterrupt: 

In [18]:
df

Unnamed: 0,title,description,publishedAt,source,url,publishedAt_utc,publishedAt_ny
0,Multiple incendiary devices found at Tesla dea...,Tesla dealerships have faced attacks since Elo...,2025-03-24T18:26:47Z,ABC News,https://abcnews.go.com/US/multiple-incendiary-...,2025-03-24 18:26:47+00:00,2025-03-24 14:26:47-04:00
1,The Cost And Benefits Of Privatizing Amtrak,Amtrak CEO Stephen Gardner is stepping down af...,2025-03-24T18:26:59Z,Forbes,https://www.forbes.com/sites/paulweinstein/202...,2025-03-24 18:26:59+00:00,2025-03-24 14:26:59-04:00
2,"NYC’s fastest EV charging company, Revel, open...","Revel, the EV charging company known for its h...",2025-03-24T18:30:00Z,Electrek,http://electrek.co/2025/03/24/nycs-fastest-ev-...,2025-03-24 18:30:00+00:00,2025-03-24 14:30:00-04:00
3,FBI launches task force to nail criminals behi...,The FBI launched a task force Monday to try to...,2025-03-24T18:30:09Z,New York Post,https://nypost.com/2025/03/24/us-news/fbi-laun...,2025-03-24 18:30:09+00:00,2025-03-24 14:30:09-04:00
4,Austin Tesla dealership bomb investigation; su...,"AUSTIN, Texas - The Austin Police Department s...",2025-03-24T18:33:00Z,Biztoc.com,https://biztoc.com/x/da291d00a565a1c3,2025-03-24 18:33:00+00:00,2025-03-24 14:33:00-04:00
...,...,...,...,...,...,...,...
827,IRS Hunter Biden whistleblower Gary Shapley ou...,Treasury Secretary Scott Bessent has replaced ...,2025-04-18T19:39:47Z,New York Post,https://nypost.com/2025/04/18/us-news/irs-hunt...,2025-04-18 19:39:47+00:00,2025-04-18 15:39:47-04:00
828,Kansas City teen arrested for alleged arson at...,A 19-year-old Kansas City resident was arreste...,2025-04-18T19:44:28Z,ABC News,https://abcnews.go.com/US/kansas-city-teen-arr...,2025-04-18 19:44:28+00:00,2025-04-18 15:44:28-04:00
829,Corporate Bitcoin Holdings Hit 668K BTC In Q1 ...,Bitcoin (BTC) institutional adoption is growin...,2025-04-18T19:45:38Z,ZyCrypto,https://zycrypto.com/corporate-bitcoin-holding...,2025-04-18 19:45:38+00:00,2025-04-18 15:45:38-04:00
830,"Podcast: how Elon killed Tesla Model 2, global...","In the Electrek Podcast, we discuss the most p...",2025-04-18T19:47:09Z,Electrek,http://electrek.co/2025/04/18/podcast-how-elon...,2025-04-18 19:47:09+00:00,2025-04-18 15:47:09-04:00


Convert to NY time to match to the Stock Exchange date

In [19]:
df.to_csv("tesla_news_trading_hours_only.csv", index=False)

In [23]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import pytz

# === Config ===
API_KEY = "87b4335861464062a4e65e9601b89c99" 
query = "Tesla"
language = "en"
start_date = datetime( 2025, 4,7)
end_date = datetime(2025, 5, 8)
ny_tz = pytz.timezone("US/Eastern")

# === Collect articles day-by-day ===
all_articles = []

while start_date <= end_date:
    from_str = start_date.strftime("%Y-%m-%d")
    to_str = (start_date + timedelta(days=1)).strftime("%Y-%m-%d")

    url = (
        f"https://newsapi.org/v2/everything?"
        f"q={query}&"
        f"from={from_str}&to={to_str}&"
        f"sortBy=publishedAt&"
        f"language={language}&"
        f"pageSize=100&"
        f"apiKey={API_KEY}"
        
    )

    response = requests.get(url)
    data = response.json()

    if 'articles' in data:
        print(f"✔️  {len(data['articles'])} articles on {from_str}")
        all_articles.extend(data['articles'])
    else:
        print(f"❌ Error on {from_str}: {data.get('message', 'Unknown error')}")

    start_date += timedelta(days=1)

# === Convert to DataFrame ===
df = pd.DataFrame([{
    'title': a['title'],
    'description': a['description'],
    'publishedAt': a['publishedAt'],
    'source': a['source']['name'],
    'url': a['url']
} for a in all_articles])

df['publishedAt_utc'] = pd.to_datetime(df['publishedAt'], utc=True)
df['publishedAt_ny'] = df['publishedAt_utc'].dt.tz_convert(ny_tz)

# === Filter to trading days and NYSE hours (10 AM to 4 PM EST) ===
def is_trading_hour(ts):
    return ts.weekday() < 5 and 10 <= ts.hour < 16

df = df[df['publishedAt_ny'].apply(is_trading_hour)]

# === Group by hourly bins and limit to 15 articles per hour ===
df['hour_bin'] = df['publishedAt_ny'].dt.floor('h')
df = df.drop_duplicates(subset='title')  # optional: remove duplicates
max_per_hour = 15
df_top = df.groupby('hour_bin').apply(lambda x: x.head(max_per_hour)).reset_index(drop=True)

# === Final preview ===
print(f"\n📄 Total filtered Tesla articles (in trading hours): {len(df_top)}")
print(df_top[['title', 'publishedAt_ny', 'hour_bin']].head())

# === Optional: Save for FinBERT processing ===
# df_top.to_csv("tesla_news_filtered_nyse_hours.csv", index=False)


✔️  100 articles on 2025-04-07
✔️  93 articles on 2025-04-08
✔️  95 articles on 2025-04-09
✔️  99 articles on 2025-04-10
❌ Error on 2025-04-11: You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.
❌ Error on 2025-04-12: You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.
❌ Error on 2025-04-13: You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.
❌ Error on 2025-04-14: You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgra

In [24]:
df_top

Unnamed: 0,title,description,publishedAt,source,url,publishedAt_utc,publishedAt_ny,hour_bin
0,Musk brands Trump aide ‘dumber than a sack of ...,Zuru cofounder Nick Mowbray also wades into th...,2025-04-08T18:56:22Z,New Zealand Herald,https://www.nzherald.co.nz/world/musk-brands-t...,2025-04-08 18:56:22+00:00,2025-04-08 14:56:22-04:00,2025-04-08 14:00:00-04:00
1,Modern Wealth Adds More Than $1 Billion in Ass...,,2025-04-08T18:52:46Z,Biztoc.com,https://biztoc.com/x/568ec4b367fa4acd,2025-04-08 18:52:46+00:00,2025-04-08 14:52:46-04:00,2025-04-08 14:00:00-04:00
2,Tesla Stock Rises. What’s Driving the Move Higher,,2025-04-08T18:52:41Z,Biztoc.com,https://biztoc.com/x/40510919a762edb3,2025-04-08 18:52:41+00:00,2025-04-08 14:52:41-04:00,2025-04-08 14:00:00-04:00
3,"Brazil regulator authorizes Starlink to add 7,...",,2025-04-08T18:52:41Z,Biztoc.com,https://biztoc.com/x/b1971339c7771ebe,2025-04-08 18:52:41+00:00,2025-04-08 14:52:41-04:00,2025-04-08 14:00:00-04:00
4,Canada says counter-tariffs against some US-ma...,,2025-04-08T18:52:28Z,Biztoc.com,https://biztoc.com/x/590f876eb69e041b,2025-04-08 18:52:28+00:00,2025-04-08 14:52:28-04:00,2025-04-08 14:00:00-04:00
...,...,...,...,...,...,...,...,...
113,Trump AG Pam Bondi Only Interested In Protecti...,"Pam Bondi, Trump's new Attorney General, is wo...",2025-04-11T19:33:19Z,Crooksandliars.com,https://crooksandliars.com/2025/04/trump-ag-on...,2025-04-11 19:33:19+00:00,2025-04-11 15:33:19-04:00,2025-04-11 15:00:00-04:00
114,Get your EV questions answered at Drive Electr...,"It’s that time of year again, time for events ...",2025-04-11T19:31:42Z,Electrek,http://electrek.co/2025/04/11/get-your-ev-ques...,2025-04-11 19:31:42+00:00,2025-04-11 15:31:42-04:00,2025-04-11 15:00:00-04:00
115,These Were The Best (And Worst Stocks) To Own ...,Shares of Apple and Tesla have lost more than ...,2025-04-11T19:20:19Z,Forbes,https://www.forbes.com/sites/dereksaul/2025/04...,2025-04-11 19:20:19+00:00,2025-04-11 15:20:19-04:00,2025-04-11 15:00:00-04:00
116,Tesla FSD Version 13.X HW4 Passes Mark Rober H...,Mark Rober tested Tesla Autopilot against simu...,2025-04-11T19:14:40Z,Next Big Future,https://www.nextbigfuture.com/2025/04/tesla-fs...,2025-04-11 19:14:40+00:00,2025-04-11 15:14:40-04:00,2025-04-11 15:00:00-04:00


In [13]:
df_top.to_csv("Iter6AAPL_news_filtered_nyse_hours.csv", index=False)

In [18]:
df_top.size

400