In [1]:
import requests
import pandas as pd
from datetime import datetime, timedelta
from textblob import TextBlob
import urllib.parse
import time
import os
from dotenv import load_dotenv
import re

from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch 
import torch.nn.functional as F
from tqdm import tqdm

2025-05-16 23:44:38.192792: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
load_dotenv()
api_key = os.getenv('NEWS_API_KEY')
if not api_key:
    raise ValueError("API key is missing. Set the News_API_KEY environment variable in the .env file.")
tickers = ["Apple", "Google", "Microsoft", "Amazon", "Meta", "Nvidia", "Tesla", "Netflix", "AMD", "Palantir"]
base_url = f"https://newsapi.org/v2/everything?"
sentiment_data = []
news_data = []
call_count = 0

In [11]:
for i in range(5):
    date = datetime.utcnow() - timedelta(days = i)
    from_date = to_date = date.strftime("%Y-%m-%d")

    for ticker in tickers:
        print(f"Pulling news for {ticker} on {from_date}")
        
        params = {
            "q": f'"{ticker}"',
            "searchIn": "title",
            "from": from_date,
            "to": to_date,
            "language": "en",
            "sortBy": "relevancy",
            "pageSize": 10,
            "apiKey": api_key
        }
        try:
            r = requests.get(base_url, params = params)
            r.raise_for_status()
            articles = r.json().get("articles", [])
        
        except Exception as e:
            print(f"Error fetching data from {ticker} on {from_date}: {e}")
            continue
            
        for article in articles:
            article["ticker"] = ticker
            # getting first 10 character of the timestamp string yyyy-mm-dd
            article["date"] = article.get("publishedAt", "")[:10] 
            news_data.append(article)

        time.sleep(1.1)

Pulling news for Apple on 2025-05-16


In [12]:
news_df = pd.DataFrame(news_data)
news_df

Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content,ticker,date
0,"{'id': 'the-verge', 'name': 'The Verge'}",Andrew Liszewski,Apple Music’s new transfer tool simplifies swi...,To potentially make it easier for users of str...,https://www.theverge.com/news/668369/apple-mus...,https://platform.theverge.com/wp-content/uploa...,2025-05-16T13:34:21Z,"You can transfer songs, albums, and playlists ...",Apple,2025-05-16
1,"{'id': None, 'name': 'MacRumors'}",Juli Clover,"Apple Pay, Apple Card, Wallet and Apple Cash C...",If you're having problems using your Apple Car...,https://www.macrumors.com/2025/05/16/apple-pay...,https://images.macrumors.com/t/RQPLZ_3_iMyj3ev...,2025-05-16T16:45:54Z,If you're having problems using your Apple Car...,Apple,2025-05-16
2,"{'id': None, 'name': 'MacRumors'}",Mitchel Broussard,Best Apple Deals of the Week: Sitewide Sales H...,This week's best deals include a big sitewide ...,https://www.macrumors.com/2025/05/16/best-appl...,https://images.macrumors.com/t/d77AV3K7FCzvXhA...,2025-05-16T15:10:07Z,This week's best deals include a big sitewide ...,Apple,2025-05-16
3,"{'id': None, 'name': 'MacRumors'}",Eric Slivka,Apple Stops Signing iPadOS 17.7.7 After Report...,Apple has stopped signing the iPadOS 17.7.7 up...,https://www.macrumors.com/2025/05/15/apple-sto...,https://images.macrumors.com/t/DoYicdwGvOHw-VK...,2025-05-16T03:08:02Z,Apple has stopped signing the iPadOS 17.7.7 up...,Apple,2025-05-16
4,"{'id': None, 'name': 'MacRumors'}",Juli Clover,Apple Says Fortnite for iOS Isn't Blocked Worl...,Apple today clarified that it has not blocked ...,https://www.macrumors.com/2025/05/16/apple-for...,https://images.macrumors.com/t/_WvBzBow0b8ZAmv...,2025-05-16T16:58:29Z,Apple today clarified that it has not blocked ...,Apple,2025-05-16
5,"{'id': None, 'name': 'MacRumors'}",Juli Clover,MacRumors Giveaway: Win an Apple Watch Ultra 2...,"For this week's giveaway, we've teamed up with...",https://www.macrumors.com/2025/05/16/macrumors...,https://images.macrumors.com/t/tNvNv1VotoF8kNU...,2025-05-16T17:39:56Z,"For this week's giveaway, we've teamed up with...",Apple,2025-05-16
6,"{'id': None, 'name': 'AppleInsider'}",news@appleinsider.com (Marko Zivkovic),Apple Pay and Apple Cash experienced an outage...,An Apple Pay and Apple Cash problem left users...,https://appleinsider.com/articles/25/05/16/app...,https://photos5.appleinsider.com/gallery/63659...,2025-05-16T18:43:05Z,An Apple Pay and Apple Cash problem left users...,Apple,2025-05-16
7,"{'id': None, 'name': '9to5Mac'}",Ryan Christoffel,Apple Pay and Apple Cash are down for many iPh...,Trying to make a payment using your iPhone? Yo...,https://9to5mac.com/2025/05/16/apple-pay-and-a...,https://i0.wp.com/9to5mac.com/wp-content/uploa...,2025-05-16T16:32:45Z,Trying to make a payment using your iPhone? Yo...,Apple,2025-05-16
8,"{'id': None, 'name': '9to5Mac'}",Michael Burkhardt,Apple News+ is quickly becoming a hidden gem i...,9to5Mac is brought to you by Incogni: Protect ...,https://9to5mac.com/2025/05/16/apple-news-hidd...,https://i0.wp.com/9to5mac.com/wp-content/uploa...,2025-05-16T20:30:38Z,9to5Mac is brought to you by Incogni:Protect y...,Apple,2025-05-16
9,"{'id': None, 'name': 'Yahoo Entertainment'}",,Lopez v. Apple lawsuit: How you can file a cla...,,https://consent.yahoo.com/v2/collectConsent?se...,,2025-05-16T18:00:17Z,"If you click 'Accept all', we and our partners...",Apple,2025-05-16


In [6]:
# Only apply this cleanup to 'Apple' ticker
mask = news_df['ticker'] == 'Apple'

fruit_keywords = ['fruit', 'juice', 'pie', 'orchard', 'cider', 'farming', 'produce']

def is_apple_company(row):
    text = (str(row['title']) + ' ' + str(row['description'])).lower()
    return not any(re.search(rf'\b{kw}\b', text) for kw in fruit_keywords)

dropped = news_df[mask].loc[~news_df[mask].apply(is_apple_company, axis=1)]
print("Filtered Apple rows:")
print(dropped[['title', 'description']])

# Filter only Apple rows with the function, keep the rest untouched
news_df = pd.concat([
    news_df[mask].loc[news_df[mask].apply(is_apple_company, axis=1)], # filtered Apple articles
    news_df[~mask]  # non-Apple articles
], ignore_index=True)

KeyError: 'ticker'

In [None]:
news_df

In [7]:
news_df['source'] = news_df['source'].apply(lambda x: x.get('name') if isinstance(x, dict) else None)

In [8]:
news_df = news_df[['ticker', 'date', 'source', 'title',	'description', 'url', 'content']]

In [9]:
news_df

Unnamed: 0,ticker,date,source,title,description,url,content
0,Apple,2025-05-15,The Verge,Trump wants Apple to stop making more iPhones ...,President Donald Trump told Apple CEO Tim Cook...,https://www.theverge.com/news/667649/donald-tr...,Were not interested in you building in India.\...
1,Apple,2025-05-15,The Verge,Apple is placing warnings on EU apps that don’...,Apple is trying to dissuade Europeans from usi...,https://www.theverge.com/news/667484/apple-eu-...,Apple suggests that users are putting themselv...
2,Apple,2025-05-15,MacRumors,Apple Says These Vehicle Brands Plan to Offer ...,Apple today announced the launch of CarPlay Ul...,https://www.macrumors.com/2025/05/15/carplay-u...,Apple today announced the launch of CarPlay Ul...
3,Apple,2025-05-15,MacRumors,Apple Highlights Magnifier on Mac and iPhone M...,Apple today shared new videos that highlight t...,https://www.macrumors.com/2025/05/15/apple-vid...,Apple today shared new videos that highlight t...
4,Apple,2025-05-15,Business Insider,"Watch out, Apple. ChatGPT and Chipotle have jo...",Topping the list for the fourth year in a row ...,https://www.businessinsider.com/most-valuable-...,Apple ranked No. 1 on Kantar BrandZ's annual m...
...,...,...,...,...,...,...,...
393,Palantir,2025-05-12,Biztoc.com,Palantir Stock Targets A Breakout — But Don't ...,,https://biztoc.com/x/2a7f9d2be811e31b,"{ window.open(this.href, '_blank'); }, 200); r..."
394,Palantir,2025-05-12,Biztoc.com,"Is Palantir Stock a Buy, Sell, or Hold on New ...",,https://biztoc.com/x/dd3f63da1c541d5b,"{ window.open(this.href, '_blank'); }, 200); r..."
395,Palantir,2025-05-12,Biztoc.com,"Wynn Resorts, Microsoft, Palantir And A Health...",Zinger Key Points\n- Kevin Simpson says Palant...,https://biztoc.com/x/a49d695b302e47de,Zinger Key Points- Kevin Simpson says Palantir...
396,Palantir,2025-05-12,Biztoc.com,Palantir Technologies (NASDAQ: PLTR) Price Pre...,Shares of Palantir Technologies Inc. (NASDAQ: ...,https://biztoc.com/x/506a94877b211287,Shares of Palantir Technologies Inc. (NASDAQ: ...


In [10]:
news_df = news_df.dropna(subset = ['description'])

In [11]:
#news_df = pd.DataFrame(news_data)
print(f'The size of the dataframe: {len(news_df)}')
news_df.isna().sum()

The size of the dataframe: 345


ticker         0
date           0
source         0
title          0
description    0
url            0
content        0
dtype: int64

In [12]:
news_df.to_csv("assets/news_data.csv", index = False)

### Sentiment Analysis using FinBERT

In [None]:
# Load FinBERT model
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")

In [14]:
def get_finbert_scores_batch(text_list):
    scores = []

    for text in text_list:
        if not text or not text.strip():
            scores.append(None)
            continue

        try:
            # Tokenize with truncation
            inputs = tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=512,
                padding=True
            )

            # Force CPU (avoid MPS crash)
            inputs = {k: v.cpu() for k, v in inputs.items()}
            model.cpu()

            with torch.no_grad():
                outputs = model(**inputs)
                logits = outputs.logits
                probs = F.softmax(logits, dim=1).squeeze()

                # Weighted score: +1*pos, -1*neg, 0*neutral
                score = probs[0].item() * 1 + probs[1].item() * -1
                scores.append(score)

        except Exception as e:
            print(f"Error on text: {e}")
            scores.append(None)

    return scores

In [15]:
news_df['text'] = news_df['title'].fillna('') + ' ' + news_df['description'].fillna('')
sentiments = []

# Batch size: adjust lower if crash continues
batch_size = 16
text_list = news_df['text'].tolist()

for i in tqdm(range(0, len(text_list), batch_size)):
    batch = text_list[i:i+batch_size]
    scores = get_finbert_scores_batch(batch)
    sentiments.extend(scores)

news_df['sentiment'] = sentiments

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  news_df['text'] = news_df['title'].fillna('') + ' ' + news_df['description'].fillna('')
100%|████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:23<00:00,  1.05s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  news_df['sentiment'] = sentiments


In [16]:
news_df

Unnamed: 0,ticker,date,source,title,description,url,content,text,sentiment
0,Apple,2025-05-15,The Verge,Trump wants Apple to stop making more iPhones ...,President Donald Trump told Apple CEO Tim Cook...,https://www.theverge.com/news/667649/donald-tr...,Were not interested in you building in India.\...,Trump wants Apple to stop making more iPhones ...,0.852582
1,Apple,2025-05-15,The Verge,Apple is placing warnings on EU apps that don’...,Apple is trying to dissuade Europeans from usi...,https://www.theverge.com/news/667484/apple-eu-...,Apple suggests that users are putting themselv...,Apple is placing warnings on EU apps that don’...,0.034329
2,Apple,2025-05-15,MacRumors,Apple Says These Vehicle Brands Plan to Offer ...,Apple today announced the launch of CarPlay Ul...,https://www.macrumors.com/2025/05/15/carplay-u...,Apple today announced the launch of CarPlay Ul...,Apple Says These Vehicle Brands Plan to Offer ...,0.992835
3,Apple,2025-05-15,MacRumors,Apple Highlights Magnifier on Mac and iPhone M...,Apple today shared new videos that highlight t...,https://www.macrumors.com/2025/05/15/apple-vid...,Apple today shared new videos that highlight t...,Apple Highlights Magnifier on Mac and iPhone M...,0.977094
4,Apple,2025-05-15,Business Insider,"Watch out, Apple. ChatGPT and Chipotle have jo...",Topping the list for the fourth year in a row ...,https://www.businessinsider.com/most-valuable-...,Apple ranked No. 1 on Kantar BrandZ's annual m...,"Watch out, Apple. ChatGPT and Chipotle have jo...",0.900195
...,...,...,...,...,...,...,...,...,...
393,Palantir,2025-05-12,Biztoc.com,Palantir Stock Targets A Breakout — But Don't ...,,https://biztoc.com/x/2a7f9d2be811e31b,"{ window.open(this.href, '_blank'); }, 200); r...",Palantir Stock Targets A Breakout — But Don't ...,0.999448
394,Palantir,2025-05-12,Biztoc.com,"Is Palantir Stock a Buy, Sell, or Hold on New ...",,https://biztoc.com/x/dd3f63da1c541d5b,"{ window.open(this.href, '_blank'); }, 200); r...","Is Palantir Stock a Buy, Sell, or Hold on New ...",0.923462
395,Palantir,2025-05-12,Biztoc.com,"Wynn Resorts, Microsoft, Palantir And A Health...",Zinger Key Points\n- Kevin Simpson says Palant...,https://biztoc.com/x/a49d695b302e47de,Zinger Key Points- Kevin Simpson says Palantir...,"Wynn Resorts, Microsoft, Palantir And A Health...",-0.999932
396,Palantir,2025-05-12,Biztoc.com,Palantir Technologies (NASDAQ: PLTR) Price Pre...,Shares of Palantir Technologies Inc. (NASDAQ: ...,https://biztoc.com/x/506a94877b211287,Shares of Palantir Technologies Inc. (NASDAQ: ...,Palantir Technologies (NASDAQ: PLTR) Price Pre...,0.400149


In [17]:
news_df.to_csv("assets/news_data.csv", index = False)

In [18]:
sentiment_data = []
grouped = news_df.groupby(['ticker', 'date'])
for (ticker, date), group in grouped:
    article_count = group['sentiment'].count()
    avg_sentiment = group['sentiment'].mean()

    sentiment_data.append({
        "ticker": ticker,
        "date": date,
        "article_count": article_count,
        "avg_sentiment": avg_sentiment
    })

In [19]:
sentiment_df = pd.DataFrame(sentiment_data)
sentiment_df

Unnamed: 0,ticker,date,article_count,avg_sentiment
0,AMD,2025-05-12,10,0.198285
1,AMD,2025-05-13,9,0.384438
2,AMD,2025-05-14,8,0.153094
3,AMD,2025-05-15,8,0.381166
4,Amazon,2025-05-12,10,-0.015021
5,Amazon,2025-05-13,10,0.54111
6,Amazon,2025-05-14,10,0.426235
7,Amazon,2025-05-15,10,0.226408
8,Apple,2025-05-12,9,0.996728
9,Apple,2025-05-13,10,0.44832


In [20]:
sentiment_df['avg_sentiment'] = sentiment_df['avg_sentiment'].round(3)

In [21]:
sentiment_df

Unnamed: 0,ticker,date,article_count,avg_sentiment
0,AMD,2025-05-12,10,0.198
1,AMD,2025-05-13,9,0.384
2,AMD,2025-05-14,8,0.153
3,AMD,2025-05-15,8,0.381
4,Amazon,2025-05-12,10,-0.015
5,Amazon,2025-05-13,10,0.541
6,Amazon,2025-05-14,10,0.426
7,Amazon,2025-05-15,10,0.226
8,Apple,2025-05-12,9,0.997
9,Apple,2025-05-13,10,0.448


In [22]:
sentiment_df.to_csv("assets/news_sentiment.csv", index = False)

In [23]:
news_df.isna().sum()

ticker         0
date           0
source         0
title          0
description    0
url            0
content        0
text           0
sentiment      0
dtype: int64