Get packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from datetime import datetime

from urllib.parse import quote
import feedparser
import requests
from bs4 import BeautifulSoup
from newspaper import Article
from textblob import TextBlob

from transformers import pipeline
import torch


  from .autonotebook import tqdm as notebook_tqdm


Web scraping

In [43]:
def get_news(ticker, num_results):
    # Yahoo Finance RSS feed for a stock ticker
    ticker = str(ticker).strip().upper() 
    rss = f"https://feeds.finance.yahoo.com/rss/2.0/headline?s={quote(ticker)}&region=US&lang=en-US"
    
    feed = feedparser.parse(rss)
    news = feed.entries[:num_results]

    articles = []
    for entry in news:
        title = entry.title
        published = entry.published if "published" in entry else "N/A"
        link = entry.link
        content = get_content(link)
        
        articles.append({
            "title": title,
            "published": published,
            "link": link,
            "content": content
        })
    
    return articles


def get_content(url):
    try:
        headers = {
            "User-Agent": "Mozilla/5.0",
            "Accept-Language": "en-US,en;q=0.9"
        }
        response = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.content, "html.parser")

        paragraphs = soup.find_all("p")
        text = " ".join(p.get_text() for p in paragraphs).strip()

        return text if text else "No content found"
    
    except requests.RequestException:
        return "content not available due to request error"
    


       

Get Sentiment

In [54]:
pipe = pipeline("text-classification", model="ProsusAI/finbert", return_all_scores=True)

def sentiment_analysis(text, max_words=300):
    if not isinstance(text, str):
        return {"error": "Invalid content"}

    words = text.split()
    chunks = [words[i:i + max_words] for i in range(0, len(words), max_words)]

    scores = {"positive": 0, "neutral": 0, "negative": 0}

    for chunk in chunks:
        chunk_text = " ".join(chunk)
        output = pipe(chunk_text)

        # Flatten output if needed
        if isinstance(output[0], list):      # case: [[{...}, {...}, ...}]]
            result_list = output[0]
        else:                                # case: [{...}, {...}, ...}]
            result_list = output

        for r in result_list:
            scores[r["label"]] += r["score"]

    dominant = max(scores, key=scores.get)
    return {"label": dominant, "scores": scores}
    
       

Device set to use cpu


Define keywords

In [None]:
tickers = ["AAPL"]
query_target = tickers

Analysis

In [55]:
for query in query_target:
    print(f"Fetching news for: {query}")
    articles = get_news(query, 10)


for article in articles:
    sentiment = sentiment_analysis(article['content'])
    article['sentiment'] = sentiment

for article in articles:
    for k,v in article.items():
        print(f"{k}: {v}")
        print("-" * 40)
        


Fetching news for: AAPL
Fetching news for: MSFT
Fetching news for: GOOGL
title: Meta wins FTC antitrust suit, will keep Instagram and WhatsApp
----------------------------------------
published: Tue, 18 Nov 2025 18:53:43 +0000
----------------------------------------
link: https://finance.yahoo.com/news/meta-wins-ftc-antitrust-suit-will-keep-instagram-and-whatsapp-185343578.html?.tsrc=rss
----------------------------------------
content: Oops, something went wrong A US district judge on Tuesday ruled against the Federal Trade Commission (FTC) in its antitrust lawsuit against social media giant Meta (META). In his ruling, Judge James Boasberg said the FTC failed to prove that Meta purchased Instagram and WhatsApp with the goal of eliminating them as competitors in the social media market. The FTC was seeking to force Meta to divest itself of both Instagram and WhatsApp, despite the commission originally approving their purchases in 2012 and 2014, respectively. Boasberg said government l