In [1]:
import requests
import datetime

In [2]:
# Alpha Vantage API key.
# The key is read from the ALPHAVANTAGE_API_KEY environment variable so the
# secret is never stored in (or committed with) the notebook. Any key that was
# previously pasted into this cell should be treated as leaked and rotated.
import os
import warnings

api_key = os.environ.get("ALPHAVANTAGE_API_KEY", "")
if not api_key:
    # Warn instead of raising so the notebook can still be opened and read;
    # requests made with an empty key will be rejected by the API.
    warnings.warn(
        "ALPHAVANTAGE_API_KEY is not set; Alpha Vantage requests will fail "
        "until it is exported in the kernel's environment."
    )

In [4]:
# Ticker symbols whose news sentiment is requested, five per row.
tickers = [
    "AAPL", "MSFT", "AMZN", "NVDA", "META",
    "TSLA", "GOOGL", "PLTR", "APP", "SEZL",
    "DAVE", "RTO", "EADSY", "AZN", "DASTY",
    "GE", "TSM", "CLPBY", "CSGP", "TYL",
]

In [5]:
# Smoke test: request NEWS_SENTIMENT for a single ticker and preview the
# payload before looping over the full ticker list.
url = "https://www.alphavantage.co/query"

# Query parameters go through `params` so requests handles URL encoding, and a
# timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(
    url,
    params={"function": "NEWS_SENTIMENT", "tickers": "AAPL", "apikey": api_key},
    timeout=30,
)
# Fail loudly on HTTP errors. NOTE: requests error messages include the full
# request URL (API key included) - clear this cell's output before sharing.
response.raise_for_status()
data = response.json()

# Preview at most the first five top-level entries when the payload is a JSON
# object; any other JSON value is shown unchanged.
top_items = dict(list(data.items())[:5]) if isinstance(data, dict) else data
print(top_items)



In [20]:
# Fetch NEWS_SENTIMENT articles for every ticker and keep those published in
# the last six months (approximated as 180 days).
#
# Produces:
#   today, six_months_ago - datetime.date bounds of the window (inclusive)
#   news_data             - dict: ticker -> list of article dicts in the window
#
# NOTE(review): the window is applied client-side only, to whatever the API
# returns by default. The endpoint documents `time_from` / `limit` parameters
# that may be needed to actually cover six months - confirm against the docs.
today = datetime.date.today()
six_months_ago = today - datetime.timedelta(days=180)

# Dictionary to store news data for each ticker
news_data = {}

for ticker in tickers:
    filtered_articles = []

    try:
        response = requests.get(
            "https://www.alphavantage.co/query",
            params={"function": "NEWS_SENTIMENT", "tickers": ticker, "apikey": api_key},
            timeout=30,  # never let one stalled request hang the whole loop
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Only the exception type is printed: requests error messages embed
        # the request URL, which contains the API key.
        print(f"{ticker}: request failed ({type(e).__name__}); recording 0 articles")
        news_data[ticker] = filtered_articles
        continue

    feed = data.get("feed") if isinstance(data, dict) else None
    if feed is None:
        # A payload without "feed" is not an empty result. It is presumably an
        # API notice (e.g. rate limit or rejected key - confirm against the
        # docs). Show it instead of silently reporting "0 articles".
        detail = str(data)
        if api_key:
            # The notice may echo the key back; keep it out of the output.
            detail = detail.replace(api_key, "<redacted>")
        print(f"{ticker}: response has no 'feed': {detail[:300]}")
        feed = []

    for article in feed:
        published_str = article.get("time_published")
        if not published_str:
            continue

        try:
            # Timestamps are parsed as YYYYMMDDTHHMMSS (e.g. 20250318T141500).
            published_date = datetime.datetime.strptime(published_str, '%Y%m%dT%H%M%S').date()
        except (ValueError, TypeError) as e:
            print(f"Error parsing date {published_str}: {e}")
            continue

        # Filter articles to only those within the last six months
        if six_months_ago <= published_date <= today:
            filtered_articles.append(article)

    news_data[ticker] = filtered_articles

# For demonstration, print the number of articles per ticker within the specified range.
for ticker, articles in news_data.items():
    print(f"{ticker}: {len(articles)} articles from the last 6 months")

AAPL: 0 articles from the last 6 months
MSFT: 0 articles from the last 6 months
AMZN: 0 articles from the last 6 months
NVDA: 0 articles from the last 6 months
META: 0 articles from the last 6 months
TSLA: 0 articles from the last 6 months
GOOGL: 0 articles from the last 6 months
PLTR: 0 articles from the last 6 months
APP: 0 articles from the last 6 months
SEZL: 0 articles from the last 6 months
DAVE: 0 articles from the last 6 months
RTO: 0 articles from the last 6 months
EADSY: 0 articles from the last 6 months
AZN: 0 articles from the last 6 months
DASTY: 0 articles from the last 6 months
GE: 0 articles from the last 6 months
TSM: 0 articles from the last 6 months
CLPBY: 0 articles from the last 6 months
CSGP: 0 articles from the last 6 months
TYL: 0 articles from the last 6 months


### Using FinBERT for Sentiment Analysis

In [7]:
#pip install transformers torch

In [8]:
import torch
from transformers import BertForSequenceClassification, BertTokenizer

# ProsusAI FinBERT checkpoint: classification model plus its matching tokenizer.
# NOTE(review): `tokenizer` and `model` are bound again by the later
# "FinBERT Initialization" cell (finbert-tone checkpoint), so these two objects
# are replaced once that cell runs.
model_name = "ProsusAI/finbert"
model = BertForSequenceClassification.from_pretrained(model_name)
tokenizer = BertTokenizer.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Build the plain-text input for sentiment scoring.
# news_data maps each ticker to a list of article dicts. An article is used
# only when it has both a "title" and a "summary"; the two are joined with a
# single space. ("title" is the headline, not the article body.)
stock_texts = {
    ticker: [
        article["title"] + " " + article["summary"]
        for article in articles
        if "title" in article and "summary" in article
    ]
    for ticker, articles in news_data.items()
}

# Publish each ticker's text list as a module-level `<TICKER>_articles`
# variable; tickers missing from stock_texts get an empty list.
# NOTE(review): prefer stock_texts[ticker] in new code.
for ticker in tickers:
    ticker_texts = stock_texts.get(ticker, [])
    globals()[f"{ticker}_articles"] = ticker_texts

# Print a bounded sample for verification instead of dumping every text, and
# read it from stock_texts so the cell does not depend on a dynamically
# created global being present.
aapl_texts = stock_texts.get("AAPL", [])
print(f"AAPL Articles ({len(aapl_texts)} total, showing up to 3):", aapl_texts[:3])



In [13]:
# --- FinBERT (finbert-tone) sentiment pipeline ---
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# One checkpoint name drives both the model and the tokenizer load.
finbert_checkpoint = "yiyanghkust/finbert-tone"

# Binds `model` and `tokenizer` to the finbert-tone checkpoint; the pipeline
# built from them is what the scoring cell calls as `finbert`.
model = AutoModelForSequenceClassification.from_pretrained(finbert_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(finbert_checkpoint)
finbert = pipeline("sentiment-analysis", tokenizer=tokenizer, model=model)

Device set to use mps:0


In [14]:
# --- Score every article in news_data with FinBERT ---
# Produces articles_sentiment: one dict per scored article with keys
#   "stock", "date" (ISO date string), "headline", "summary",
#   "sentiment" (highest-scoring label) and
#   "confidence_scores" (title-cased label -> score).
articles_sentiment = []

for ticker, articles in news_data.items():
    batch_texts = []
    article_info_list = []
    for article in articles:
        # Only articles with a headline, a summary and a timestamp are scored.
        if not ("title" in article and "summary" in article and "time_published" in article):
            continue
        try:
            # Parse publication date using the provided format
            date_obj = datetime.datetime.strptime(article["time_published"], '%Y%m%dT%H%M%S').date()
        except (ValueError, TypeError) as e:
            # Report the skipped article instead of dropping it silently.
            print(f"{ticker}: skipping article with unparseable date {article['time_published']!r}: {e}")
            continue
        headline = article["title"]
        summary = article["summary"]
        batch_texts.append(headline + " " + summary)  # Concatenate for FinBERT input
        article_info_list.append({
            "stock": ticker,
            "date": date_obj.isoformat(),
            "headline": headline,
            "summary": summary
        })

    if not batch_texts:
        continue

    # top_k=None returns the score of every label for each text (it replaces
    # the deprecated return_all_scores=True). Truncation keeps over-long inputs
    # within BERT's 512-token limit instead of failing inside the model.
    results = finbert(batch_texts, top_k=None, truncation=True, max_length=512)
    for info, res in zip(article_info_list, results):
        # .title() normalises label casing so later cells can rely on
        # "Positive" / "Neutral" / "Negative".
        confidence_scores = {d["label"].title(): d["score"] for d in res}
        info["sentiment"] = max(confidence_scores, key=confidence_scores.get)
        info["confidence_scores"] = confidence_scores
        articles_sentiment.append(info)



In [15]:
# --- Per-stock aggregation over the whole six-month window ---
# For each stock: count articles per sentiment label and accumulate a signed
# confidence total (+Positive score, -Negative score, Neutral adds nothing).
# sentiment_score is that total divided by the stock's article count.
from collections import defaultdict

counter_for_label = {
    "Positive": "positive_count",
    "Neutral": "neutral_count",
    "Negative": "negative_count",
}

aggregated = defaultdict(
    lambda: {
        "positive_count": 0,
        "neutral_count": 0,
        "negative_count": 0,
        "weighted_sum": 0.0,
        "count": 0,
    }
)

for scored in articles_sentiment:
    bucket = aggregated[scored["stock"]]
    label = scored["sentiment"]
    if label in counter_for_label:
        bucket[counter_for_label[label]] += 1
    if label == "Positive":
        bucket["weighted_sum"] += scored["confidence_scores"]["Positive"]
    elif label == "Negative":
        bucket["weighted_sum"] -= scored["confidence_scores"]["Negative"]
    # Every article counts toward the denominator, whatever its label.
    bucket["count"] += 1

# The same period label is attached to every row of the summary.
period_label = f"{six_months_ago.isoformat()} to {today.isoformat()}"

stock_summary = [
    {
        "stock": symbol,
        "date": period_label,
        "positive_count": totals["positive_count"],
        "neutral_count": totals["neutral_count"],
        "negative_count": totals["negative_count"],
        "sentiment_score": totals["weighted_sum"] / totals["count"] if totals["count"] > 0 else 0,
    }
    for symbol, totals in aggregated.items()
]

In [None]:
#pip install tabulate

Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import pandas as pd

# Convert the stock_summary list of dictionaries into a DataFrame
df_stock_summary = pd.DataFrame(stock_summary)

# Target file: ~/Desktop/stock_summary.csv. The folder is created when missing
# (e.g. on a headless machine) so the export does not fail with an OSError.
desktop_path = os.path.join(os.path.expanduser("~"), "Desktop")
os.makedirs(desktop_path, exist_ok=True)
csv_path = os.path.join(desktop_path, "stock_summary.csv")

# Save the DataFrame as a CSV file on the desktop
df_stock_summary.to_csv(csv_path, index=False)

# Display the DataFrame in a tabular format for readability.
# to_markdown() needs the optional `tabulate` package; fall back to pandas'
# plain-text rendering when it is not installed.
try:
    print(df_stock_summary.to_markdown())
except ImportError:
    print(df_stock_summary.to_string())

|    | stock   | date                     |   positive_count |   neutral_count |   negative_count |   sentiment_score |
|---:|:--------|:-------------------------|-----------------:|----------------:|-----------------:|------------------:|
|  0 | AAPL    | 2024-09-19 to 2025-03-18 |               18 |              23 |                9 |          0.175219 |
|  1 | MSFT    | 2024-09-19 to 2025-03-18 |               27 |              20 |                3 |          0.467823 |
|  2 | AMZN    | 2024-09-19 to 2025-03-18 |               22 |              18 |               10 |          0.264954 |
