**News Data**

In [1]:
import requests
import pandas as pd
import numpy as np

pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_rows", None)

In [2]:
BASE_URL = "https://content.guardianapis.com/search"
def news_api(API_KEY: str):
    params = {
        "api-key": API_KEY,
        "from-date": "2025-03-02",
        "to-date": "2025-03-02",
        "sectionId": "business",
        # "sectionName": "business",
        "q": "US stock market AND Wall street AND US Economy AND US finance AND American business",
        "show-fields": "headline, body",
        "order-by": "newest",
        "page-size": 100
    }

    response = requests.get(BASE_URL, params)
    data = response.json()

    articles = []
    for article in data["response"]["results"]:
        articles.append({
            "Title": article["webTitle"],
            "URL": article["webUrl"],
            "Publication Date": article["webPublicationDate"],
        })

    df = pd.DataFrame(articles)
    df = df.set_index("Publication Date")

    return df

In [3]:
news_data = news_api("75e3c8c0-28e6-4166-961c-a72883c8ea3a")

In [4]:
news_data

Unnamed: 0_level_0,Title,URL
Publication Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-03-02T23:34:24Z,Greek PM vows to upgrade railways as government faces confidence vote,https://www.theguardian.com/world/2025/mar/02/greek-pm-vows-upgrade-railways-government-faces-confidence-vote
2025-03-02T23:11:18Z,Multiple wildfires in North and South Carolina force evacuations,https://www.theguardian.com/us-news/2025/mar/02/wildfires-north-south-carolina-evacuations
2025-03-02T22:37:22Z,Keir Starmer says Europe ‘at crossroads in history’ and must support Ukraine,https://www.theguardian.com/world/2025/mar/02/keir-starmer-europe-crossroads-history-must-support-ukraine
2025-03-02T21:55:33Z,Defiant but tactful Zelenskyy seeks to move on from White House fiasco,https://www.theguardian.com/world/2025/mar/02/zelenskyy-offers-no-apology-to-trump-as-he-says-row-brought-nothing-positive
2025-03-02T21:39:25Z,Trump invites freed Israeli hostage to White House,https://www.theguardian.com/us-news/2025/mar/02/trump-israeli-hostage-eli-sharabi
2025-03-02T21:36:39Z,Geyse ‘lonely’ at Manchester United as Gotham FC enter loan talks,https://www.theguardian.com/football/2025/mar/02/geyse-describes-lonely-feeling-amid-manchester-united-loan-allegations
2025-03-02T21:14:15Z,Fulham’s sensible plan punishes a fading institution with stubborn tactics | John Brewin,https://www.theguardian.com/football/2025/mar/02/fulhams-sensible-plan-punishes-fading-institution-with-ruben-amorims-stubborn-tactics
2025-03-02T21:14:12Z,Amorim hits back at Rooney after Manchester United cup exit to Fulham,https://www.theguardian.com/football/2025/mar/02/ruben-amorim-wayne-rooney-manchester-united-fulham-fa-cup
2025-03-02T21:00:17Z,Corrections and clarifications,https://www.theguardian.com/news/2025/mar/02/corrections-and-clarifications1
2025-03-02T20:26:58Z,Starmer struggles to remain upright under the weight of his contradictions | Zoe Williams,https://www.theguardian.com/politics/2025/mar/02/starmer-struggles-to-remain-upright-under-the-weight-of-his-contradictions


In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import scipy
import tqdm as notebook_tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def calculate_compound_scores(probabilities):
    return probabilities['positive'] - probabilities['negative']

In [7]:
news_titles = list(news_data["Title"].values)
print(news_titles)

['Greek PM vows to upgrade railways as government faces confidence vote', 'Multiple wildfires in North and South Carolina force evacuations', 'Keir Starmer says Europe ‘at crossroads in history’ and must support Ukraine', 'Defiant but tactful Zelenskyy seeks to move on from White House fiasco', 'Trump invites freed Israeli hostage to White House', 'Geyse ‘lonely’ at Manchester United as Gotham FC enter loan talks', 'Fulham’s sensible plan punishes a fading institution with stubborn tactics | John Brewin', 'Amorim hits back at Rooney after Manchester United cup exit to Fulham', 'Corrections and clarifications', 'Starmer struggles to remain upright under the weight of his contradictions | Zoe Williams', 'Morning Mail: AFL loses control on gambling oversight; Europe at Ukraine ‘crossroads’; WA renewables stall', 'Starmer’s plan for peace in Ukraine: key takeaways from London summit', 'Women’s Super League clubs will hold vote on radical plan to scrap relegation', 'Manchester United 1-1 Fu

In [8]:
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

inputs = tokenizer(news_titles, return_tensors="pt", padding=True, truncation=True)

with torch.no_grad():
    logits = model(**inputs).logits

probabilities = scipy.special.softmax(logits.numpy(), axis=1)
labels = list(model.config.id2label.values())

sentiment_scores = []
for i, title in enumerate(news_titles):
    scores_dict = {labels[j]: probabilities[i][j] for j in range(len(labels))}
    sentiment_scores.append(scores_dict)

compound_scores = [calculate_compound_scores(scores) for scores in sentiment_scores]
print(compound_scores)

[np.float32(0.85585856), np.float32(-0.92134804), np.float32(0.5051684), np.float32(-0.2585921), np.float32(0.02710332), np.float32(0.025343686), np.float32(-0.4260723), np.float32(0.6375139), np.float32(-0.027394693), np.float32(-0.060982056), np.float32(-0.8650409), np.float32(0.058208108), np.float32(0.021733552), np.float32(-0.023927208), np.float32(-0.0038070604), np.float32(0.073991194), np.float32(0.014752053), np.float32(0.15395576), np.float32(-0.022308543), np.float32(-0.020281158), np.float32(0.233286), np.float32(-0.8916016), np.float32(0.015455309), np.float32(0.14792088), np.float32(-0.7310732), np.float32(-0.9556407), np.float32(-0.0052545965), np.float32(0.04224082), np.float32(0.0050233454), np.float32(-0.019464374), np.float32(0.035365492), np.float32(0.7551421), np.float32(-0.82776386), np.float32(-0.17578715), np.float32(0.09845157), np.float32(0.14255872), np.float32(0.14531866), np.float32(0.3323421), np.float32(0.9259946), np.float32(-0.68804103), np.float32(0.00

In [9]:
print([float(x) for x in compound_scores])

[0.855858564376831, -0.9213480353355408, 0.5051683783531189, -0.2585920989513397, 0.027103319764137268, 0.02534368634223938, -0.42607229948043823, 0.6375138759613037, -0.027394693344831467, -0.060982055962085724, -0.8650408983230591, 0.05820810794830322, 0.02173355221748352, -0.02392720803618431, -0.003807060420513153, 0.07399119436740875, 0.01475205272436142, 0.15395575761795044, -0.02230854332447052, -0.02028115838766098, 0.23328599333763123, -0.8916016221046448, 0.015455309301614761, 0.1479208767414093, -0.7310732007026672, -0.9556406736373901, -0.005254596471786499, 0.042240820825099945, 0.005023345351219177, -0.01946437358856201, 0.03536549210548401, 0.755142092704773, -0.8277638554573059, -0.17578715085983276, 0.09845156967639923, 0.1425587236881256, 0.1453186571598053, 0.33234208822250366, 0.9259945750236511, -0.6880410313606262, 0.008813988417387009, -0.1765604019165039, 0.20970158278942108, 0.03354199603199959, 0.08203529566526413, -0.46145039796829224, 0.14147228002548218, 0.

In [10]:
len(compound_scores)

100

## start from scratch

**News Data**

In [1]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_rows", None)

In [11]:
def get_news_data(API_KEY: str, ticker: str, from_date: str, to_date:str):
    start = datetime.strptime(from_date, "%Y-%m-%d").date()
    end = datetime.strptime(to_date, "%Y-%m-%d").date()
    dates_generated = [start + timedelta(days=x) for x in range(0, (end-start).days+1)]

    dow_jones_companies = {
        "MMM": "3M Company",
        "AXP": "American Express",
        "AMGN": "Amgen",
        "AAPL": "Apple",
        "BA": "Boeing",
        "CAT": "Caterpillar",
        "CVX": "Chevron",
        "CSCO": "Cisco Systems",
        "KO": "Coca-Cola",
        "DOW": "Dow Inc.",
        "GS": "Goldman Sachs",
        "HD": "Home Depot",
        "HON": "Honeywell International",
        "IBM": "IBM",
        "INTC": "Intel",
        "JNJ": "Johnson & Johnson",
        "JPM": "JPMorgan Chase",
        "MCD": "McDonald's",
        "MRK": "Merck & Co.",
        "MSFT": "Microsoft",
        "NKE": "Nike",
        "PG": "Procter & Gamble",
        "CRM": "Salesforce",
        "TRV": "The Travelers Companies Inc.",
        "UNH": "UnitedHealth Group",
        "V": "Visa",
        "VZ": "Verizon",
        "WBA": "Walgreens Boots Alliance",
        "WMT": "Walmart",
        "DIS": "Disney"
    }

    articles = []
    for days in dates_generated:
        days = datetime.strftime(days, "%Y-%m-%d")

        BASE_URL = "https://content.guardianapis.com/search"
        params = {
            "api-key": API_KEY,
            "from-date": days,
            "to-date": days,
            "sectionId": "business",
            "sectionName": "business news",
            "q": f"{dow_jones_companies[ticker]}",
            "show-fields": "headline, body",
            "order-by": "newest",
            "page-size": 100
        }
        response = requests.get(BASE_URL, params)
        data = response.json()
        
        for article in data["response"]["results"]:
            articles.append({
                "Title": article["webTitle"],
                "URL": article["webUrl"],
                "Publication Date": article["webPublicationDate"],
            })
    
    df = pd.DataFrame(articles)
    df = df.set_index("Publication Date")

    return df

In [12]:
news = get_news_data("75e3c8c0-28e6-4166-961c-a72883c8ea3a", "AAPL", "2025-02-17", "2025-02-27")

In [13]:
news

Unnamed: 0_level_0,Title,URL
Publication Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-02-17T11:31:34Z,How I learned to love my alarm clock,https://www.theguardian.com/thefilter/2025/feb/14/how-i-learned-to-love-my-alarm-clock
2025-02-17T10:30:34Z,A perfect boiled egg in 32 minutes? Don’t let science ruin the joyful imperfection of home cooking | Alicia Kennedy,https://www.theguardian.com/commentisfree/2025/feb/17/boiled-egg-science-home-cooking-food
2025-02-17T07:00:27Z,Samsung Galaxy S25 Ultra review: still the superphone to beat,https://www.theguardian.com/technology/2025/feb/17/samsung-galaxy-s25-ultra-review-still-the-superphone-to-beat
2025-02-17T05:00:25Z,The White Lotus: everything you need to know before you watch season three,https://www.theguardian.com/tv-and-radio/2025/feb/17/the-white-lotus-season-three-need-to-know
2025-02-18T19:13:11Z,Zelenskyy again demands Ukraine is involved in talks after Russia and US agree to create team to explore peace deal – as it happened,https://www.theguardian.com/world/live/2025/feb/18/russia-us-ukraine-peace-talks-donald-trump-vladimir-putin-volodymyr-zelenskyy-saudi-arabia-live-latest-updates-news
2025-02-18T15:00:08Z,"Eric Adams, Trump and a New York story that’s stress-testing the rule of law",https://www.theguardian.com/us-news/2025/feb/18/eric-adams-donald-trump-corruption-case
2025-02-18T13:00:06Z,"More rhythm, less algorithm: why Deezer’s boss is vowing to put users in control of their music",https://www.theguardian.com/music/2025/feb/18/more-rhythm-less-algorithm-why-deezers-boss-is-vowing-to-put-users-in-control-of-their-music
2025-02-18T12:58:09Z,US and UK out of step with rest of the world on AI,https://www.theguardian.com/global/2025/feb/18/ai-artificial-intelligence-paris-summit-dei
2025-02-18T12:00:05Z,A quarter of US shoppers have dumped favorite stores over political stances,https://www.theguardian.com/us-news/2025/feb/18/shoppers-political-boycotts-spending-patterns-poll
2025-02-18T05:00:56Z,‘We must avoid a chilling effect’: the CMA chief on the UK’s pro-growth shift,https://www.theguardian.com/business/2025/feb/18/we-must-avoid-a-chilling-effect-the-cma-chief-on-the-uks-pro-growth-shift


In [14]:
len(news)

60

**News Sntiment**

In [15]:
import pandas as pd
import numpy as np
import scipy
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

def calculate_compound_scores(probabilities):
    return probabilities['positive'] - probabilities['negative']

def get_news_sentiment(df):

    titles = list(df["Title"].values)
    
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
    
    inputs = tokenizer(titles, return_tensors="pt", padding=True, truncation=True)
    
    with torch.no_grad():
        logits = model(**inputs).logits
    print("[Logits generated...]")
    print()
    
    probabilities = scipy.special.softmax(logits.numpy(), axis=1)
    labels = list(model.config.id2label.values())
    print("[Labels are listed...]")
    print()
    
    sentiment_scores = []
    for i, title in enumerate(titles):
        scores_dict = {labels[j]: probabilities[i][j] for j in range(len(labels))}
        sentiment_scores.append(scores_dict)
    print("[Sentiment scores are given...]")
    print()
    
    compound_scores = [calculate_compound_scores(scores) for scores in sentiment_scores]

    return compound_scores

  from .autonotebook import tqdm as notebook_tqdm


In [16]:
scores = get_news_sentiment(news)

[Logits generated...]

[Labels are listed...]

[Sentiment scores are given...]



In [17]:
scores = [float(x) for x in scores]
scores

[0.053536154329776764,
 0.023694958537817,
 0.7374234199523926,
 -0.004608742892742157,
 0.21465371549129486,
 0.06901916861534119,
 -0.006725773215293884,
 -0.8375101685523987,
 -0.9598056077957153,
 -0.2547241747379303,
 0.008734121918678284,
 -0.7234982848167419,
 0.1027524471282959,
 0.3217557370662689,
 -0.022469479590654373,
 -0.84148108959198,
 0.1504307985305786,
 0.0495477169752121,
 0.024260705336928368,
 0.08338617533445358,
 -0.13962668180465698,
 -0.22562026977539062,
 0.02011212706565857,
 0.20570792257785797,
 0.07290148735046387,
 0.03865300863981247,
 0.5697730779647827,
 0.00017853453755378723,
 0.003215830773115158,
 -0.8900735974311829,
 0.053853146731853485,
 0.12752550840377808,
 0.03001607209444046,
 0.2262074500322342,
 0.056911714375019073,
 0.009927257895469666,
 0.14790011942386627,
 -0.44310152530670166,
 -0.061725880950689316,
 -0.4474561810493469,
 -0.0004861988127231598,
 0.07136111706495285,
 -0.24872854351997375,
 -0.04233866184949875,
 -0.7269694209098

In [18]:
news["Sentiment Score"] = scores
news

Unnamed: 0_level_0,Title,URL,Sentiment Score
Publication Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-02-17T11:31:34Z,How I learned to love my alarm clock,https://www.theguardian.com/thefilter/2025/feb/14/how-i-learned-to-love-my-alarm-clock,0.053536
2025-02-17T10:30:34Z,A perfect boiled egg in 32 minutes? Don’t let science ruin the joyful imperfection of home cooking | Alicia Kennedy,https://www.theguardian.com/commentisfree/2025/feb/17/boiled-egg-science-home-cooking-food,0.023695
2025-02-17T07:00:27Z,Samsung Galaxy S25 Ultra review: still the superphone to beat,https://www.theguardian.com/technology/2025/feb/17/samsung-galaxy-s25-ultra-review-still-the-superphone-to-beat,0.737423
2025-02-17T05:00:25Z,The White Lotus: everything you need to know before you watch season three,https://www.theguardian.com/tv-and-radio/2025/feb/17/the-white-lotus-season-three-need-to-know,-0.004609
2025-02-18T19:13:11Z,Zelenskyy again demands Ukraine is involved in talks after Russia and US agree to create team to explore peace deal – as it happened,https://www.theguardian.com/world/live/2025/feb/18/russia-us-ukraine-peace-talks-donald-trump-vladimir-putin-volodymyr-zelenskyy-saudi-arabia-live-latest-updates-news,0.214654
2025-02-18T15:00:08Z,"Eric Adams, Trump and a New York story that’s stress-testing the rule of law",https://www.theguardian.com/us-news/2025/feb/18/eric-adams-donald-trump-corruption-case,0.069019
2025-02-18T13:00:06Z,"More rhythm, less algorithm: why Deezer’s boss is vowing to put users in control of their music",https://www.theguardian.com/music/2025/feb/18/more-rhythm-less-algorithm-why-deezers-boss-is-vowing-to-put-users-in-control-of-their-music,-0.006726
2025-02-18T12:58:09Z,US and UK out of step with rest of the world on AI,https://www.theguardian.com/global/2025/feb/18/ai-artificial-intelligence-paris-summit-dei,-0.83751
2025-02-18T12:00:05Z,A quarter of US shoppers have dumped favorite stores over political stances,https://www.theguardian.com/us-news/2025/feb/18/shoppers-political-boycotts-spending-patterns-poll,-0.959806
2025-02-18T05:00:56Z,‘We must avoid a chilling effect’: the CMA chief on the UK’s pro-growth shift,https://www.theguardian.com/business/2025/feb/18/we-must-avoid-a-chilling-effect-the-cma-chief-on-the-uks-pro-growth-shift,-0.254724
