Extraction and quantification of market sentiment from textual data such as news headlines or social media posts related to stock tickers. It fetches recent text data using AlphaVantage Global News API, preprocesses and cleans the text for analysis, and then applies the VADER sentiment analyser to assign sentiment scores (positive, negaitve, neutral and compound) to each piece of text. 
Scores are then aggregated over chosen time intervals to create a time-aligned sentiment dataset that can be merged with market price data for further modeling and visualisation. 

In [88]:
# import libraries 
import requests 
import pandas as pd 
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from datetime import datetime, timedelta
import yfinance as yf 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 

In [89]:
def fetch_alphavantage_news_df(api_url: str) -> pd.DataFrame: 
    """
    Fetches news data from Alphavantage Global News API URL and returns
    a cleaned pandas DataFrame with parsed dates.
    
    Parameters:
        api_url (str): Fully constructed API URL with key and parameters.
    
    Returns:
        pd.DataFrame: DataFrame with news articles.
    """
    response = requests.get(api_url)
    data = response.json()

    # Extract the "feed" list from the JSON response 
    news_list = data.get("feed", [])
    
    # Convert list of news dictionaries into a DataFrame
    df = pd.DataFrame(news_list)

    # Parse the published dates into datetime objects 
    df["time_published"] = pd.to_datetime(df["time_published"], errors = "coerce")

    return df

In [90]:
url = "https://www.alphavantage.co/query?function=NEWS_SENTIMENT&date= &tickers=AAPL&apikey=1D1C3X346D6ATHPG"

In [91]:
# Dynamically create the url in order to specify the ticker, date, and apikey

def build_alphavantage_news_url(ticker, date, apikey):
    url = f"https://www.alphavantage.co/query?function=NEWS_SENTIMENT&date={date}&tickers={ticker}&apikey={apikey}"
    return url

In [92]:
# Change this to dynamically fetch a different url 

news_url = build_alphavantage_news_url("AAPL", "2025-07-25","1D1C3X346D6ATHPG")

In [93]:
news_df = fetch_alphavantage_news_df(news_url)

In [94]:
# Visualise the dataframe 
print(news_df.columns.tolist())
print(news_df.head())

['title', 'url', 'time_published', 'authors', 'summary', 'banner_image', 'source', 'category_within_source', 'source_domain', 'topics', 'overall_sentiment_score', 'overall_sentiment_label', 'ticker_sentiment']
                                               title  \
0  Could Perplexity's $34.5 Billion Offer for Chr...   
1      These 3 Companies Shattered Quarterly Records   
2  Congressman Invests $6.4 Million In Magnificen...   
3  Stock Market May Enter A New Era, China Shows ...   
4  3 Mutual Funds to Buy on the Ongoing Nasdaq Rally   

                                                 url      time_published  \
0  https://www.fool.com/investing/2025/08/12/perp... 2025-08-12 22:20:03   
1  https://www.zacks.com/commentary/2703784/these... 2025-08-12 20:40:00   
2  https://www.benzinga.com/news/politics/25/08/4... 2025-08-12 18:37:55   
3  https://www.benzinga.com/markets/equities/25/0... 2025-08-12 18:28:13   
4  https://www.zacks.com/stock/news/2702965/3-mut... 2025-08-12 16:22:00 

In [95]:
# Apply VADER sentiment analysis on the news headlines using the "title" and "summary" columns

sia = SentimentIntensityAnalyzer()
def get_sentiment_scores(text): 
    if isinstance(text, str):
        return sia.polarity_scores(text)
    else:
        return {'neg': None, 'neu': None, 'pos': None, 'compound': None}


In [96]:
# Create sentiment score columns 
news_df[["neg", "neu", "pos", "compound"]] = news_df["title"].apply(get_sentiment_scores).apply(pd.Series)

In [97]:
news_df

Unnamed: 0,title,url,time_published,authors,summary,banner_image,source,category_within_source,source_domain,topics,overall_sentiment_score,overall_sentiment_label,ticker_sentiment,neg,neu,pos,compound
0,Could Perplexity's $34.5 Billion Offer for Chr...,https://www.fool.com/investing/2025/08/12/perp...,2025-08-12 22:20:03,[Adam Spatacco],Perplexity reportedly offered $34.5 billion to...,https://g.foolcdn.com/image/?url=https%3A%2F%2...,Motley Fool,,www.fool.com,"[{'topic': 'Earnings', 'relevance_score': '0.1...",0.134397,Neutral,"[{'ticker': 'MSFT', 'relevance_score': '0.1077...",0.0,1.0,0.0,0.0
1,These 3 Companies Shattered Quarterly Records,https://www.zacks.com/commentary/2703784/these...,2025-08-12 20:40:00,[Derek Lewis],"In the Q2 cycle, several companies, including ...",https://staticx-tuner.zacks.com/images/article...,Zacks Commentary,,www.zacks.com,"[{'topic': 'Earnings', 'relevance_score': '0.9...",0.392023,Bullish,"[{'ticker': 'DASH', 'relevance_score': '0.3796...",0.383,0.617,0.0,-0.4767
2,Congressman Invests $6.4 Million In Magnificen...,https://www.benzinga.com/news/politics/25/08/4...,2025-08-12 18:37:55,[Chris Katje],A Congressman bought millions of dollars in sh...,https://cdn.benzinga.com/files/images/story/20...,Benzinga,Trading,www.benzinga.com,"[{'topic': 'Technology', 'relevance_score': '0...",0.157645,Somewhat-Bullish,"[{'ticker': 'GOOG', 'relevance_score': '0.1099...",0.0,0.772,0.228,0.7096
3,"Stock Market May Enter A New Era, China Shows ...",https://www.benzinga.com/markets/equities/25/0...,2025-08-12 18:28:13,[The Arora Report],"To gain an edge, this is what you need to know...",https://www.benzinga.com/next-assets/images/sc...,Benzinga,Trading,www.benzinga.com,"[{'topic': 'Economy - Monetary', 'relevance_sc...",0.268568,Somewhat-Bullish,"[{'ticker': 'MSFT', 'relevance_score': '0.1174...",0.0,0.789,0.211,0.4939
4,3 Mutual Funds to Buy on the Ongoing Nasdaq Rally,https://www.zacks.com/stock/news/2702965/3-mut...,2025-08-12 16:22:00,[Zacks Equity Research],Nasdaq's record highs and AI-fueled tech boom ...,https://staticx-tuner.zacks.com/images/article...,Zacks Commentary,,www.zacks.com,"[{'topic': 'Economy - Monetary', 'relevance_sc...",0.285282,Somewhat-Bullish,"[{'ticker': 'JHG', 'relevance_score': '0.12502...",0.0,1.0,0.0,0.0
5,"Apple Rises 9% in a Month: Buy, Sell or Hold t...",https://www.zacks.com/stock/news/2702957/apple...,2025-08-12 16:21:00,[Zacks Investment Research],"AAPL's fiscal Q3 surge, fueled by Apple Intell...",https://staticx-tuner.zacks.com/images/article...,Zacks Commentary,,www.zacks.com,"[{'topic': 'Retail & Wholesale', 'relevance_sc...",0.345759,Somewhat-Bullish,"[{'ticker': 'MSFT', 'relevance_score': '0.1066...",0.0,1.0,0.0,0.0
6,SPOT Skyrockets 106% in a Year: How Should You...,https://www.zacks.com/stock/news/2702956/spot-...,2025-08-12 15:19:00,[Arghyadeep Bose],"Spotify grapples with ad execution issues, fie...",https://staticx-tuner.zacks.com/images/article...,Zacks Commentary,,www.zacks.com,"[{'topic': 'Retail & Wholesale', 'relevance_sc...",0.218084,Somewhat-Bullish,"[{'ticker': 'SPOT', 'relevance_score': '0.6086...",0.0,0.821,0.179,0.34
7,In-Depth Analysis: Apple Versus Competitors In...,https://www.benzinga.com/insights/news/25/08/4...,2025-08-12 15:00:53,[Benzinga Insights],Amidst the fast-paced and highly competitive b...,https://www.benzinga.com/next-assets/images/sc...,Benzinga,Markets,www.benzinga.com,"[{'topic': 'Earnings', 'relevance_score': '0.8...",0.248446,Somewhat-Bullish,"[{'ticker': 'AAPL', 'relevance_score': '0.5422...",0.0,1.0,0.0,0.0
8,5 Most-Loved ETFs of Last Week,https://www.zacks.com/stock/news/2702347/5-mos...,2025-08-12 14:00:00,[Zacks Investment Research],"ETFs saw $19B in inflows last week, led by fix...",https://staticx-tuner.zacks.com/images/default...,Zacks Commentary,,www.zacks.com,"[{'topic': 'Economy - Monetary', 'relevance_sc...",0.157978,Somewhat-Bullish,"[{'ticker': 'META', 'relevance_score': '0.0942...",0.0,1.0,0.0,0.0
9,Health & Fitness Stocks Positioned for Strong ...,https://www.zacks.com/stock/news/2702501/healt...,2025-08-12 13:35:00,[Nilanjan Choudhury],"PTON, SFM, STKL, and BODI are tapping into an ...",https://staticx-tuner.zacks.com/images/article...,Zacks Commentary,,www.zacks.com,"[{'topic': 'Retail & Wholesale', 'relevance_sc...",0.440196,Bullish,"[{'ticker': 'AAPL', 'relevance_score': '0.0646...",0.0,0.429,0.571,0.7906


In [98]:
news_df.isnull().sum()

title                      0
url                        0
time_published             0
authors                    0
summary                    0
banner_image               0
source                     0
category_within_source     0
source_domain              0
topics                     0
overall_sentiment_score    0
overall_sentiment_label    0
ticker_sentiment           0
neg                        0
neu                        0
pos                        0
compound                   0
dtype: int64

In [99]:
### Exploration of Sentiment Data 

## Structural inspection 
# Check the first few rows 
print(news_df.head())

# Check the column names 
print("\nColumn names: ")
print(news_df.columns.tolist())

# Check data types and missing values 
print("\nDataFrame info:")
print(news_df.info())

# Check for missing values 
print("\nMissing values per column:")
print(news_df.isnull().sum())

# Quick statistics for numerical columns: 
print("\nSummary statistics:")
print(news_df.describe())

                                               title  \
0  Could Perplexity's $34.5 Billion Offer for Chr...   
1      These 3 Companies Shattered Quarterly Records   
2  Congressman Invests $6.4 Million In Magnificen...   
3  Stock Market May Enter A New Era, China Shows ...   
4  3 Mutual Funds to Buy on the Ongoing Nasdaq Rally   

                                                 url      time_published  \
0  https://www.fool.com/investing/2025/08/12/perp... 2025-08-12 22:20:03   
1  https://www.zacks.com/commentary/2703784/these... 2025-08-12 20:40:00   
2  https://www.benzinga.com/news/politics/25/08/4... 2025-08-12 18:37:55   
3  https://www.benzinga.com/markets/equities/25/0... 2025-08-12 18:28:13   
4  https://www.zacks.com/stock/news/2702965/3-mut... 2025-08-12 16:22:00   

                   authors                                            summary  \
0          [Adam Spatacco]  Perplexity reportedly offered $34.5 billion to...   
1            [Derek Lewis]  In the Q2 cycle,

All the columns have the expected names and types 
There are 0 missing values 
The time_published is a datetime object which is to be expected

In [100]:
### Extract relevant ticker-level information from the ticker_sentiment column 

# Extract "ticker" and "relevance_score" into separate columns 
news_df_exploded = news_df.explode("ticker_sentiment").reset_index(drop = True)
ticker_sentiment_expanded = pd.json_normalize(news_df_exploded["ticker_sentiment"])
news_df_final = pd.concat([news_df_exploded.drop(columns = ["ticker_sentiment"]), ticker_sentiment_expanded], axis = 1)
news_df_final.drop(columns = "category_within_source", inplace = True)
news_df_final

Unnamed: 0,title,url,time_published,authors,summary,banner_image,source,source_domain,topics,overall_sentiment_score,overall_sentiment_label,neg,neu,pos,compound,ticker,relevance_score,ticker_sentiment_score,ticker_sentiment_label
0,Could Perplexity's $34.5 Billion Offer for Chr...,https://www.fool.com/investing/2025/08/12/perp...,2025-08-12 22:20:03,[Adam Spatacco],Perplexity reportedly offered $34.5 billion to...,https://g.foolcdn.com/image/?url=https%3A%2F%2...,Motley Fool,www.fool.com,"[{'topic': 'Earnings', 'relevance_score': '0.1...",0.134397,Neutral,0.0,1.0,0.0,0.0,MSFT,0.107736,0.067655,Neutral
1,Could Perplexity's $34.5 Billion Offer for Chr...,https://www.fool.com/investing/2025/08/12/perp...,2025-08-12 22:20:03,[Adam Spatacco],Perplexity reportedly offered $34.5 billion to...,https://g.foolcdn.com/image/?url=https%3A%2F%2...,Motley Fool,www.fool.com,"[{'topic': 'Earnings', 'relevance_score': '0.1...",0.134397,Neutral,0.0,1.0,0.0,0.0,GOOG,0.412017,0.103481,Neutral
2,Could Perplexity's $34.5 Billion Offer for Chr...,https://www.fool.com/investing/2025/08/12/perp...,2025-08-12 22:20:03,[Adam Spatacco],Perplexity reportedly offered $34.5 billion to...,https://g.foolcdn.com/image/?url=https%3A%2F%2...,Motley Fool,www.fool.com,"[{'topic': 'Earnings', 'relevance_score': '0.1...",0.134397,Neutral,0.0,1.0,0.0,0.0,NVDA,0.053992,0.016992,Neutral
3,Could Perplexity's $34.5 Billion Offer for Chr...,https://www.fool.com/investing/2025/08/12/perp...,2025-08-12 22:20:03,[Adam Spatacco],Perplexity reportedly offered $34.5 billion to...,https://g.foolcdn.com/image/?url=https%3A%2F%2...,Motley Fool,www.fool.com,"[{'topic': 'Earnings', 'relevance_score': '0.1...",0.134397,Neutral,0.0,1.0,0.0,0.0,AAPL,0.053992,0.069959,Neutral
4,Could Perplexity's $34.5 Billion Offer for Chr...,https://www.fool.com/investing/2025/08/12/perp...,2025-08-12 22:20:03,[Adam Spatacco],Perplexity reportedly offered $34.5 billion to...,https://g.foolcdn.com/image/?url=https%3A%2F%2...,Motley Fool,www.fool.com,"[{'topic': 'Earnings', 'relevance_score': '0.1...",0.134397,Neutral,0.0,1.0,0.0,0.0,SFTBF,0.053992,0.016992,Neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246,Apple's $600 Billion U.S. Investment Could Res...,https://www.fool.com/investing/2025/08/10/appl...,2025-08-10 07:45:00,[Patrick Sanders],Investors are hoping that Wednesday's announce...,https://g.foolcdn.com/image/?url=https%3A%2F%2...,Motley Fool,www.fool.com,"[{'topic': 'Retail & Wholesale', 'relevance_sc...",0.257684,Somewhat-Bullish,0.0,1.0,0.0,0.0,GFS,0.050894,0.173379,Somewhat-Bullish
247,Apple's $600 Billion U.S. Investment Could Res...,https://www.fool.com/investing/2025/08/10/appl...,2025-08-10 07:45:00,[Patrick Sanders],Investors are hoping that Wednesday's announce...,https://g.foolcdn.com/image/?url=https%3A%2F%2...,Motley Fool,www.fool.com,"[{'topic': 'Retail & Wholesale', 'relevance_sc...",0.257684,Somewhat-Bullish,0.0,1.0,0.0,0.0,AMZN,0.050894,0.082445,Neutral
248,Apple's AI Momentum Is Building -- Here's What...,https://www.fool.com/investing/2025/08/09/appl...,2025-08-09 17:00:00,[Stefon Walters],Apple plans to ramp up investments to catch up...,https://media.ycharts.com/charts/5f2f4d9e75818...,Motley Fool,www.fool.com,"[{'topic': 'Financial Markets', 'relevance_sco...",0.242728,Somewhat-Bullish,0.0,1.0,0.0,0.0,MSFT,0.062271,-0.108531,Neutral
249,Apple's AI Momentum Is Building -- Here's What...,https://www.fool.com/investing/2025/08/09/appl...,2025-08-09 17:00:00,[Stefon Walters],Apple plans to ramp up investments to catch up...,https://media.ycharts.com/charts/5f2f4d9e75818...,Motley Fool,www.fool.com,"[{'topic': 'Financial Markets', 'relevance_sco...",0.242728,Somewhat-Bullish,0.0,1.0,0.0,0.0,AAPL,0.815863,0.410997,Bullish


In [182]:
### Aggregate sentiment data at the level appropriate such that it matches the market data- daily per ticker 

# Sort the dataframe by ticker 
news_df_sorted = news_df_final.sort_values(by = "ticker")

# Convert time_published to date only 
news_df_sorted["time_published"] = pd.to_datetime(news_df_sorted["time_published"], errors = "coerce")
news_df_sorted["date"] = news_df_sorted["time_published"].dt.day

# Sort the values by ticker and date 
news_df_sorted.sort_values(by = ["ticker", "date"], inplace = True)

## Combine the multiple sentiment scores into a single summary value per group 

# New column with weighted sentiment per article 
news_df_sorted["ticker_sentiment_score"] = pd.to_numeric(news_df_sorted["ticker_sentiment_score"], errors = "coerce")
news_df_sorted["relevance_score"] = pd.to_numeric(news_df_sorted["relevance_score"], errors = "coerce")
news_df_sorted["weighted_sentiment_score"] = news_df_sorted["ticker_sentiment_score"] * news_df_sorted["relevance_score"]

# Group by ticket and date and then aggregate to sum the weighted sentiments, relevance scores and compute the weighted average sentiment by dividng these sums for each news article 
grouped = news_df_sorted.groupby(["ticker", "date"]).agg(
    total_weighted_sentiment = ("weighted_sentiment_score", "sum"),
    total_relevance = ("relevance_score", "sum")
).reset_index()  # optional, to turn MultiIndex into columns

grouped["weighted_avg_sentiment"] = grouped["total_weighted_sentiment"] / grouped["total_relevance"]

grouped = grouped.rename(columns = {"ticker": "Ticker", "date":"Date"})
grouped

Unnamed: 0,Ticker,Date,total_weighted_sentiment,total_relevance,weighted_avg_sentiment
0,AAPL,9,0.335317,0.815863,0.410997
1,AAPL,10,0.727302,2.276524,0.319479
2,AAPL,11,1.734102,7.389001,0.234687
3,AAPL,12,1.651391,6.584226,0.250810
4,ABNB,10,0.000000,0.074576,0.000000
...,...,...,...,...,...
116,UPST,11,0.008625,0.232598,0.037082
117,WBD,11,-0.002638,0.068698,-0.038393
118,WLDS,10,0.003150,0.050894,0.061897
119,XIACY,11,0.008716,0.045830,0.190192


In [102]:
### Coordinate the sentiment data and the market data 

# Determine date range from sentiment data to define the market data window
start_date = (news_df_final["time_published"]).dt.date.min()
end_date = (news_df_final["time_published"]).dt.date.max()

# Extract unique tickers from news_df_final["ticker"]
unique_tickers = news_df_final["ticker"].unique()

print(start_date, end_date)

2025-08-09 2025-08-12


In [None]:
unique_tickers

array(['MSFT', 'GOOG', 'NVDA', 'AAPL', 'SFTBF', 'DASH', 'ETN', 'META',
       'MDNDF', 'OPEN', 'TSM', 'BMNR', 'SOUN', 'TSLA', 'AMD', 'HOOD',
       'UBER', 'AMZN', 'PLTR', 'IVZ', 'JHG', 'SSNLF', 'AMAT', 'AVGO',
       'GFS', 'TXN', 'SPOT', 'STKL', 'PTON', 'SFM', 'BODY', 'GLW', 'INTC',
       'AXP', 'BAC', 'KO', 'BRK-A', 'ROKU', 'XOM', 'GE', 'SPGI', 'LCID',
       'RIVN', 'SNOW', 'MS', 'ON', 'MU', 'ASCCF', 'FOREX:AMD', 'BABA',
       'TCTZF', 'FOREX:USD', 'NFLX', 'SHOP', 'WBD', 'TRI', 'C', 'XIACY',
       'APP', 'BNXYF', 'CDNS', 'MELI', 'UPST', 'EFX', 'TRU', 'SVNDF',
       'UNP', 'KHC', 'NSC', 'TTD', 'BSQKZ', 'F', 'GM', 'SMCI', 'DDOG',
       'ABNB', 'DJT', 'HYMLF', 'MP', 'WLDS', 'AMKR'], dtype=object)

In [130]:
# Check Yahoo Finance download results

data = yf.download(unique_tickers.tolist(), start=start_date, end=end_date, group_by='ticker')
print("Downloaded data shape:", data.shape)
print("Downloaded data columns:", data.columns[:10])  # first 10 for preview
print("First few rows:\n", data.head())

  data = yf.download(unique_tickers.tolist(), start=start_date, end=end_date, group_by='ticker')
[*********************100%***********************]  81 of 81 completed

3 Failed downloads:
['BODY', 'FOREX:USD', 'FOREX:AMD']: YFTzMissingError('possibly delisted; no timezone found')


Downloaded data shape: (1, 408)
Downloaded data columns: MultiIndex([('SFTBF',   'Open'),
            ('SFTBF',   'High'),
            ('SFTBF',    'Low'),
            ('SFTBF',  'Close'),
            ('SFTBF', 'Volume'),
            ( 'DASH',   'Open'),
            ( 'DASH',   'High'),
            ( 'DASH',    'Low'),
            ( 'DASH',  'Close'),
            ( 'DASH', 'Volume')],
           names=['Ticker', 'Price'])
First few rows:
 Ticker     SFTBF                                           DASH              \
Price       Open       High   Low      Close Volume        Open        High   
Date                                                                          
2025-08-11  94.0  96.910004  94.0  95.440002   6400  258.190002  260.140015   

Ticker                                       ...  WLDS                    \
Price              Low       Close   Volume  ...  Open  High   Low Close   
Date                                         ...                           
2025-08-11  

In [131]:
data

Ticker,SFTBF,SFTBF,SFTBF,SFTBF,SFTBF,DASH,DASH,DASH,DASH,DASH,...,WLDS,WLDS,WLDS,WLDS,WLDS,AMAT,AMAT,AMAT,AMAT,AMAT
Price,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,...,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-08-11,94.0,96.910004,94.0,95.440002,6400,258.190002,260.140015,251.130005,256.089996,4738900,...,1.84,1.93,1.45,1.47,20181400,187.149994,189.720001,183.779999,184.380005,7816000


In [169]:
# Download data from yfinance dataset 

def fetch_and_process_daily_data(tickers, start_date, end_date):
    if isinstance(tickers, str):
        tickers = [tickers]

    df = yf.download(tickers.tolist(), start=start_date, end=end_date, interval="1d", group_by=None)

    if isinstance(df.columns, pd.MultiIndex):
        df = df.stack(level=0, future_stack=True).reset_index()
        df = df.rename(columns={'level_1': 'Ticker'})
    else:
        df = df.reset_index()
        df['Ticker'] = tickers[0]

    df["log_return"] = df.groupby("Ticker")["Close"].transform(lambda x: np.log(x / x.shift(1)))
    print(df[["Ticker", "Date", "Close", "log_return"]].head(10))
    print(df.groupby("Ticker").size())
    df = df.dropna(subset=["log_return"])

    return df


In [171]:
final_df = fetch_and_process_daily_data(unique_tickers, "2025-03-03", "2025-08-12")

  df = yf.download(tickers.tolist(), start=start_date, end=end_date, interval="1d", group_by=None)
[*********************100%***********************]  81 of 81 completed

3 Failed downloads:
['BODY', 'FOREX:USD', 'FOREX:AMD']: YFTzMissingError('possibly delisted; no timezone found')


Price Ticker       Date       Close  log_return
0       DASH 2025-03-03  198.100006         NaN
1       UPST 2025-03-03   61.689999         NaN
2        TTD 2025-03-03   67.169998         NaN
3       AMZN 2025-03-03  205.020004         NaN
4      SFTBF 2025-03-03   52.514553         NaN
5       BABA 2025-03-03  128.635818         NaN
6       AMKR 2025-03-03   20.095522         NaN
7        XOM 2025-03-03  106.776573         NaN
8         KO 2025-03-03   71.283394         NaN
9         GE 2025-03-03  201.290985         NaN
Ticker
AAPL     112
ABNB     112
AMAT     112
AMD      112
AMKR     112
        ... 
UPST     112
WBD      112
WLDS     112
XIACY    112
XOM      112
Length: 81, dtype: int64


  result = getattr(ufunc, method)(*inputs, **kwargs)


In [202]:
### Process sentiment data 

# Group by date & ticker to get daily average sentiment using weighted sentiment score
sentiment_daily = (
    grouped
    .groupby(["Date", "Ticker"])["weighted_avg_sentiment"]
    .mean()
    .reset_index()
)

# Make sure both are Date columns are datetime 
final_df["Date"] = pd.to_datetime(final_df["Date"])
sentiment_daily["Date"] = pd.to_datetime(sentiment_daily["Date"])

### Process market data
final_df["Date"] = pd.to_datetime(final_df["Date"])

### Merge market and sentiment data 
merged_df = pd.merge(final_df, sentiment_daily, on=["Date", "Ticker"], how="left")

In [204]:
merged_df["weighted_avg_sentiment"].isnull().size

8588