In [None]:
import pandas as pd

# Read the raw headlines (point the path at your own file if it differs) and
# convert the 'date' column from text to pandas datetimes in one chain.
news_df = pd.read_csv('news_data.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)


In [None]:
import yfinance as yf

# Download AAPL price history for the study window.
stock_df = yf.download('AAPL', start='2024-12-01', end='2025-06-01')

# Recent yfinance releases return two-level columns even for a single ticker
# (level 0 appears to be the price field, level 1 the ticker -- confirm against
# the installed version). Keep only the field level so that 'Close' selects a
# Series and the frame can be merged with single-level frames on 'Date'.
if isinstance(stock_df.columns, pd.MultiIndex):
    stock_df.columns = stock_df.columns.get_level_values(0)

# Move the date index into a regular 'Date' column and make sure it is a
# pandas datetime so it can serve as a merge key.
stock_df = stock_df.reset_index()
stock_df['Date'] = pd.to_datetime(stock_df['Date'])


In [None]:
from textblob import TextBlob

def get_sentiment(text):
    """Return the TextBlob polarity score for a piece of text.

    Args:
        text: The text to score. Non-string values are converted with ``str``
            before being handed to TextBlob.

    Returns:
        The ``sentiment.polarity`` value TextBlob reports for the text, or
        ``NaN`` when ``text`` is missing (``None``, ``NaN`` or ``pd.NA``).
    """
    # Without this guard a missing headline would be scored as the literal
    # string 'nan' / 'None' and counted as a real observation. Returning NaN
    # lets NaN-skipping aggregations (such as a groupby mean) ignore it.
    if not isinstance(text, str) and pd.api.types.is_scalar(text) and pd.isna(text):
        return float('nan')
    return TextBlob(str(text)).sentiment.polarity

# Score every headline and store the result next to it in a 'sentiment' column.
news_df['sentiment'] = news_df['headline'].map(get_sentiment)


In [None]:
# Reduce each timestamp to its calendar day while keeping a datetime64 dtype.
# `.dt.date` would yield plain Python date objects (object dtype), which
# pandas refuses to merge against a datetime64 'Date' key; `.dt.normalize()`
# avoids that and also keeps this cell safe to re-run.
news_days = pd.to_datetime(news_df['date'])
if news_days.dt.tz is not None:
    # Drop the timezone but keep the local wall-clock day so the key is
    # tz-naive (assumes the price table's 'Date' column is tz-naive -- TODO confirm).
    news_days = news_days.dt.tz_localize(None)
news_df['date'] = news_days.dt.normalize()

# One row per day: the mean sentiment of that day's headlines, with columns
# renamed to line up with the price table ('Date') and to be self-describing.
daily_sentiment = (
    news_df.groupby('date')['sentiment']
    .mean()
    .reset_index()
    .rename(columns={'date': 'Date', 'sentiment': 'avg_sentiment'})
)


In [None]:
# Daily return: fractional change of each closing price relative to the
# previous row's close (the first row has no predecessor, so it is NaN).
close_prices = stock_df['Close']
stock_df['Return'] = close_prices.pct_change()


In [None]:
# Keep only the dates present in both tables, then discard rows that lack
# either a sentiment score or a return.
merged_df = (
    stock_df
    .merge(daily_sentiment, how='inner', on='Date')
    .dropna(subset=['avg_sentiment', 'Return'])
)
merged_df.head()


In [None]:
# Pearson correlation between each day's mean sentiment and that day's return.
sentiment_series = merged_df['avg_sentiment']
return_series = merged_df['Return']
correlation = sentiment_series.corr(return_series, method='pearson')
print(f'Correlation between sentiment and return: {correlation:.4f}')


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Scatter of daily mean sentiment against daily return, with dashed grey
# reference lines through zero on both axes; the title reports the correlation.
fig, ax = plt.subplots()
sns.scatterplot(data=merged_df, x='avg_sentiment', y='Return', ax=ax)
ax.axhline(0, color='gray', linestyle='--')
ax.axvline(0, color='gray', linestyle='--')
ax.set_title(f'Sentiment vs Return (Correlation = {correlation:.2f})')
ax.set_xlabel('Average Daily Sentiment')
ax.set_ylabel('Daily Return')
plt.show()
