# Task 3 – Sentiment vs Stock Returns Correlation

This notebook:

- Computes sentiment scores for news headlines.
- Aggregates them to daily (and optionally per-stock) sentiment.
- Computes daily stock returns.
- Aligns dates and calculates Pearson correlation between sentiment and returns.

Use this as the basis for your final report and trading-strategy discussion.


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import yfinance as yf

from src.config import ensure_data_dirs
from src.data_loading import load_news_csv, load_prices_csv
from src.sentiment import compute_textblob_sentiment
from src.correlation import (
    compute_daily_returns,
    aggregate_daily_sentiment,
    merge_sentiment_and_returns,
    pearson_correlation,
)

sns.set(style="whitegrid")
ensure_data_dirs()

# --- Parameters ---
NEWS_CSV = "fns_news.csv"  # in data/raw
TICKER = "AAPL"
START = "2020-01-01"
END = None  # no explicit end date is passed on to yfinance

# --- Load data ---
news = load_news_csv(NEWS_CSV)
print(f"Loaded {len(news):,} news rows")

# Price data (yfinance by default)
prices_raw = yf.download(TICKER, start=START, end=END)

# yfinance reports download problems through logging and hands back an empty
# frame, so fail loudly here instead of producing an empty analysis later on.
if prices_raw.empty:
    raise RuntimeError(
        f"No price data returned for {TICKER!r} (start={START}, end={END})"
    )

prices = prices_raw.copy()
# Recent yfinance releases return (Price, Ticker) MultiIndex columns even for
# a single ticker. Keep only the first level so "Close" is a plain column
# label and the reset index below becomes a plain "Date" column.
if isinstance(prices.columns, pd.MultiIndex):
    prices.columns = prices.columns.get_level_values(0)
prices = prices.reset_index()
print(f"Loaded {len(prices):,} price rows")

news.head()


In [None]:
# Score each headline; the helper is expected to add the polarity and
# subjectivity columns previewed below.

news_sent = compute_textblob_sentiment(news, text_col="headline")

# Preview only the text and the two sentiment columns.
news_sent.loc[:, ["headline", "sentiment_polarity", "sentiment_subjectivity"]].head()


In [None]:
# Aggregate to daily sentiment (per stock if stock column exists)

has_stock_col = "stock" in news_sent.columns

# The returns used later in this notebook are for TICKER only, so when the
# headlines are tagged per stock keep just TICKER's headlines. Otherwise every
# stock's sentiment would be paired with TICKER's returns.
# NOTE(review): assumes the "stock" column holds ticker symbols written the
# same way as TICKER - confirm against the news file.
if has_stock_col:
    news_for_ticker = news_sent[news_sent["stock"] == TICKER]
    print(
        f"Using {len(news_for_ticker):,} of {len(news_sent):,} headlines "
        f"tagged {TICKER}"
    )
else:
    news_for_ticker = news_sent

sent_daily = aggregate_daily_sentiment(
    news_for_ticker,
    date_col="date",
    sentiment_col="sentiment_polarity",
    stock_col="stock" if has_stock_col else None,
)

sent_daily.head()


In [None]:
# Derive daily returns from the "Close" column of the price frame.

prices_ret = compute_daily_returns(prices, date_col="Date", close_col="Close")

# Preview the date, the closing price and the resulting return.
prices_ret.loc[:, ["Date", "Close", "daily_return"]].head()


In [None]:
# Merge sentiment and returns on date

merged = merge_sentiment_and_returns(sent_daily, prices_ret)

# Drop NaNs for plotting / correlation
merged_clean = merged.dropna(subset=["avg_sentiment", "daily_return"])
print(
    f"{len(merged_clean):,} of {len(merged):,} merged rows have both "
    "sentiment and return"
)

# Last expression of the cell, so the notebook renders it as a table.
merged[["date", "avg_sentiment", "daily_return"]].head()


In [None]:
# Scatter plot and correlation

# Explicit figure/axes objects instead of the implicit pyplot "current axes".
fig, ax = plt.subplots(figsize=(6, 6))
sns.scatterplot(
    data=merged_clean, x="avg_sentiment", y="daily_return", alpha=0.6, ax=ax
)
# Zero reference lines split the plot into sign quadrants.
ax.axhline(0, color="grey", linewidth=0.8)
ax.axvline(0, color="grey", linewidth=0.8)
ax.set_xlabel("Average daily sentiment")
ax.set_ylabel("Daily return")
ax.set_title("Daily sentiment vs daily returns")
fig.tight_layout()

# A Pearson correlation is undefined for fewer than two observations, so
# report that instead of failing inside the helper or the format string.
if len(merged_clean) < 2:
    print(
        "Not enough overlapping sentiment/return observations to compute "
        "a correlation."
    )
else:
    corr, p_val = pearson_correlation(merged_clean)
    print(f"Pearson correlation: {corr:.3f} (p-value={p_val:.4f})")
