In [None]:
import pandas as pd

# Load news data (Task 1 output)
news_df = pd.read_csv('../data/news_data.csv')  # adjust the path
# Collapse every timestamp to midnight of its calendar day: the join below is
# meant to be per day, and an exact-timestamp match would silently drop any
# headline whose 'date' carries a time-of-day component.
# NOTE(review): if the raw dates carry timezone offsets, confirm how they
# should map onto trading days before relying on this join.
news_df['date'] = pd.to_datetime(news_df['date']).dt.normalize()

# Load stock data (Task 2 output)
stock_df = pd.read_csv('../data/stock_data.csv')  # adjust the path
# Same day-level normalization so both join keys have identical granularity
stock_df['date'] = pd.to_datetime(stock_df['date']).dt.normalize()

# Merge by calendar day; the inner join keeps only days present in both sources
merged_df = pd.merge(news_df, stock_df, on='date', how='inner')
merged_df.head()


In [None]:
from textblob import TextBlob

# Score a single headline with TextBlob's polarity measure
def get_sentiment(text):
    """Return the TextBlob sentiment polarity of ``text``.

    Parameters
    ----------
    text : str
        Headline to score.

    Returns
    -------
    float
        ``TextBlob(text).sentiment.polarity`` for string input, or ``nan``
        when ``text`` is not a string.
    """
    # Missing headlines read from CSV arrive as float NaN, and TextBlob rejects
    # non-string input with a TypeError. Returning NaN (rather than 0.0) keeps
    # such rows out of downstream means instead of counting them as neutral.
    if not isinstance(text, str):
        return float("nan")
    return TextBlob(text).sentiment.polarity

# Score every headline and store the polarity next to it
headline_scores = merged_df['headline'].map(get_sentiment)
merged_df['sentiment'] = headline_scores


In [None]:
# Average the headline polarities per date; 'date' stays a regular column
daily_sentiment = merged_df.groupby('date', as_index=False)['sentiment'].mean()


In [None]:
# Daily % change in the closing price.
# pct_change compares each row with the one before it, so it must run over
# chronologically ordered rows. The sorted view keeps the original index
# labels, and the column assignment aligns on those labels, so each row
# receives its own return while stock_df keeps its original row order.
# NOTE(review): this assumes one row per date (a single ticker); if the file
# holds several tickers, compute the change per ticker instead.
stock_df['daily_return'] = (
    stock_df
    .sort_values('date', kind='stable')['Close']
    .pct_change()
)
daily_returns = stock_df[['date', 'daily_return']]


In [None]:
# Pair each day's mean sentiment with that day's return, then discard rows
# with any missing value (e.g. a day whose return could not be computed).
correlation_df = (
    daily_sentiment
    .merge(daily_returns, on='date', how='inner')
    .dropna()
)
correlation_df.head()


In [None]:
# Pearson correlation (the pandas default) between same-day sentiment and return
correlation = correlation_df['sentiment'].corr(
    correlation_df['daily_return'], method='pearson'
)
summary_line = f"Correlation between daily sentiment and stock returns: {correlation:.3f}"
print(summary_line)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# One point per day: mean sentiment on x, that day's return on y
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=correlation_df, x='sentiment', y='daily_return', ax=scatter_ax)
scatter_ax.set_title("Correlation between News Sentiment and Stock Daily Returns")
scatter_ax.set_xlabel("Average Daily Sentiment")
scatter_ax.set_ylabel("Daily Stock Returns")
scatter_ax.grid(True)
plt.show()


In [None]:
# Sentiment and returns over time on a shared date axis. The two series live
# on very different scales, so each gets its own y-axis (left: sentiment,
# right: returns), colour-matched to its line.
fig, ax1 = plt.subplots(figsize=(12,6))

ax1.set_xlabel('Date')
ax1.set_ylabel('Average Sentiment', color='blue')
ax1.plot(correlation_df['date'], correlation_df['sentiment'], color='blue', label='Sentiment')
ax1.tick_params(axis='y', labelcolor='blue')
# Set the title before tight_layout() so the layout pass reserves room for it
ax1.set_title("Daily Sentiment vs. Stock Returns Over Time")

ax2 = ax1.twinx()
ax2.set_ylabel('Stock Returns', color='red')
ax2.plot(correlation_df['date'], correlation_df['daily_return'], color='red', label='Returns')
ax2.tick_params(axis='y', labelcolor='red')

# Each axes only tracks its own line, so gather the handles from both and
# draw a single combined legend; it goes on ax2, the axes created last, so
# the red line is not drawn over it.
sentiment_handles, sentiment_labels = ax1.get_legend_handles_labels()
return_handles, return_labels = ax2.get_legend_handles_labels()
ax2.legend(
    sentiment_handles + return_handles,
    sentiment_labels + return_labels,
    loc='upper left',
)

fig.tight_layout()
plt.show()
