# Step 1: Import libraries
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as p  # original alias, retained in case code outside this view uses it
import pandas as pd
import yfinance as yf
from nltk.sentiment import SentimentIntensityAnalyzer

# Helpers for the pipeline below. They are pure pandas so they can be
# exercised without network access, the news CSV, or NLTK resources.
def to_calendar_day(values: pd.Series) -> pd.Series:
    """Return *values* as timezone-naive datetime64 timestamps at midnight.

    Both sides of the sentiment/returns merge are passed through this
    function so their join keys share one dtype. (Grouping on
    ``.dt.date`` yields ``datetime.date`` objects, which pandas refuses to
    merge against a datetime64 column.)

    Args:
        values: Series of datetimes, or of values ``pd.to_datetime`` accepts.

    Returns:
        Series of the same length with the time-of-day and any timezone
        removed.
    """
    days = pd.to_datetime(values)
    if days.dt.tz is not None:
        # Drop the zone but keep the local wall-clock date.
        days = days.dt.tz_localize(None)
    return days.dt.normalize()


def compute_daily_returns(prices: pd.DataFrame) -> pd.DataFrame:
    """Compute day-over-day percentage returns from a price table.

    Args:
        prices: Price table indexed by date. Columns may be flat or a
            two-level MultiIndex whose first level holds the field names
            (assumed to describe a single ticker).

    Returns:
        DataFrame with columns ``Date`` (timezone-naive midnight
        timestamps) and ``daily_return``; the first row's return is NaN.

    Raises:
        KeyError: If neither an ``Adj Close`` nor a ``Close`` column exists
            (for example when the download returned an empty frame).
    """
    flat = prices.copy()
    if isinstance(flat.columns, pd.MultiIndex):
        # Keep only the field level, e.g. ('Close', 'AAPL') -> 'Close'.
        flat.columns = flat.columns.get_level_values(0)

    # Prefer adjusted prices when they are present; otherwise use 'Close'.
    price_col = 'Adj Close' if 'Adj Close' in flat.columns else 'Close'
    if price_col not in flat.columns:
        raise KeyError(
            "price data has neither an 'Adj Close' nor a 'Close' column; "
            f"got columns {list(flat.columns)}"
        )

    returns = flat[price_col].pct_change().rename('daily_return').reset_index()
    # Name the columns explicitly so an unnamed index still yields 'Date'.
    returns.columns = ['Date', 'daily_return']
    returns['Date'] = to_calendar_day(returns['Date'])
    return returns


def aggregate_daily_sentiment(news: pd.DataFrame) -> pd.DataFrame:
    """Average the per-headline sentiment for each calendar day.

    Args:
        news: Frame with a ``date`` column and a numeric ``sentiment``
            column.

    Returns:
        DataFrame with one row per day: ``date`` (timezone-naive midnight
        timestamps, ascending) and the mean ``sentiment`` for that day.
    """
    per_headline = news[['date', 'sentiment']].copy()
    per_headline['date'] = to_calendar_day(per_headline['date'])
    # Force a float dtype so an empty frame still aggregates cleanly.
    per_headline['sentiment'] = per_headline['sentiment'].astype(float)
    return per_headline.groupby('date', as_index=False)['sentiment'].mean()


if __name__ == '__main__':
    # Step 2: Load Data
    # Load stock prices
    ticker = 'AAPL'  # Example stock
    data = yf.download(ticker, start='2024-01-01', end='2024-06-01')

    # Load news headlines
    news = pd.read_csv('news_data.csv')  # Columns: date, headline
    news['date'] = pd.to_datetime(news['date'])
    # Rows without a date or headline cannot be scored or placed on a day.
    news = news.dropna(subset=['date', 'headline']).copy()

    # Step 3: Sentiment Analysis
    # NOTE: SentimentIntensityAnalyzer needs the NLTK 'vader_lexicon'
    # resource to be installed (nltk.download('vader_lexicon')).
    sia = SentimentIntensityAnalyzer()
    news['sentiment'] = news['headline'].apply(
        lambda x: sia.polarity_scores(str(x))['compound']
    )

    # Step 4: Aggregate daily sentiment
    daily_sentiment = aggregate_daily_sentiment(news)

    # Step 5: Calculate stock daily returns
    stock_returns = compute_daily_returns(data)

    # Step 6: Merge data (inner join: only days with both news and a price)
    merged = pd.merge(daily_sentiment, stock_returns, left_on='date', right_on='Date')

    # Step 7: Correlation Analysis
    if len(merged) < 2:
        print('Warning: fewer than two overlapping days; correlation is undefined.')
    correlation = merged['sentiment'].corr(merged['daily_return'])
    print(f'Pearson Correlation: {correlation}')

    # Step 8: Visualization
    plt.figure(figsize=(10, 6))
    plt.scatter(merged['sentiment'], merged['daily_return'])
    plt.title('Sentiment vs Stock Returns')
    plt.xlabel('Sentiment Score')
    plt.ylabel('Daily Stock Returns')
    plt.grid()
    plt.show()