In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.dates import DateFormatter

# Read the merged dataset from disk; the "Date" column is parsed into
# datetimes at load time so later cells can use it directly on a time axis.
df = pd.read_csv(
    filepath_or_buffer="../data/merged_data.csv",
    parse_dates=["Date"],
)

# Preview the first five rows as the cell's rich output.
df.head(5)

In [None]:
# Dual-axis chart: close price on the left y-axis, average sentiment on the
# right y-axis, both plotted against the shared "Date" x-axis.
fig, ax1 = plt.subplots(figsize=(12,6))

ax1.set_title("Stock Price vs Average Sentiment")
ax1.set_xlabel("Date")
ax1.set_ylabel("Close Price", color="blue")
(price_line,) = ax1.plot(df['Date'], df['Close'], color='blue', label='Close Price')
# Colour the tick labels like the axis label so each scale is easy to attribute.
ax1.tick_params(axis='y', labelcolor='blue')

# Second y-axis sharing the same x-axis, used for the sentiment series.
ax2 = ax1.twinx()
ax2.set_ylabel("Avg Sentiment", color="green")
(sentiment_line,) = ax2.plot(df['Date'], df['avg_sentiment'], color='green', linestyle='--', label='Avg Sentiment')
ax2.tick_params(axis='y', labelcolor='green')

# The series labels were previously never rendered. Draw one combined legend
# on the top-most axes (ax2) so it is not hidden behind the sentiment line.
ax2.legend(handles=[price_line, sentiment_line], loc='upper left')

fig.tight_layout()
plt.show()

In [None]:
# Close-price line with markers on the rows flagged as 'Buy' or 'Sell'
# in the "signal" column.
signal_fig, signal_ax = plt.subplots(figsize=(12, 6))
signal_ax.plot(df['Date'], df['Close'], label='Close Price', color='gray')

# Draw one scatter layer per signal value: (value, marker, colour, legend label).
for signal_value, marker, colour, legend_label in (
    ('Buy', '^', 'green', 'Buy Signal'),
    ('Sell', 'v', 'red', 'Sell Signal'),
):
    flagged_rows = df[df['signal'] == signal_value]
    signal_ax.scatter(
        flagged_rows['Date'],
        flagged_rows['Close'],
        marker=marker,
        color=colour,
        label=legend_label,
        s=100,
    )

signal_ax.set_title("Buy/Sell Signals Based on Sentiment")
signal_ax.set_xlabel("Date")
signal_ax.set_ylabel("Price")
signal_ax.legend()
signal_ax.grid(True)
plt.show()


In [None]:
# Histogram (30 bins) of the "avg_sentiment" column with a KDE curve overlaid.
sentiment_fig, sentiment_ax = plt.subplots()
sns.histplot(df['avg_sentiment'], kde=True, bins=30, ax=sentiment_ax)
sentiment_ax.set_title("Distribution of Daily Average Sentiment")
sentiment_ax.set_xlabel("Sentiment Score")
plt.show()


In [None]:
# Simple day-over-day return of the close price (first row is NaN).
df['daily_return'] = df['Close'].pct_change()

# Convert the categorical signal to a position (+1 long, -1 short, 0 flat) and
# lag it by one row so each return is earned with the previous row's signal.
# Signal values outside the mapping become NaN.
position = df['signal'].map({'Buy': 1, 'Sell': -1, 'Hold': 0}).shift(1)
df['strategy_return'] = df['daily_return'] * position

# Simple returns compound, so cumulative return is prod(1 + r) - 1 rather than
# a running sum of r. Missing returns (first row, unmapped signals) are treated
# as 0 so the curves start at zero and stay continuous.
cumulative_returns = (
    df.set_index('Date')[['daily_return', 'strategy_return']]
    .fillna(0)
    .add(1)
    .cumprod()
    .sub(1)
)

# Plot against Date (via the index) so the x-axis matches the other charts.
returns_ax = cumulative_returns.plot(figsize=(10, 5), title="Cumulative Returns")
returns_ax.set_xlabel("Date")
returns_ax.set_ylabel("Cumulative Return")
returns_ax.grid(True)
plt.show()
