# NVDA Exploratory Data Analysis
This notebook explores the processed feature dataset (`data/processed/features.csv`) generated by running `python run_all.py`.

In [None]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's white-grid theme to every figure drawn after this cell.
sns.set_theme(style='whitegrid')

# Locate the project root: use the current working directory when it contains
# a `data` folder, otherwise fall back to its parent (covers running the
# notebook from a direct subdirectory of the project).
ROOT = Path.cwd()
if not (ROOT / 'data').exists():
    ROOT = ROOT.parent

features_path = ROOT / 'data' / 'processed' / 'features.csv'

# Fail early with an actionable message instead of a bare pandas error when
# the processed dataset has not been generated yet.
if not features_path.is_file():
    raise FileNotFoundError(
        f"Processed feature file not found at {features_path}. "
        "Run `python run_all.py` to generate it."
    )

# Parse `Date` as datetimes and use it as the index so the time-series plots
# below can put `df.index` on the x-axis.
df = pd.read_csv(features_path, parse_dates=['Date'], index_col='Date')
df.head()

In [None]:
# Line chart of the `Close` column against the frame's date index.
close_price = df['Close']

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(close_price.index, close_price, color='tab:blue')
ax.set(
    title='NVDA Close Price',
    xlabel='Date',
    ylabel='Close Price (USD)',
)
plt.show()

In [None]:
# Distribution of the continuous `fwd_ret_3d` column, binned into 60 buckets.
fwd_returns = df['fwd_ret_3d']

fig, ax = plt.subplots(figsize=(9, 5))
ax.hist(
    fwd_returns,
    bins=60,
    color='tab:orange',
    edgecolor='black',
    alpha=0.8,
)
ax.set(
    title='Histogram of 3-Day Forward Returns (Continuous)',
    xlabel='3-Day Forward Return',
    ylabel='Frequency',
)
plt.show()

In [None]:
# Bar chart of how many rows fall into each class of the binary `y_3d` target.
#
# The bar labels are fixed, so the counts must always have exactly two entries
# in the order [0, 1]. Missing targets are dropped (value_counts ignores them
# anyway), values are cast to int so 0/1, 0.0/1.0 and False/True all map to
# the same keys, and the result is reindexed to [0, 1] with a zero fill. A
# class with no samples then shows as an empty bar instead of making the
# labels and heights differ in length, which would make `ax.bar` raise.
class_labels = {0: '0 (Non-Positive)', 1: '1 (Positive)'}
class_counts = (
    df['y_3d']
    .dropna()
    .astype(int)
    .value_counts()
    .reindex(list(class_labels), fill_value=0)
)

fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(list(class_labels.values()), class_counts.values, color=['tab:red', 'tab:green'])
ax.set_title('Class Balance for y_3d')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Columns entering the correlation matrix: the feature columns listed below
# plus the `y_3d` target as the final entry.
corr_cols = [
    'log_ret_1d',
    'ret_3d_back',
    'ret_5d_back',
    'ret_10d_back',
    'sma_9',
    'sma_21',
    'ema_9',
    'ema_21',
    'rsi_14',
    'volatility_14',
    'volatility_21',
    'volume_mean_14',
    'volume_z_14',
    'macd',
    'macd_signal',
    'y_3d',
]

# Pairwise correlations of the selected columns (pandas' default method).
corr = df.loc[:, corr_cols].corr(numeric_only=True)

# Diverging colour map centred on zero so the sign of each correlation is
# visible at a glance.
fig, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(data=corr, ax=ax, center=0.0, cmap='coolwarm')
ax.set(title='Feature Correlation Heatmap')
plt.show()

In [None]:
# Line chart of the precomputed `volatility_21` column against the date index
# (described in the title as a 21-day std of log returns).
rolling_vol = df['volatility_21']

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(rolling_vol.index, rolling_vol, color='tab:purple')
ax.set(
    title='Rolling Volatility (21-day std of log returns)',
    xlabel='Date',
    ylabel='Volatility',
)
plt.show()