In [None]:
# 01_data_exploration.ipynb

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# --- Load Data ---
# Ticker whose CSV is read below; change it to any other symbol you've downloaded.
symbol = 'AAPL'
data_path = f'../data/{symbol}.csv'
try:
    # Use the 'Date' column as the index and ask pandas to parse it as dates.
    # Only the read itself is guarded, so the handler below cannot mask
    # errors raised by the preview code.
    df = pd.read_csv(data_path, index_col='Date', parse_dates=True)
except FileNotFoundError:
    # `df` is deliberately not assigned here; the inspection section that
    # follows only runs when `df` exists.
    print(f"Error: Data file not found at {data_path}. Make sure you've run src/data_loader.py.")
else:
    print(f"Loaded data for {symbol}:")
    print(df.head())
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() emitted a stray "None" line after the report.
    df.info()

# --- Basic Data Inspection ---
# Runs only when the load step actually created `df`.
if 'df' in locals():
    # Check for missing values
    print("\nMissing Values:")
    print(df.isnull().sum())

    # Basic descriptive statistics (taken before the 'Returns' column is added)
    print("\nDescriptive Statistics:")
    print(df.describe())

    # --- Returns Calculation ---
    # Row-over-row percentage change of 'Close'. The first row has no previous
    # value, so it is NaN. Chaining .dropna() before assigning to the column
    # had no effect: the assignment realigns on df's index and the NaN comes
    # straight back. Keep the full-length column and use a NaN-free series
    # for the plots instead.
    df['Returns'] = df['Close'].pct_change()
    returns = df['Returns'].dropna()

    # --- Price, Volume and Returns Over Time ---
    # One entry per line chart: (series, legend label, title, y-axis label).
    line_charts = [
        (df['Close'], 'Close Price', f'{symbol} Close Price Over Time', 'Price'),
        (df['Volume'], 'Volume', f'{symbol} Trading Volume Over Time', 'Volume'),
        (returns, 'Daily Returns', f'{symbol} Daily Returns', 'Returns'),
    ]
    for series, label, title, ylabel in line_charts:
        plt.figure(figsize=(12, 6))
        plt.plot(series, label=label)
        plt.title(title)
        plt.xlabel('Date')
        plt.ylabel(ylabel)
        plt.legend()
        plt.grid(True)
        plt.show()

    # --- Returns Distribution ---
    # Open an explicit figure sized like the charts above instead of relying
    # on an implicitly created, default-sized one.
    plt.figure(figsize=(12, 6))
    sns.histplot(returns, bins=50, kde=True)
    plt.title(f'{symbol} Daily Returns Distribution')
    plt.xlabel('Returns')
    plt.ylabel('Frequency')
    plt.show()