# Data Exploration

Explore loaded market data: price distributions, volume patterns, missing data, and basic statistics.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the sample end-of-day data, parsing the 'date' column as datetimes.
# Rows are put in chronological order because the line charts and the
# pct_change() call in the later cells operate on row order. mergesort is a
# stable sort, so a file that is already ordered is left untouched.
df = (
    pd.read_csv('data/sample/eod_reliance.csv', parse_dates=['date'])
    .sort_values('date', kind='mergesort')
    .reset_index(drop=True)
)
print(f'Shape: {df.shape}')
df.head()

In [None]:
# Summary statistics for the loaded frame, using pandas' default describe()
# column selection; the bare name on the last line renders the table.
summary_stats = df.describe()
summary_stats

In [None]:
# Price chart: close price with the daily low-high band on top, traded
# volume underneath, both panels sharing the date axis.
fig, (price_ax, volume_ax) = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
dates = df['date']

# Upper panel: closing price line plus a shaded low-to-high range.
price_ax.plot(dates, df['close'], label='Close', color='steelblue')
price_ax.fill_between(dates, df['low'], df['high'], alpha=0.2, color='steelblue')
price_ax.set_ylabel('Price (INR)')
price_ax.set_title('RELIANCE - Daily OHLCV')
price_ax.legend()

# Lower panel: one bar per row for the traded volume.
volume_ax.bar(dates, df['volume'], color='gray', alpha=0.6)
volume_ax.set_ylabel('Volume')
volume_ax.set_xlabel('Date')

plt.tight_layout()
plt.show()

In [None]:
# Daily returns distribution
TRADING_DAYS_PER_YEAR = 252  # annualisation factor for the Sharpe ratio

# fill_method=None keeps gaps in 'close' as NaN instead of forward-filling
# them into artificial 0% returns, and avoids the pandas >= 2.1 deprecation
# warning for the implicit default.
df['returns'] = df['close'].pct_change(fill_method=None)
# The first row (and any row next to a gap) has no return; drop those once
# and reuse the clean series for the histogram and the statistics.
returns = df['returns'].dropna()

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: histogram of the daily returns.
axes[0].hist(returns, bins=30, edgecolor='black', alpha=0.7)
axes[0].set_title('Return Distribution')
axes[0].set_xlabel('Daily Return')
axes[0].set_ylabel('Frequency')

# Right panel: returns through time with a zero reference line.
axes[1].plot(df['date'], df['returns'], color='steelblue', alpha=0.7)
axes[1].axhline(y=0, color='red', linestyle='--', alpha=0.5)
axes[1].set_title('Daily Returns Over Time')
axes[1].set_xlabel('Date')
axes[1].set_ylabel('Daily Return')

plt.tight_layout()
plt.show()

# Compute each statistic once and reuse it in the printed summary.
mean_return = returns.mean()
std_return = returns.std()
# Annualised mean/std ratio (no risk-free rate is subtracted). It is undefined
# when there is no dispersion (fewer than two returns, or a flat series), so
# report NaN rather than inf or a divide-by-zero warning.
if std_return > 0:
    sharpe = mean_return / std_return * np.sqrt(TRADING_DAYS_PER_YEAR)
else:
    sharpe = float('nan')

print(f'Mean return: {mean_return:.4%}')
print(f'Std return:  {std_return:.4%}')
print(f'Sharpe (ann): {sharpe:.2f}')

In [None]:
# Missing data check: per-column null counts, the covered date span and the
# number of rows in the frame.
null_counts = df.isnull().sum()
first_date = df['date'].min()
last_date = df['date'].max()
row_count = len(df)

print('Missing values:')
print(null_counts)
print(f'\nDate range: {first_date} to {last_date}')
print(f'Trading days: {row_count}')