# 01-data-exploration.ipynb

Explore the processed price data and basic statistics, including the new risk-based features.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the processed EURUSD 60-minute dataset (feature columns included),
# parsing the 'timestamp' column as datetimes while loading.
csv_path = 'data/processed/EURUSD_60min.csv'
df = pd.read_csv(csv_path, parse_dates=['timestamp'])

# Quick sanity check: show the first rows of the frame.
preview = df.head()
print(preview)

# Close price against time, drawn on a single 12x6 inch figure.
# Uses an explicit Axes object rather than the implicit pyplot state.
price_fig, price_ax = plt.subplots(figsize=(12, 6))
price_ax.plot(df['timestamp'], df['close'])
price_ax.set_title('Close Price Over Time')
price_ax.set_xlabel('Time')
price_ax.set_ylabel('Price')
plt.show()

# Histograms (50 bins each) of the three risk-based features, side by side
# in a single 12x4 inch figure.
# Each entry: (column name, bar colour, panel title).
hist_panels = [
    ('pos_size_units', 'blue', 'Position Size Units Distribution'),
    ('pos_used_leverage', 'orange', 'Used Leverage Distribution'),
    ('pos_notional', 'green', 'Notional Exposure Distribution'),
]

plt.figure(figsize=(12, 4))
for slot, (column, colour, heading) in enumerate(hist_panels, start=1):
    # One row, one panel per feature; subplot slots are 1-based.
    plt.subplot(1, len(hist_panels), slot)
    plt.hist(df[column], bins=50, color=colour)
    plt.title(heading)

# Adjust spacing so panel titles and tick labels do not overlap.
plt.tight_layout()
plt.show()

# Descriptive statistics (via DataFrame.describe) for the risk feature columns.
risk_columns = ['pos_size_units', 'pos_used_leverage', 'pos_notional']
risk_summary = df[risk_columns].describe()
print(risk_summary)