# Trading Data Analysis - Exploratory Analysis

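This notebook explores the raw trading data (shape, summary statistics, missing values, and price/log-return plots) and then demonstrates the cleaning and feature-generation steps of the pipeline.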

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from src.data_analysis_pipeline.config import load_config
from src.data_analysis_pipeline.data_ingestion import load_trading_data
from src.data_analysis_pipeline.data_cleaning import validate_and_clean_data
from src.data_analysis_pipeline.feature_engineering import generate_features

## Load and Explore Raw Data

In [None]:
# Ingest the raw trading dataset through the pipeline's loader (called with no arguments).
raw_data = load_trading_data()

# Report the frame's dimensions and column names before previewing rows.
print("Raw data shape:", raw_data.shape)
print("\nColumns:", raw_data.columns.to_list())
print("\nSample data:")
# Trailing expression: the notebook renders the first five rows as the cell output.
raw_data.head(5)

## Data Quality Analysis

In [None]:
# Basic statistics
# A notebook cell only auto-renders its final expression, so the summary
# table has to be printed explicitly; as a bare mid-cell expression it was
# computed and then silently discarded.
print("Basic statistics:")
print(raw_data.describe())

# Check for missing values
print("\nMissing values:")
# Trailing expression: rendered by the notebook as the cell's output
# (per-column count of null entries).
raw_data.isnull().sum()

## Price Analysis

In [None]:
def plot_price_analysis(df, price_col='price'):
    """Show a two-row Plotly figure: the price series and a histogram of its log returns.

    Args:
        df: Table-like object (presumably a pandas DataFrame) whose index is
            used as the x-axis of the price plot.
        price_col: Name of the column in ``df`` holding prices.
            Defaults to ``'price'``.

    Returns:
        None. The figure is displayed through ``fig.show()``.
    """
    prices = df[price_col]
    # Log returns: difference between consecutive log prices.
    log_returns = np.log(prices).diff()

    # Build both traces up front, then attach them to their subplot rows.
    price_trace = go.Scatter(x=df.index, y=prices, name='Price')
    returns_trace = go.Histogram(x=log_returns, name='Returns Distribution', nbinsx=50)

    fig = make_subplots(
        rows=2,
        cols=1,
        subplot_titles=['Price Over Time', 'Daily Returns Distribution'],
    )
    fig.add_trace(price_trace, row=1, col=1)
    fig.add_trace(returns_trace, row=2, col=1)

    fig.update_layout(height=800, title_text="Price Analysis")
    fig.show()

# Render the price and returns charts for the raw data; no price_col is
# passed, so the function's default 'price' column is used.
plot_price_analysis(raw_data)

## Feature Generation Example

In [None]:
# Run the raw frame through the pipeline's validation/cleaning step,
# then derive features from the cleaned result.
cleaned_data = validate_and_clean_data(raw_data)
feature_data = generate_features(cleaned_data)

# List, in sorted order, the columns present after feature generation
# that were not in the raw data.
print("Generated features:")
new_features = set(feature_data.columns).difference(raw_data.columns)
print("\n".join(sorted(new_features)))

## Technical Indicators Visualization