# NoIQTrader - Phase 1: Data Collection & Feature Engineering

This notebook implements the data collection and feature engineering phase for the AI-powered Bitcoin trading system.

## Objectives:
1. Download BTC-USD historical daily data for the last 5 years
2. Clean and validate the data
3. Engineer technical indicators (MA, RSI, MACD, Bollinger Bands, volatility)
4. Add lag features for temporal dependencies
5. Visualize BTC price history and returns distribution
6. Save processed data for modeling phase

## 1. Import Required Libraries

Import all necessary libraries for data collection, manipulation, and visualization.

In [None]:
# Core libraries
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Visualization libraries
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

# Plot appearance: matplotlib style sheet first, then the seaborn palette
# (same order as before; do not swap these two calls).
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Lift pandas' display limits on column count, total width and cell width
for option_name in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(option_name, None)

print("✅ All libraries imported successfully!")
analysis_timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"Analysis date: {analysis_timestamp}")

## 2. Import Custom Modules

Import our custom data loader and feature engineering modules.

In [None]:
# Make ../src (relative to the current working directory) importable
import sys
import os

src_dir = os.path.join(os.getcwd(), '..', 'src')
sys.path.append(src_dir)

# Project modules, resolved through the path entry appended above
from data_loader import BTCDataLoader
from feature_engineer import FeatureEngineer

print("✅ Custom modules imported successfully!")

## 3. Download BTC-USD Historical Data

Download 5 years of Bitcoin historical data using our data loader module.

In [None]:
# Loader configured with data_dir='../data'
data_loader = BTCDataLoader(data_dir='../data')

# Request the data with force_refresh=True
print("📊 Fetching BTC-USD historical data...")
btc_data = data_loader.get_btc_data(force_refresh=True)

# Headline facts about what came back
first_day = btc_data.index[0].date()
last_day = btc_data.index[-1].date()
row_count = len(btc_data)
column_names = list(btc_data.columns)

print(f"\n📈 Data loaded successfully!")
print(f"Shape: {btc_data.shape}")
print(f"Date range: {first_day} to {last_day}")
print(f"Total trading days: {row_count}")
print(f"Columns: {column_names}")

# Preview both ends of the frame
previews = (
    ("\n🔍 First 5 rows:", btc_data.head()),
    ("\n🔍 Last 5 rows:", btc_data.tail()),
)
for caption, preview in previews:
    print(caption)
    display(preview)

## 4. Data Exploration and Quality Check

Examine the dataset structure, check for missing values, and validate data quality.

In [None]:
# Shape, dtypes and memory footprint of the raw frame
memory_mb = btc_data.memory_usage(deep=True).sum() / 1024**2
print("📋 Dataset Information:")
print(f"Dataset shape: {btc_data.shape}")
print(f"Data types: \n{btc_data.dtypes}")
print(f"\nMemory usage: {memory_mb:.2f} MB")

# Per-column count of missing cells
print("\n🔍 Missing Values Check:")
missing_values = btc_data.isnull().sum()
print(missing_values)

total_missing = missing_values.sum()
if total_missing != 0:
    print(f"⚠️  Found {total_missing} missing values")
else:
    print("✅ No missing values found!")

# Descriptive statistics for every column describe() covers
print("\n📊 Basic Statistics:")
display(btc_data.describe())

In [None]:
# Sanity checks on the raw OHLCV frame
print("🔍 Data Quality Checks:")

# Any negative value anywhere in the four price columns
price_columns = ['Open', 'High', 'Low', 'Close']
negative_prices = (btc_data[price_columns] < 0).any().any()
negative_status = '❌ Found' if negative_prices else '✅ None'
print(f"Negative prices: {negative_status}")

# Rows whose High is strictly below their Low
invalid_hl = (btc_data['High'] < btc_data['Low']).sum()
if invalid_hl > 0:
    high_low_status = '❌ ' + str(invalid_hl) + ' found'
else:
    high_low_status = '✅ None'
print(f"Invalid High < Low: {high_low_status}")

# Rows reporting exactly zero volume
zero_volume = (btc_data['Volume'] == 0).sum()
print(f"Zero volume days: {zero_volume}")

# Close-to-close moves larger than 50% in absolute value
daily_returns = btc_data['Close'].pct_change()
extreme_moves = (daily_returns.abs() > 0.5).sum()
print(f"Extreme price moves (>50%): {extreme_moves}")

# Price extremes plus first-to-last close appreciation
lowest_low = btc_data['Low'].min()
highest_high = btc_data['High'].max()
first_close = btc_data['Close'].iloc[0]
last_close = btc_data['Close'].iloc[-1]
appreciation_pct = ((last_close / first_close) - 1) * 100

print(f"\n💰 Price Range:")
print(f"All-time low: ${lowest_low:,.2f}")
print(f"All-time high: ${highest_high:,.2f}")
print(f"Current price: ${last_close:,.2f}")
print(f"Price appreciation: {appreciation_pct:.1f}%")

## 5. Feature Engineering - Technical Indicators

Calculate all technical indicators using our feature engineering module.

In [None]:
# Build the feature engineer and run it over the raw frame
feature_engineer = FeatureEngineer()

print("🔧 Engineering technical features...")
featured_data = feature_engineer.create_all_features(btc_data)

# Column counts before and after
original_column_count = len(btc_data.columns)
featured_column_count = len(featured_data.columns)

print(f"\n✅ Feature engineering completed!")
print(f"Original columns: {original_column_count}")
print(f"Featured columns: {featured_column_count}")
print(f"Added features: {featured_column_count - original_column_count}")

# First 20 rows of the summary table returned by the feature engineer
feature_summary = feature_engineer.get_feature_summary(featured_data)
print("\n📋 Feature Summary (Top 20 by missing values):")
display(feature_summary.head(20))

In [None]:
# Indicators worth eyeballing; only those actually present are shown
key_indicators = [
    'Close', 'MA10', 'MA50', 'MA200', 'RSI', 'MACD', 'MACD_signal',
    'BB_upper', 'BB_lower', 'volatility_20d', 'Close_lag_1',
]

existing_columns = set(featured_data.columns)
available_indicators = [name for name in key_indicators if name in existing_columns]

# Last ten rows of the selected columns
print("🔍 Sample of Key Technical Indicators:")
indicator_sample = featured_data[available_indicators].tail(10)
display(indicator_sample)

## 6. Visualize BTC Price History

Create comprehensive charts showing BTC price evolution with technical indicators.

In [None]:
# Four stacked panels sharing one x-axis: price, volume, RSI, MACD
panel_titles = (
    'BTC Price with Moving Averages & Bollinger Bands',
    'Volume', 'RSI', 'MACD',
)
fig = make_subplots(
    rows=4, cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    subplot_titles=panel_titles,
    row_heights=[0.5, 0.2, 0.15, 0.15],
)

dates = featured_data.index
present = featured_data.columns


def _add_line(column, label, panel_row, line, **scatter_options):
    """Add one line trace for *column* to the given panel row of ``fig``."""
    fig.add_trace(
        go.Scatter(x=dates, y=featured_data[column], name=label,
                   line=line, **scatter_options),
        row=panel_row, col=1,
    )


def _add_bars(column, label, panel_row, bar_color):
    """Add one bar trace for *column* to the given panel row of ``fig``."""
    fig.add_trace(
        go.Bar(x=dates, y=featured_data[column], name=label,
               marker_color=bar_color),
        row=panel_row, col=1,
    )


# Panel 1: close price, then whichever moving averages exist
_add_line('Close', 'Close Price', 1, dict(color='blue', width=2))

for ma_column, ma_color in (('MA10', 'orange'), ('MA50', 'red'), ('MA200', 'purple')):
    if ma_column in present:
        _add_line(ma_column, ma_column, 1, dict(color=ma_color, width=1))

# Panel 1: Bollinger Bands. The lower band is added right after the upper
# one because fill='tonexty' shades towards the previously added trace.
if 'BB_upper' in present and 'BB_lower' in present:
    _add_line('BB_upper', 'BB Upper', 1, dict(color='gray', dash='dash'))
    _add_line('BB_lower', 'BB Lower', 1, dict(color='gray', dash='dash'),
              fill='tonexty', fillcolor='rgba(128,128,128,0.1)')

# Panel 2: volume bars
_add_bars('Volume', 'Volume', 2, 'lightblue')

# Panel 3: RSI with dashed reference lines at 70 and 30
if 'RSI' in present:
    _add_line('RSI', 'RSI', 3, dict(color='green'))
    for level, level_color in ((70, "red"), (30, "green")):
        fig.add_hline(y=level, line_dash="dash", line_color=level_color, row=3, col=1)

# Panel 4: MACD line, signal line and (when available) histogram
if 'MACD' in present and 'MACD_signal' in present:
    _add_line('MACD', 'MACD', 4, dict(color='blue'))
    _add_line('MACD_signal', 'MACD Signal', 4, dict(color='red'))
    if 'MACD_histogram' in present:
        _add_bars('MACD_histogram', 'MACD Histogram', 4, 'gray')

# Figure-level layout
fig.update_layout(
    title='Bitcoin (BTC-USD) Technical Analysis - 5 Year History',
    height=1000,
    showlegend=True,
    xaxis_rangeslider_visible=False,
)

# Y-axis titles per panel; the RSI axis is pinned to 0-100
y_axis_settings = (
    dict(title_text="Price ($)"),
    dict(title_text="Volume"),
    dict(title_text="RSI", range=[0, 100]),
    dict(title_text="MACD"),
)
for panel_row, axis_options in enumerate(y_axis_settings, start=1):
    fig.update_yaxes(row=panel_row, col=1, **axis_options)

fig.show()

## 7. Price History Analysis

Analyze price movements and trends over different time periods.

In [None]:
# Recent-period price, volume and daily-return charts plus 1-year metrics.
#
# Windows are measured back from the newest row with pd.DateOffset instead of
# DataFrame.last(): .last() is deprecated in recent pandas, and its 'Y' alias
# is a year-end-anchored offset, so .last('1Y') selects roughly year-to-date
# data rather than a trailing twelve months.
def _trailing_window(data, **span):
    """Return the rows of *data* dated after (last index value - DateOffset(**span)).

    *data* may be a DataFrame or Series with a datetime-like index; *span* is
    forwarded to ``pd.DateOffset`` (e.g. ``years=2`` or ``months=6``).
    """
    cutoff = data.index[-1] - pd.DateOffset(**span)
    return data.loc[data.index > cutoff]


# Focused price chart for the recent period (last 2 years)
recent_data = _trailing_window(featured_data, years=2)

plt.figure(figsize=(15, 10))

# Plot 1: Price with whichever moving averages are available
plt.subplot(3, 1, 1)
plt.plot(recent_data.index, recent_data['Close'], label='Close Price', linewidth=2, color='blue')
for ma_column, ma_color in (('MA10', 'orange'), ('MA50', 'red'), ('MA200', 'purple')):
    if ma_column in recent_data.columns:
        plt.plot(recent_data.index, recent_data[ma_column], label=ma_column, alpha=0.8, color=ma_color)

plt.title('Bitcoin Price with Moving Averages (Last 2 Years)', fontsize=14, fontweight='bold')
plt.ylabel('Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)

# Plot 2: Volume
plt.subplot(3, 1, 2)
plt.bar(recent_data.index, recent_data['Volume'], alpha=0.7, color='lightblue')
plt.title('Trading Volume', fontsize=12, fontweight='bold')
plt.ylabel('Volume')
plt.grid(True, alpha=0.3)

# Plot 3: Daily returns, green for gains and red otherwise
plt.subplot(3, 1, 3)
if 'daily_return' in recent_data.columns:
    daily_returns = recent_data['daily_return'] * 100  # Convert to percentage
    colors = ['green' if x > 0 else 'red' for x in daily_returns]
    plt.bar(recent_data.index, daily_returns, alpha=0.7, color=colors)
    plt.axhline(y=0, color='black', linestyle='-', alpha=0.5)
    plt.title('Daily Returns (%)', fontsize=12, fontweight='bold')
    plt.ylabel('Return (%)')
    plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Recent performance metrics, all computed over the same trailing-year window.
# The total return previously used iloc[-252], which is a different span from
# the 1-year window used for the other metrics and fails on short data.
if 'daily_return' in featured_data.columns:
    last_year = _trailing_window(featured_data, years=1)
    recent_returns = last_year['daily_return'].dropna() * 100
    year_closes = last_year['Close']
    total_return_pct = ((year_closes.iloc[-1] / year_closes.iloc[0]) - 1) * 100

    print(f"📊 Recent Performance Metrics (Last 1 Year):")
    print(f"Average daily return: {recent_returns.mean():.2f}%")
    print(f"Daily volatility: {recent_returns.std():.2f}%")
    print(f"Best day: {recent_returns.max():.2f}%")
    print(f"Worst day: {recent_returns.min():.2f}%")
    print(f"Total return: {total_return_pct:.1f}%")

## 8. Daily Returns Distribution Analysis

Analyze the distribution of daily returns to understand risk characteristics.

In [None]:
# Distribution analysis of daily returns: histogram, Q-Q plot, box plot,
# 30-day rolling volatility, and a printed statistical/risk summary.
from scipy import stats

# Calculate daily returns if the feature step did not already provide them
if 'daily_return' not in featured_data.columns:
    featured_data['daily_return'] = featured_data['Close'].pct_change()

# Percent returns with NaN rows removed
returns = featured_data['daily_return'].dropna() * 100

mean_return = returns.mean()
median_return = returns.median()
std_return = returns.std()
var_5pct = returns.quantile(0.05)

fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Histogram of daily returns with mean and median markers
hist_ax = axes[0, 0]
hist_ax.hist(returns, bins=100, alpha=0.7, color='skyblue', edgecolor='black')
hist_ax.axvline(mean_return, color='red', linestyle='--', linewidth=2, label=f'Mean: {mean_return:.2f}%')
hist_ax.axvline(median_return, color='green', linestyle='--', linewidth=2, label=f'Median: {median_return:.2f}%')
hist_ax.set_title('Distribution of Daily Returns', fontsize=14, fontweight='bold')
hist_ax.set_xlabel('Daily Return (%)')
hist_ax.set_ylabel('Frequency')
hist_ax.legend()
hist_ax.grid(True, alpha=0.3)

# Q-Q plot against a normal distribution
qq_ax = axes[0, 1]
stats.probplot(returns, dist="norm", plot=qq_ax)
qq_ax.set_title('Q-Q Plot (Normal Distribution)', fontsize=14, fontweight='bold')
qq_ax.grid(True, alpha=0.3)

# Box plot of returns
box_ax = axes[1, 0]
box_ax.boxplot(returns, vert=True)
box_ax.set_title('Box Plot of Daily Returns', fontsize=14, fontweight='bold')
box_ax.set_ylabel('Daily Return (%)')
box_ax.grid(True, alpha=0.3)

# Rolling volatility, plotted against its own index. Using
# featured_data.index[1:] only lines up when exactly the first return is NaN;
# any other dropped row makes x and y different lengths.
vol_ax = axes[1, 1]
rolling_vol = returns.rolling(window=30).std()
vol_ax.plot(rolling_vol.index, rolling_vol, color='purple', linewidth=1)
vol_ax.set_title('30-Day Rolling Volatility', fontsize=14, fontweight='bold')
vol_ax.set_ylabel('Volatility (%)')
vol_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Statistical summary
print("📈 Daily Returns Statistical Summary:")
print(f"Count: {len(returns):,}")
print(f"Mean: {mean_return:.4f}%")
print(f"Median: {median_return:.4f}%")
print(f"Standard Deviation: {std_return:.4f}%")
print(f"Skewness: {returns.skew():.4f}")
print(f"Kurtosis: {returns.kurtosis():.4f}")
print(f"Minimum: {returns.min():.2f}%")
print(f"Maximum: {returns.max():.2f}%")
print(f"5th Percentile: {var_5pct:.2f}%")
print(f"95th Percentile: {returns.quantile(0.95):.2f}%")

# Risk metrics: volatility scaled by sqrt(365), the 5% quantile, and the mean
# of all returns at or below that quantile
print(f"\n⚠️  Risk Metrics:")
print(f"Annualized Volatility: {std_return * np.sqrt(365):.2f}%")
print(f"Value at Risk (5%): {var_5pct:.2f}%")
print(f"Expected Shortfall (5%): {returns[returns <= var_5pct].mean():.2f}%")

## 9. Technical Indicators Analysis

Analyze the behavior and effectiveness of technical indicators.

In [None]:
# Technical indicators dashboard: RSI, MACD, Bollinger Bands and volatility
# over recent windows, followed by printed indicator statistics.
#
# Windows are measured back from the newest row with pd.DateOffset instead of
# DataFrame.last(): .last() is deprecated in recent pandas, and its 'Y' / 'M'
# aliases are period-end-anchored, so e.g. .last('1Y') selects roughly
# year-to-date data rather than a trailing twelve months.
def _trailing_window(data, **span):
    """Return the rows of *data* dated after (last index value - DateOffset(**span)).

    *data* may be a DataFrame or Series with a datetime-like index; *span* is
    forwarded to ``pd.DateOffset`` (e.g. ``years=1`` or ``months=6``).
    """
    cutoff = data.index[-1] - pd.DateOffset(**span)
    return data.loc[data.index > cutoff]


fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# RSI analysis (last year) with the 70 / 30 / 50 reference levels
if 'RSI' in featured_data.columns:
    recent_data = _trailing_window(featured_data, years=1)
    rsi_ax = axes[0, 0]
    rsi_ax.plot(recent_data.index, recent_data['RSI'], color='blue', linewidth=1)
    rsi_ax.axhline(y=70, color='red', linestyle='--', alpha=0.7, label='Overbought (70)')
    rsi_ax.axhline(y=30, color='green', linestyle='--', alpha=0.7, label='Oversold (30)')
    rsi_ax.axhline(y=50, color='gray', linestyle='-', alpha=0.5, label='Neutral (50)')
    rsi_ax.fill_between(recent_data.index, 70, 100, alpha=0.2, color='red')
    rsi_ax.fill_between(recent_data.index, 0, 30, alpha=0.2, color='green')
    rsi_ax.set_title('RSI (Relative Strength Index) - Last Year', fontsize=14, fontweight='bold')
    rsi_ax.set_ylabel('RSI')
    rsi_ax.set_ylim(0, 100)
    rsi_ax.legend()
    rsi_ax.grid(True, alpha=0.3)

# MACD analysis (last year): line, signal and histogram
if all(col in featured_data.columns for col in ['MACD', 'MACD_signal', 'MACD_histogram']):
    recent_data = _trailing_window(featured_data, years=1)
    macd_ax = axes[0, 1]
    macd_ax.plot(recent_data.index, recent_data['MACD'], label='MACD', color='blue', linewidth=1)
    macd_ax.plot(recent_data.index, recent_data['MACD_signal'], label='Signal', color='red', linewidth=1)
    macd_ax.bar(recent_data.index, recent_data['MACD_histogram'], label='Histogram',
                alpha=0.6, color='gray', width=1)
    macd_ax.axhline(y=0, color='black', linestyle='-', alpha=0.5)
    macd_ax.set_title('MACD Analysis - Last Year', fontsize=14, fontweight='bold')
    macd_ax.set_ylabel('MACD')
    macd_ax.legend()
    macd_ax.grid(True, alpha=0.3)

# Bollinger Bands analysis (last 6 months)
if all(col in featured_data.columns for col in ['BB_upper', 'BB_lower', 'BB_position']):
    recent_data = _trailing_window(featured_data, months=6)
    bb_ax = axes[1, 0]
    bb_ax.plot(recent_data.index, recent_data['Close'], label='Close', color='blue', linewidth=2)
    bb_ax.plot(recent_data.index, recent_data['BB_upper'], label='Upper Band',
               color='red', linestyle='--', alpha=0.7)
    bb_ax.plot(recent_data.index, recent_data['BB_lower'], label='Lower Band',
               color='green', linestyle='--', alpha=0.7)
    bb_ax.fill_between(recent_data.index, recent_data['BB_lower'], recent_data['BB_upper'],
                       alpha=0.1, color='gray')
    bb_ax.set_title('Bollinger Bands - Last 6 Months', fontsize=14, fontweight='bold')
    bb_ax.set_ylabel('Price ($)')
    bb_ax.legend()
    bb_ax.grid(True, alpha=0.3)

# Volatility analysis (last 2 years), scaled by 100 for display as a percentage
if 'volatility_20d' in featured_data.columns:
    recent_vol = _trailing_window(featured_data['volatility_20d'], years=2) * 100
    vol_ax = axes[1, 1]
    vol_ax.plot(recent_vol.index, recent_vol, color='purple', linewidth=1)
    vol_ax.axhline(y=recent_vol.mean(), color='red', linestyle='--',
                   label=f'Mean: {recent_vol.mean():.2f}%')
    vol_ax.set_title('20-Day Rolling Volatility - Last 2 Years', fontsize=14, fontweight='bold')
    vol_ax.set_ylabel('Volatility (%)')
    vol_ax.legend()
    vol_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Indicator statistics over the full history (not just the plotted windows)
print("📊 Technical Indicator Statistics:")
if 'RSI' in featured_data.columns:
    rsi_data = featured_data['RSI'].dropna()
    overbought = rsi_data > 70
    oversold = rsi_data < 30
    print(f"\nRSI:")
    print(f"  Average: {rsi_data.mean():.2f}")
    print(f"  Overbought periods (>70): {overbought.sum()} days ({overbought.mean()*100:.1f}%)")
    print(f"  Oversold periods (<30): {oversold.sum()} days ({oversold.mean()*100:.1f}%)")

if 'volatility_20d' in featured_data.columns:
    vol_data = featured_data['volatility_20d'].dropna() * 100
    print(f"\n20-Day Volatility:")
    print(f"  Average: {vol_data.mean():.2f}%")
    print(f"  Minimum: {vol_data.min():.2f}%")
    print(f"  Maximum: {vol_data.max():.2f}%")
    print(f"  Current: {vol_data.iloc[-1]:.2f}%")

## 10. Feature Correlation Analysis

Analyze correlations between different features to understand relationships.

In [None]:
# Candidate features for the correlation study
key_features = [
    'Close', 'Volume', 'daily_return',
    'MA10', 'MA50', 'MA200', 'RSI', 'MACD', 'MACD_signal',
    'BB_position', 'volatility_20d', 'momentum_10d',
]

# Keep only the candidates that exist, then drop rows with any NaN among them
available_features = [name for name in key_features if name in featured_data.columns]
correlation_data = featured_data[available_features].dropna()

# Pairwise correlation matrix
correlation_matrix = correlation_data.corr()

# Heatmap with the upper triangle (diagonal included) masked out
plt.figure(figsize=(12, 10))
mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))
sns.heatmap(
    correlation_matrix,
    mask=mask,
    annot=True,
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=0.5,
    cbar_kws={"shrink": .8},
    fmt='.2f',
)
plt.title('Feature Correlation Matrix', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

# Collect every distinct pair (i < j) whose |r| exceeds 0.7
print("🔍 High Correlations (|r| > 0.7):")
labels = correlation_matrix.columns
high_corr_pairs = []
for i, j in zip(*np.triu_indices(len(labels), k=1)):
    corr_val = correlation_matrix.iloc[i, j]
    if abs(corr_val) > 0.7:
        high_corr_pairs.append((labels[i], labels[j], corr_val))

# Strongest relationships first
ranked_pairs = sorted(high_corr_pairs, key=lambda pair: abs(pair[2]), reverse=True)
for feature1, feature2, corr in ranked_pairs:
    print(f"  {feature1} ↔ {feature2}: {corr:.3f}")

## 11. Data Quality and Missing Values Analysis

Run a final data-quality check and drop rows that are missing too many feature values (only rows with at least 80% of their features present are kept).

In [None]:
# Per-column missing-value report, most affected columns first
null_counts = featured_data.isnull().sum()
missing_analysis = pd.DataFrame({
    'Column': featured_data.columns,
    'Missing_Count': null_counts.values,
    'Missing_Percentage': (null_counts / len(featured_data) * 100).values,
    'Data_Type': featured_data.dtypes.values,
}).sort_values('Missing_Percentage', ascending=False)

has_missing = missing_analysis['Missing_Count'] > 0

print("📋 Missing Values Analysis:")
print(f"Total features: {len(featured_data.columns)}")
print(f"Features with missing values: {has_missing.sum()}")

# Show the affected columns, if there are any
features_with_missing = missing_analysis[has_missing]
if features_with_missing.empty:
    print("\n✅ No missing values found!")
else:
    print("\n⚠️  Features with missing values:")
    display(features_with_missing)

# Row filter: keep a row only if it has at least int(0.8 * n_columns)
# non-missing values
threshold = 0.8 * len(featured_data.columns)
clean_data = featured_data.dropna(thresh=int(threshold))

rows_before = len(featured_data)
rows_after = len(clean_data)

print(f"\n🧹 Data Cleaning Results:")
print(f"Original rows: {rows_before:,}")
print(f"Clean rows: {rows_after:,}")
print(f"Removed rows: {rows_before - rows_after:,}")
print(f"Data retention: {rows_after/rows_before*100:.1f}%")

# From here on, featured_data refers to an independent copy of the cleaned frame
featured_data = clean_data.copy()

## 12. Save Processed Data

Save the engineered dataset for use in future modeling phases.

In [None]:
# Persist the engineered dataset and a companion feature list, then print a
# summary of the phase.
output_path = '../data/btc_featured_data.csv'
featured_data.to_csv(output_path)

print(f"💾 Processed data saved to: {output_path}")
print(f"Dataset shape: {featured_data.shape}")
print(f"Date range: {featured_data.index[0].date()} to {featured_data.index[-1].date()}")
print(f"File size: {os.path.getsize(output_path) / 1024**2:.2f} MB")

# Descriptions for columns matched by exact name
_FIXED_DESCRIPTIONS = {
    'Open': 'Opening price',
    'High': 'Highest price',
    'Low': 'Lowest price',
    'Close': 'Closing price',
    'Volume': 'Trading volume',
    'daily_return': 'Daily return percentage',
    'RSI': 'Relative Strength Index',
    'MACD': 'MACD line',
    'MACD_signal': 'MACD signal line',
    'MACD_histogram': 'MACD histogram',
    'BB_upper': 'Bollinger Band upper',
    'BB_lower': 'Bollinger Band lower',
}


def _describe_feature(col):
    """Return a short human-readable description for column name *col*.

    Exact-name matches come from ``_FIXED_DESCRIPTIONS``; names of the form
    ``MA<digits>`` are described as moving averages; everything else gets a
    generic label.
    """
    if col in _FIXED_DESCRIPTIONS:
        return _FIXED_DESCRIPTIONS[col]
    if col.startswith('MA') and col[2:].isdigit():
        # The window length is the digits after 'MA'. These names contain no
        # underscore, so col.split("_")[1] raised IndexError here.
        return f'{col[2:]} day moving average'
    return 'Technical indicator feature'


# Feature list for reference: name, dtype and description per column
feature_list = pd.DataFrame({
    'Feature_Name': featured_data.columns,
    'Data_Type': featured_data.dtypes.values,
    'Description': [_describe_feature(col) for col in featured_data.columns],
})

feature_list_path = '../data/feature_list.csv'
feature_list.to_csv(feature_list_path, index=False)
print(f"📝 Feature list saved to: {feature_list_path}")

# Final summary
print(f"\n🎯 Phase 1 Summary:")
print(f"✅ Downloaded {len(featured_data)} days of BTC-USD data")
print(f"✅ Engineered {len(featured_data.columns)} features")
print(f"✅ Technical indicators: MA, RSI, MACD, Bollinger Bands, Volatility")
print(f"✅ Lag features and momentum indicators")
print(f"✅ Comprehensive visualizations created")
print(f"✅ Clean dataset ready for modeling phase")

# Final dataset preview: last rows and descriptive statistics
print(f"\n📊 Final Dataset Preview:")
display(featured_data.tail(5))
display(featured_data.describe())