In [24]:
import polars as pl
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import research
import datetime

# Fix the data loading issue first
def load_data_safely(sym, time_interval, start_date, end_date):
    """Load data with proper error handling"""
    try:
        ts = research.load_ohlc_timeseries_range(sym, time_interval, start_date, end_date)
        return ts
    except Exception as e:
        print(f"Error loading data: {e}")
        # Alternative: Use available data or synthetic data for testing
        return create_sample_data()

def create_sample_data():
    """Create sample data for testing when real data is unavailable"""
    dates = pl.datetime_range(
        start=datetime.datetime(2024, 1, 1),
        end=datetime.datetime(2024, 10, 29),
        interval="1h",
        eager=True
    )
    
    # Create realistic price data with trends and volatility
    np.random.seed(42)
    n = len(dates)
    returns = np.random.normal(0.0001, 0.01, n)  # Small positive drift
    prices = 50000 * np.cumprod(1 + returns)
    
    # Add some realistic OHLC structure
    data = {
        'datetime': dates,
        'open': prices * (1 + np.random.normal(0, 0.001, n)),
        'high': prices * (1 + np.abs(np.random.normal(0.002, 0.002, n))),
        'low': prices * (1 - np.abs(np.random.normal(0.002, 0.002, n))),
        'close': prices
    }
    
    return pl.DataFrame(data)

# Enhanced Feature Engineering
def create_advanced_features(ts, target='close', lags=10, forecast_horizon=1):
    """Create comprehensive feature set for time series prediction"""
    
    ts = ts.with_columns([
        # Basic price transformations
        (pl.col('close') / pl.col('close').shift(1)).log().alias('log_return'),
        pl.col('close').pct_change().alias('return'),
        
        # Rolling statistics
        pl.col('close').rolling_mean(window_size=5).alias('sma_5'),
        pl.col('close').rolling_mean(window_size=20).alias('sma_20'),
        pl.col('close').rolling_std(window_size=20).alias('volatility_20'),
        
        # Price position relative to range
        ((pl.col('close') - pl.col('low')) / (pl.col('high') - pl.col('low') + 1e-8)).alias('price_position'),
        
        # Momentum indicators
        (pl.col('close') / pl.col('close').shift(5) - 1).alias('momentum_5'),
        (pl.col('close') / pl.col('close').shift(10) - 1).alias('momentum_10'),
    ])
    
    # Add lagged features for returns
    for i in range(1, lags + 1):
        ts = ts.with_columns([
            pl.col('log_return').shift(i).alias(f'log_return_lag_{i}')
        ])
    
    # Technical indicators
    ts = ts.with_columns([
        # RSI approximation (14-period)
        pl.when(pl.col('log_return') > 0)
        .then(pl.col('log_return'))
        .otherwise(0)
        .rolling_sum(14)
        .alias('gain'),
        
        pl.when(pl.col('log_return') < 0)
        .then(-pl.col('log_return'))
        .otherwise(0)
        .rolling_sum(14)
        .alias('loss')
    ])
    
    ts = ts.with_columns([
        (100 - (100 / (1 + (pl.col('gain') / (pl.col('loss') + 1e-8))))).alias('rsi_14'),
        
        # Volatility regime
        (pl.col('log_return').rolling_std(5) / 
         (pl.col('log_return').rolling_std(20) + 1e-8)).alias('volatility_ratio'),
    ])
    
    # Target variable
    ts = ts.with_columns([
        pl.col('log_return').shift(-forecast_horizon).alias('target_log_return')
    ])
    
    return ts.drop_nulls()

# Improved Model Architecture
class ImprovedMLP(nn.Module):
    def __init__(self, input_dim, hidden_dims=[128, 64, 32], dropout=0.3):
        super().__init__()
        
        layers = []
        prev_dim = input_dim
        
        for i, hidden_dim in enumerate(hidden_dims):
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout)
            ])
            prev_dim = hidden_dim
        
        # Output: 3 classes (sell, hold, buy)
        layers.append(nn.Linear(prev_dim, 3))
        
        self.network = nn.Sequential(*layers)
        
    def forward(self, x):
        return self.network(x)

# Fixed Training Function
def train_model_advanced(model, train_loader, val_loader, criterion, optimizer, num_epochs=100):
    """Enhanced training with early stopping and learning rate scheduling"""
    
    best_val_loss = float('inf')
    patience = 15
    patience_counter = 0
    
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=8, factor=0.5)
    
    train_losses = []
    val_losses = []
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            
            train_loss += loss.item()
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                outputs = model(batch_x)
                val_loss += criterion(outputs, batch_y).item()
        
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        
        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)
        
        scheduler.step(avg_val_loss)
        
        # Early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
            
        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch}")
            break
            
        if epoch % 20 == 0:
            current_lr = optimizer.param_groups[0]['lr']
            print(f'Epoch {epoch}: Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, LR: {current_lr:.6f}')
    
    # Load best model
    model.load_state_dict(torch.load('best_model.pth'))
    return model, train_losses, val_losses

def create_regime_labels(returns, thresholds=[-0.002, 0.002]):
    """Create classification labels based on return regimes"""
    labels = []
    for ret in returns:
        if ret < thresholds[0]:
            labels.append(0)  # Sell (strong negative)
        elif ret > thresholds[1]:
            labels.append(2)  # Buy (strong positive)
        else:
            labels.append(1)  # Hold (neutral)
    return np.array(labels)

def prepare_data_for_training(ts, feature_columns, test_size=0.2, val_size=0.1, batch_size=32):
    """Prepare data for neural network training"""
    # Filter out any remaining nulls
    ts_clean = ts.drop_nulls()
    
    # Extract features and targets
    X = ts_clean[feature_columns].to_numpy().astype(np.float32)
    y = create_regime_labels(ts_clean['target_log_return'].to_numpy())
    
    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Split data
    total_size = len(X_scaled)
    test_start = int(total_size * (1 - test_size - val_size))
    val_start = int(total_size * (1 - test_size))
    
    X_train = X_scaled[:test_start]
    X_val = X_scaled[test_start:val_start]
    X_test = X_scaled[val_start:]
    
    y_train = y[:test_start]
    y_val = y[test_start:val_start]
    y_test = y[val_start:]
    
    # Create data loaders
    train_dataset = TensorDataset(
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.long)
    )
    val_dataset = TensorDataset(
        torch.tensor(X_val, dtype=torch.float32),
        torch.tensor(y_val, dtype=torch.long)
    )
    test_dataset = TensorDataset(
        torch.tensor(X_test, dtype=torch.float32),
        torch.tensor(y_test, dtype=torch.long)
    )
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, val_loader, test_loader, scaler

# Risk-Managed Trading Strategy
class RiskManagedStrategy:
    def __init__(self, initial_capital=10000, max_position_size=0.2, stop_loss=0.03):
        self.capital = initial_capital
        self.max_position_size = max_position_size
        self.stop_loss = stop_loss
        self.position = 0
        self.entry_price = 0
        self.trades = []
        
    def execute_trade(self, signal, confidence, current_price, volatility):
        """Execute trade with risk management"""
        position_size = self.calculate_position_size(confidence, volatility)
        
        if signal == 0 and self.position > 0:  # Sell signal and we're long
            # Close long position
            pnl = (current_price - self.entry_price) / self.entry_price * self.position
            self.capital *= (1 + pnl)
            self.trades.append(('SELL', current_price, pnl))
            self.position = 0
            
        elif signal == 2 and self.position == 0:  # Buy signal and no position
            # Open long position
            investment = self.capital * position_size
            self.position = investment / current_price
            self.entry_price = current_price
            self.trades.append(('BUY', current_price, 0))
            
        elif signal == 0 and self.position == 0:  # Sell signal and no position
            # Could implement short selling here
            pass
            
        # Check stop loss
        if self.position > 0:
            current_pnl = (current_price - self.entry_price) / self.entry_price
            if current_pnl < -self.stop_loss:
                # Stop loss triggered
                self.capital *= (1 + current_pnl)
                self.trades.append(('STOP_LOSS', current_price, current_pnl))
                self.position = 0
    
    def calculate_position_size(self, confidence, volatility):
        """Dynamic position sizing based on confidence and market volatility"""
        base_size = self.max_position_size * confidence
        # Reduce position size in high volatility
        volatility_adjustment = 1.0 / (1.0 + volatility * 50)  # Adjust scaling factor
        return min(base_size * volatility_adjustment, self.max_position_size)
    
    def get_portfolio_value(self, current_price):
        """Calculate current portfolio value"""
        position_value = self.position * current_price if self.position > 0 else 0
        return self.capital + position_value

# Complete Improved Pipeline
def run_enhanced_strategy(sym, time_interval, start_date, end_date):
    """Run the complete enhanced trading strategy"""
    print("Loading data...")
    
    # Load and prepare data
    ts = load_data_safely(sym, time_interval, start_date, end_date)
    print(f"Loaded data with {len(ts)} rows")
    
    # Create features
    ts = create_advanced_features(ts, lags=8)
    print(f"After feature engineering: {len(ts)} rows")
    
    # Select features
    feature_columns = [col for col in ts.columns if any(x in col for x in ['lag_', 'sma_', 'momentum', 'volatility', 'rsi', 'price_position'])]
    print(f"Using {len(feature_columns)} features: {feature_columns}")
    
    # Prepare data for training
    train_loader, val_loader, test_loader, scaler = prepare_data_for_training(
        ts, feature_columns, test_size=0.2, val_size=0.1, batch_size=64
    )
    
    # Create model
    model = ImprovedMLP(input_dim=len(feature_columns))
    print(f"Model created with {sum(p.numel() for p in model.parameters())} parameters")
    
    # Training setup
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
    
    # Train model
    print("Training model...")
    model, train_losses, val_losses = train_model_advanced(
        model, train_loader, val_loader, criterion, optimizer, num_epochs=200
    )
    
    # Evaluate model
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            outputs = model(batch_x)
            _, predicted = torch.max(outputs.data, 1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
    
    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    
    # Backtest strategy
    print("Running backtest...")
    backtest_results = backtest_strategy(model, scaler, ts, feature_columns)
    
    return backtest_results, model, train_losses, val_losses

def backtest_strategy(model, scaler, ts, feature_columns):
    """Backtest the trading strategy"""
    # Filter test period
    test_start = int(len(ts) * 0.7)
    ts_test = ts[test_start:].drop_nulls()
    
    if len(ts_test) == 0:
        print("No test data available")
        return None
    
    # Prepare features
    X_test = ts_test[feature_columns].to_numpy().astype(np.float32)
    X_test_scaled = scaler.transform(X_test)
    
    # Get predictions
    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(X_test_scaled, dtype=torch.float32))
        _, predictions = torch.max(outputs, 1)
    
    # Run strategy
    strategy = RiskManagedStrategy(initial_capital=10000)
    prices = ts_test['close'].to_numpy()
    
    for i, (pred, price) in enumerate(zip(predictions, prices)):
        if i >= 20:  # Wait for enough volatility data
            volatility = ts_test['volatility_20'].to_numpy()[i]
            confidence = torch.softmax(outputs[i], dim=0)[pred].item()
            
            strategy.execute_trade(pred.item(), confidence, price, volatility)
    
    final_value = strategy.get_portfolio_value(prices[-1])
    total_return = (final_value - 10000) / 10000
    
    print(f"Initial Capital: $10,000")
    print(f"Final Portfolio Value: ${final_value:.2f}")
    print(f"Total Return: {total_return:.2%}")
    print(f"Number of Trades: {len(strategy.trades)}")
    
    return {
        'final_value': final_value,
        'total_return': total_return,
        'trades': strategy.trades,
        'model': model
    }

# Run the enhanced strategy (with proper error handling)
try:
    results, model, train_losses, val_losses = run_enhanced_strategy(
        'BTCUSDT', '1h', 
        datetime.datetime(2025, 1, 1), 
        datetime.datetime(2025, 10, 29)
    )
    
    if results:
        print("Strategy completed successfully!")
        print(f"Total Return: {results['total_return']:.2%}")
        
except Exception as e:
    print(f"Error running strategy: {e}")
    import traceback
    traceback.print_exc()

Loading data...


Loading BTCUSDT: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 302/302 [02:09<00:00,  2.33day/s]


Loaded data with 7080 rows
After feature engineering: 7059 rows
Using 16 features: ['sma_5', 'sma_20', 'volatility_20', 'price_position', 'momentum_5', 'momentum_10', 'log_return_lag_1', 'log_return_lag_2', 'log_return_lag_3', 'log_return_lag_4', 'log_return_lag_5', 'log_return_lag_6', 'log_return_lag_7', 'log_return_lag_8', 'rsi_14', 'volatility_ratio']
Model created with 13059 parameters
Training model...
Epoch 0: Train Loss: 1.0800, Val Loss: 0.9819, LR: 0.001000
Epoch 20: Train Loss: 1.0055, Val Loss: 0.9642, LR: 0.001000
Early stopping at epoch 33
Test Accuracy: 56.87%
Running backtest...
Initial Capital: $10,000
Final Portfolio Value: $9642.74
Total Return: -3.57%
Number of Trades: 75
Strategy completed successfully!
Total Return: -3.57%
