In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Cut-off applied to the `range_strength` score: add_mean_reversion_indicators
# flags a bar as a range market when range_strength > RANGE_THRESHOLD.
# Stricter alternative kept for reference:
# RANGE_THRESHOLD = 0.6
RANGE_THRESHOLD = 0.3

def load_data_from_csv(daily_file, hourly_file, minute_file):
    """Read the three bar-data CSV files and index each one by its ``Time`` column.

    Args:
        daily_file: Location of the daily CSV, passed straight to ``pd.read_csv``.
        hourly_file: Location of the hourly CSV.
        minute_file: Location of the minute CSV.

    Returns:
        dict with the keys ``"daily"``, ``"hourly"`` and ``"minute"`` (in that
        order); each value is the corresponding DataFrame with ``Time`` parsed
        by ``pd.to_datetime`` and installed as the index.
    """
    sources = (
        ("daily", daily_file),
        ("hourly", hourly_file),
        ("minute", minute_file),
    )

    # Stage 1: read every file before transforming any of them.
    frames = {label: pd.read_csv(path) for label, path in sources}

    # Stage 2: parse the timestamp column of each frame.
    for frame in frames.values():
        frame['Time'] = pd.to_datetime(frame['Time'])

    # Stage 3: promote the parsed timestamps to the index.
    for frame in frames.values():
        frame.set_index('Time', inplace=True)

    return frames

def preprocess_timeframe(df, timeframe):
    """Run the two indicator passes for one timeframe and return the result.

    The general-purpose indicators are computed first; their output frame is
    then handed to the mean-reversion pass together with ``timeframe``.
    """
    with_common = add_common_indicators(df)
    return add_mean_reversion_indicators(with_common, timeframe)

def add_common_indicators(df):
    """Add moving averages, Bollinger Bands, RSI and ATR columns to a price frame.

    Rows containing NaN are dropped first and the work is done on a copy, so
    the caller's frame is never modified. Window lengths shrink on short
    histories (they are derived from the row count), which means the columns
    named ``sma20`` / ``sma50`` may hold shorter averages than their names say.

    Args:
        df: DataFrame with at least ``Close``, ``High`` and ``Low`` columns.

    Returns:
        A new DataFrame with the added columns ``sma20``, ``sma50``,
        ``bollinger_mid``, ``bollinger_std``, ``bollinger_upper``,
        ``bollinger_lower``, ``rsi`` and ``atr``. Remaining gaps are forward-
        then back-filled.
    """
    df = df.dropna().copy()
    data_length = len(df)
    close = df['Close']

    # Moving averages (windows adapt to the amount of data available)
    sma_short = min(20, max(5, data_length // 4))
    sma_medium = min(50, max(10, data_length // 2))
    df['sma20'] = close.rolling(window=sma_short).mean()
    df['sma50'] = close.rolling(window=sma_medium).mean()

    # Bollinger Bands: short SMA +/- 2 rolling standard deviations
    df['bollinger_mid'] = df['sma20']
    df['bollinger_std'] = close.rolling(window=sma_short).std()
    df['bollinger_upper'] = df['bollinger_mid'] + 2 * df['bollinger_std']
    df['bollinger_lower'] = df['bollinger_mid'] - 2 * df['bollinger_std']

    # RSI from simple rolling means of gains and losses
    rsi_window = min(14, max(5, data_length // 3))
    delta = close.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=rsi_window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=rsi_window).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    # A window with gains and no losses is maximally overbought: pin it to 100
    # explicitly instead of letting the infinite ratio collapse to a neutral 50.
    rsi = rsi.mask((loss == 0) & (gain > 0), 100.0)
    # Undefined values (warm-up rows, or windows with no movement at all) stay neutral.
    df['rsi'] = rsi.fillna(50)

    # ATR: rolling mean of the true range
    atr_window = min(14, max(5, data_length // 3))
    prev_close = close.shift()
    tr1 = abs(df['High'] - df['Low'])
    tr2 = abs(df['High'] - prev_close)
    tr3 = abs(df['Low'] - prev_close)
    tr = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    df['atr'] = tr.rolling(window=atr_window).mean()

    # NOTE(review): the back-fill copies the first computed value into the
    # warm-up rows, i.e. those rows carry information from later bars.
    df = df.ffill().bfill()
    return df

def detect_range_boundaries(df, lookback):
    """Return the rolling highest ``High`` and lowest ``Low`` over ``lookback`` rows.

    Args:
        df: DataFrame with ``High`` and ``Low`` columns.
        lookback: Rolling window length in rows.

    Returns:
        ``(upper_boundary, lower_boundary)`` as two Series aligned with ``df``;
        rows without a full window are NaN.
    """
    window_high = df['High'].rolling(lookback).max()
    window_low = df['Low'].rolling(lookback).min()
    return window_high, window_low

def add_mean_reversion_indicators(df, timeframe):
    """Attach the mean-reversion columns to ``df`` (modified in place) and return it.

    Columns written, in order: ``mean``, ``upper_range``, ``lower_range``,
    ``distance_from_mean``, ``distance_from_upper``, ``distance_from_lower``,
    ``range_strength``, ``mean_reversion_probability``, ``is_range_market``.

    Args:
        df: Price frame that already carries the columns the range-strength and
            reversion-probability helpers read.
        timeframe: ``"daily"``, ``"hourly"`` or anything else (treated as minute
            data); selects the lookback limits.

    Returns:
        The same DataFrame object, with the columns above added.
    """
    # Lookback = clamp(rows // divisor, floor, cap); limits depend on the timeframe.
    if timeframe == "daily":
        cap, floor, divisor = 20, 3, 2
    elif timeframe == "hourly":
        cap, floor, divisor = 48, 4, 3
    else:  # minute
        cap, floor, divisor = 60, 5, 4
    lookback = min(cap, max(floor, len(df) // divisor))

    # Rolling mean the price is expected to revert to
    df['mean'] = df['Close'].rolling(window=lookback).mean()

    # Rolling range boundaries
    upper, lower = detect_range_boundaries(df, lookback)
    df['upper_range'] = upper
    df['lower_range'] = lower

    # Signed distance from the mean, in percent of the mean
    df['distance_from_mean'] = (df['Close'] - df['mean']) / df['mean'] * 100

    # Position inside the range as a percentage of its width; rows whose width
    # is not strictly positive (including undefined widths) get the midpoint 50.
    range_width = df['upper_range'] - df['lower_range']
    has_width = range_width > 0
    to_upper_pct = (df['upper_range'] - df['Close']) / range_width * 100
    df['distance_from_upper'] = np.where(has_width, to_upper_pct, 50)
    to_lower_pct = (df['Close'] - df['lower_range']) / range_width * 100
    df['distance_from_lower'] = np.where(has_width, to_lower_pct, 50)

    # Composite scores; each helper reads columns written above
    df['range_strength'] = calculate_range_strength(df, lookback)
    df['mean_reversion_probability'] = calculate_reversion_probability(df)

    # Boolean regime flag
    df['is_range_market'] = df['range_strength'] > RANGE_THRESHOLD

    return df

def calculate_range_strength(df, lookback):
    """Score, in [0, 1], how range-bound the last ``lookback`` rows look.

    The score is a weighted sum of three normalised terms:

    * 0.4 x how often the sign of the close-to-close change flips,
    * 0.3 x how narrow the range is relative to the mean (0 once the range is
      20% of the mean or wider),
    * 0.3 x the rolling standard deviation of ``Close`` as a fraction of the
      range width.

    The third term uses the *absolute* range width so that numerator and
    denominator are both in price units. Dividing by the mean-normalised width
    instead makes the term grow with the price level and saturate at 1 for any
    instrument priced well above 1. Cut-offs tuned against a saturated term may
    need recalibrating.

    Args:
        df: DataFrame with ``Close``, ``upper_range``, ``lower_range`` and
            ``mean`` columns.
        lookback: Rolling window length in rows.

    Returns:
        Series aligned with ``df``; NaN where a full window is not available.
    """
    close = df['Close']
    abs_width = df['upper_range'] - df['lower_range']

    # Term 1: frequency of direction changes inside the window
    price_direction = close.diff()
    flipped = (price_direction > 0) != (price_direction.shift() > 0)
    direction_change = flipped.astype(float).rolling(window=lookback).sum()
    max_direction_changes = max(lookback - 1, 1)  # never divide by zero
    norm_direction_change = (direction_change / max_direction_changes).clip(0, 1)

    # Term 2: narrowness of the range relative to the mean price
    rel_width = abs_width / df['mean']
    norm_range_width = (1 - rel_width.clip(0, 0.2) / 0.2).clip(0, 1)

    # Term 3: dispersion of closes as a share of the range, unit-consistent.
    # A zero-width range has no dispersion to measure, so it scores 0 rather
    # than propagating 0/0 = NaN; undefined (NaN) widths stay NaN.
    traversal = (close.rolling(window=lookback).std() / abs_width).where(abs_width != 0, 0.0)
    norm_traversal = traversal.clip(0, 1)

    range_strength = (
        norm_direction_change * 0.4 +  # Frequency of oscillation
        norm_range_width * 0.3 +       # Narrowness of range
        norm_traversal * 0.3           # Coverage of range
    ).clip(0, 1)
    return range_strength

def calculate_reversion_probability(df):
    """Blend four indicators into a [0, 1] mean-reversion score per row.

    Weights: distance from the mean 0.3, RSI extremity 0.2, excursion beyond a
    Bollinger Band 0.2, range strength 0.3.

    Args:
        df: DataFrame with ``distance_from_mean``, ``rsi``, ``Close``,
            ``bollinger_upper``, ``bollinger_lower``, ``bollinger_std`` and
            ``range_strength`` columns.

    Returns:
        Series aligned with ``df``, clipped to [0, 1].
    """
    # Distance from the mean: |percent| / 100, capped at 0.5, rescaled to [0, 1]
    stretch = (df['distance_from_mean'] / 100).abs().clip(0, 0.5) * 2

    # RSI extremity: grows as RSI moves below 30 or above 70; missing RSI is neutral
    rsi = df['rsi'].fillna(50)
    oversold = (30 - rsi) / 30
    overbought = (rsi - 70) / 30
    rsi_factor = np.where(rsi > 70, overbought, 0)
    rsi_factor = np.where(rsi < 30, oversold, rsi_factor)

    # Bollinger excursion: how far price sits outside a band, in band std units
    band_std = df['bollinger_std']
    above_upper = (df['Close'] - df['bollinger_upper']) / band_std
    below_lower = (df['bollinger_lower'] - df['Close']) / band_std
    bb_factor = np.where(below_lower > 0, below_lower, 0)
    bb_factor = np.clip(np.where(above_upper > 0, above_upper, bb_factor), 0, 1)

    # Range strength is used as-is
    range_factor = df['range_strength']

    blended = stretch * 0.3 + rsi_factor * 0.2 + bb_factor * 0.2 + range_factor * 0.3
    return blended.clip(0, 1)

def temporal_split(data, split_date):
    """Split every timeframe into rows before and rows at/after ``split_date``.

    The split instant is interpreted in UTC. A timezone-naive ``split_date`` is
    taken to be UTC wall time; a timezone-aware one is converted to UTC. Frames
    whose index is timezone-aware are compared against that instant directly;
    frames with a timezone-naive index are compared against the same UTC wall
    time with the timezone dropped, so both kinds of index can be split.

    Args:
        data: dict mapping timeframe name to a DataFrame with a datetime index.
        split_date: Anything ``pd.to_datetime`` turns into a single timestamp.

    Returns:
        ``(train_data, test_data)``: two dicts with the same keys as ``data``;
        the values are independent copies holding rows with index
        ``< split_date`` and ``>= split_date`` respectively.
    """
    cutoff = pd.to_datetime(split_date)
    # tz_localize only accepts naive timestamps; aware ones must be converted.
    if cutoff.tzinfo is None:
        cutoff_utc = cutoff.tz_localize('UTC')
    else:
        cutoff_utc = cutoff.tz_convert('UTC')
    cutoff_naive = cutoff_utc.tz_localize(None)

    train_data = {}
    test_data = {}

    for timeframe, df in data.items():
        # Naive and aware datetimes cannot be compared with each other, so pick
        # the form of the boundary that matches this frame's index.
        index_is_aware = getattr(df.index, 'tz', None) is not None
        boundary = cutoff_utc if index_is_aware else cutoff_naive

        train_data[timeframe] = df[df.index < boundary].copy()
        test_data[timeframe] = df[df.index >= boundary].copy()

    return train_data, test_data

def get_data_at_timestamp(multi_timeframe_data, timestamp):
    """Pick, for each timeframe, the last row whose index is ``<= timestamp``.

    Args:
        multi_timeframe_data: dict mapping timeframe name to a DataFrame.
        timestamp: Value compared against each frame's index.

    Returns:
        dict with the same keys; each value is the selected row as a Series, or
        ``None`` when the frame has nothing at or before ``timestamp``.
    """
    snapshot = {}

    for name, frame in multi_timeframe_data.items():
        history = frame[frame.index <= timestamp]
        # "Last" means last in row order among the rows that passed the filter.
        snapshot[name] = None if history.empty else history.iloc[-1]
    return snapshot

def prepare_multi_timeframe_data(daily_file, hourly_file, minute_file, split_date="2022-01-01"):
    """Load, preprocess and split the three timeframes, printing progress.

    Args:
        daily_file: Location of the daily CSV.
        hourly_file: Location of the hourly CSV.
        minute_file: Location of the minute CSV.
        split_date: Boundary handed to ``temporal_split``.

    Returns:
        ``(train_data, test_data)`` exactly as returned by ``temporal_split``.
    """
    print("Loading data from CSV files...")
    raw_frames = load_data_from_csv(daily_file, hourly_file, minute_file)

    # Indicators are computed on the full history, before the split.
    processed = {}
    for timeframe, frame in raw_frames.items():
        print(f"Processing {timeframe} data...")
        processed[timeframe] = preprocess_timeframe(frame, timeframe)

    print(f"Splitting data at {split_date}")
    train_data, test_data = temporal_split(processed, split_date)

    # Row counts per timeframe
    for timeframe, train_frame in train_data.items():
        print(f"Training {timeframe} data: {train_frame.shape[0]} rows")
        print(f"Testing {timeframe} data: {test_data[timeframe].shape[0]} rows")

    return train_data, test_data

# Example usage
if __name__ == "__main__":
    # CSV locations keyed by the parameter of prepare_multi_timeframe_data they feed
    csv_paths = {
        "daily_file": "/Users/newuser/Projects/robust_algo_trader/data/gen_alpaca_data/CRM_D1_raw_data.csv",
        "hourly_file": "/Users/newuser/Projects/robust_algo_trader/data/gen_alpaca_data/CRM_H1_raw_data.csv",
        "minute_file": "/Users/newuser/Projects/robust_algo_trader/data/gen_alpaca_data/CRM_M1_raw_data.csv",
    }
    daily_file = csv_paths["daily_file"]
    hourly_file = csv_paths["hourly_file"]
    minute_file = csv_paths["minute_file"]

    # Prepare data (an alternative split date of "2022-01-01" was tried as well)
    train_data, test_data = prepare_multi_timeframe_data(split_date="2017-01-01", **csv_paths)

In [None]:
# Show the processed minute-level training frame as this cell's output
train_data['minute']

In [None]:
import uuid
import pandas as pd
import numpy as np

class Position:
    """Plain record describing one open position.

    Every constructor argument is stored unchanged on the attribute of the
    same name.
    """

    def __init__(self, id, timestamp, action, price, stop_loss, target, entry_reason):
        # Identity and entry time
        self.id, self.timestamp = id, timestamp
        # Direction and entry price
        self.action, self.price = action, price
        # Exit levels recorded at entry
        self.stop_loss, self.target = stop_loss, target
        # Label explaining why the position was opened
        self.entry_reason = entry_reason

class Signal:
    """One strategy decision: when, what action, at what price, and why."""

    # Attribute names exported by to_dict, in output order
    _FIELDS = ('timestamp', 'action', 'price', 'reason')

    def __init__(self, timestamp, action, price, reason=''):
        self.timestamp, self.action = timestamp, action
        self.price, self.reason = price, reason

    def to_dict(self):
        """Return the signal's current field values as a new plain dict."""
        return {field: getattr(self, field) for field in self._FIELDS}


class MeanReversionStrategy:
    """Single-position mean-reversion signal generator.

    The strategy walks a frame of precomputed indicator rows and emits one
    ``Signal`` per row. At most one position is open at a time: while flat it
    looks for an entry near a range boundary; while in a position it only
    checks exit conditions.

    Recognised ``params`` keys (defaults in parentheses):
        entry_threshold_upper (25): enter short when ``distance_from_upper``
            is below this value.
        entry_threshold_lower (25): enter long when ``distance_from_lower``
            is below this value.
        exit_threshold_pct (5): width, in percent of the mean, of the band
            around the mean that counts as "target reached".
        use_atr_stops (True): place the stop at an ATR multiple from entry;
            otherwise place it beyond the range boundary.
        stop_loss_atr_multiplier (2.0): ATR multiple for ATR stops.
        stop_loss_range_factor (0.15): fraction of the range width added
            beyond the boundary for range-based stops.
        position_sizing_factor (1.0), max_position_size (1.0): stored only;
            nothing in this class reads them.
        range_threshold (0.5): fallback cut-off on ``range_strength``, used
            only for rows that carry no usable ``is_range_market`` flag.
    """

    def __init__(self, params):
        """Read the tunables from ``params`` (a dict) and start with no open position."""
        self.entry_threshold_upper = params.get('entry_threshold_upper', 25)
        self.entry_threshold_lower = params.get('entry_threshold_lower', 25)

        self.exit_threshold_pct = params.get('exit_threshold_pct', 5)

        self.use_atr_stops = params.get('use_atr_stops', True)
        self.stop_loss_atr_multiplier = params.get('stop_loss_atr_multiplier', 2.0)
        self.stop_loss_range_factor = params.get('stop_loss_range_factor', 0.15)

        self.position_sizing_factor = params.get('position_sizing_factor', 1.0)
        self.max_position_size = params.get('max_position_size', 1.0)

        self.range_threshold = params.get('range_threshold', 0.5)

        self.positions = []
        self.last_action = "NOTHING"

    def generate_signals(self, data):
        """Produce one ``Signal`` per row of ``data``, in row order.

        Args:
            data: DataFrame whose rows provide ``Close``, ``mean``,
                ``upper_range``, ``lower_range`` and the columns read by the
                entry/exit helpers. The index value of each row becomes the
                signal timestamp.

        Returns:
            list of ``Signal`` objects with action ``"NOTHING"``, ``"BUY"``,
            ``"SELL"``, ``"HOLD"`` or ``"CLOSE"``.

        Note:
            Open positions are kept on the instance, so state carries over
            between calls unless ``reset()`` is called.
        """
        signals = []

        for idx, row in data.iterrows():
            timestamp = idx
            current_price = float(row['Close'])

            signal = Signal(
                timestamp=timestamp,
                action="NOTHING",
                price=current_price,
                reason="default_state"
            )

            # Rows without a full lookback window cannot be evaluated.
            if pd.isna(row['mean']) or pd.isna(row['upper_range']) or pd.isna(row['lower_range']):
                signal.reason = "missing_data"
                signals.append(signal)
                continue

            # In a position: only exits are considered on this row.
            if self.positions:
                position = self.positions[0]

                exit_reason = self._check_exit_conditions(row, position)
                if exit_reason:
                    signal.action = "CLOSE"
                    signal.reason = exit_reason

                    self.positions = []
                    self.last_action = "CLOSE"
                else:
                    signal.action = "HOLD"
                    signal.reason = "holding_position"
                    self.last_action = "HOLD"

                signals.append(signal)
                continue

            # Flat: entries are only allowed in a range market.
            is_range_market = self._is_in_range_market(row)
            if not is_range_market:
                signal.reason = "not_range_market"
                signals.append(signal)
                continue

            entry_action = self._check_entry_conditions(row)
            if entry_action:
                signal.action = entry_action
                signal.reason = "long_mean_reversion" if entry_action == "BUY" else "short_mean_reversion"

                self._add_position(timestamp, current_price, row, entry_action)
                self.last_action = entry_action
            else:
                signal.reason = "no_entry_condition"

            signals.append(signal)

        return signals

    def _is_in_range_market(self, row):
        """Return True when ``row`` is classified as a range market.

        A non-missing ``is_range_market`` value on the row takes precedence;
        ``self.range_threshold`` is applied to ``range_strength`` only when
        that flag is absent or missing. With neither available the answer is
        False.
        """
        if 'is_range_market' in row and not pd.isna(row['is_range_market']):
            return bool(row['is_range_market'])
        elif 'range_strength' in row and not pd.isna(row['range_strength']):
            return float(row['range_strength']) > self.range_threshold
        return False

    def _check_entry_conditions(self, row):
        """Return ``"BUY"``, ``"SELL"`` or ``None`` for a flat strategy.

        No entry is taken unless the row's ``mean_reversion_probability``
        exceeds 0.5 (a missing value is treated as 0.7). The long condition
        (close to the lower boundary) is tested before the short one.
        """
        distance_from_lower = float(row['distance_from_lower'])
        distance_from_upper = float(row['distance_from_upper'])

        mean_reversion_prob = 0.7
        if 'mean_reversion_probability' in row and not pd.isna(row['mean_reversion_probability']):
            mean_reversion_prob = float(row['mean_reversion_probability'])

        if mean_reversion_prob <= 0.5:
            return None

        if distance_from_lower < self.entry_threshold_lower:
            return "BUY"

        if distance_from_upper < self.entry_threshold_upper:
            return "SELL"

        return None

    def _check_exit_conditions(self, row, position):
        """Return the exit reason for ``position`` on this row, or ``None`` to hold.

        Returns ``'stop_loss_hit'`` when the close has crossed the stop, and
        ``'target_reached'`` when the close is inside the exit band around the
        row's current mean AND on the profitable side of the entry price. The
        second condition matters because an entry can already lie inside the
        band; without it, the very next bar would be reported as a reached
        target even if price had moved against the position.

        The band is measured from the row's current ``mean``; the ``target``
        stored on the position is not consulted.
        """
        current_price = float(row['Close'])
        mean_price = float(row['mean'])
        band = self.exit_threshold_pct / 100

        # Check stop loss first (risk management priority)
        if (position.action == 'BUY' and current_price <= position.stop_loss) or \
           (position.action == 'SELL' and current_price >= position.stop_loss):
            return 'stop_loss_hit'

        # Direction-aware target checking
        if position.action == 'BUY':
            # Long: price must have risen from entry and be near/above the mean
            near_mean = current_price >= mean_price * (1 - band)
            moved_in_favour = current_price > position.price
        else:  # SELL
            # Short: price must have fallen from entry and be near/below the mean
            near_mean = current_price <= mean_price * (1 + band)
            moved_in_favour = current_price < position.price

        if near_mean and moved_in_favour:
            return 'target_reached'

        return None

    def _add_position(self, timestamp, price, row, action):
        """Open a position at ``price`` and append it to ``self.positions``.

        The stop is placed an ATR multiple away from the entry when ATR stops
        are enabled and the row has a usable ``atr``; otherwise it is placed
        beyond the relevant range boundary by a fraction of the range width.
        The row's current ``mean`` is recorded as the position's target.
        """
        range_width = float(row['upper_range'] - row['lower_range'])

        if self.use_atr_stops and 'atr' in row and not pd.isna(row['atr']):
            atr_offset = float(row['atr']) * self.stop_loss_atr_multiplier
            if action == "BUY":
                stop_loss = price - atr_offset
            else:  # SELL
                stop_loss = price + atr_offset
        else:
            range_offset = range_width * self.stop_loss_range_factor
            if action == "BUY":
                stop_loss = float(row['lower_range']) - range_offset
            else:  # SELL
                stop_loss = float(row['upper_range']) + range_offset

        target = float(row['mean'])

        position = Position(
            id=str(uuid.uuid4()),
            timestamp=timestamp,
            action=action,
            price=price,
            stop_loss=stop_loss,
            target=target,
            entry_reason='long_mean_reversion' if action == "BUY" else 'short_mean_reversion'
        )

        self.positions.append(position)

    def reset(self):
        """Drop any open position and restore the initial ``last_action``."""
        self.positions = []
        self.last_action = "NOTHING"


def run_strategy(data, params=None):
    """Build a MeanReversionStrategy and generate signals for ``data``.

    Args:
        data: Indicator frame handed to ``generate_signals``.
        params: Strategy parameter dict. When ``None`` the preset below is
            used; any dict that is passed (even an empty one) is used as given.

    Returns:
        ``(signals, strategy)``: the list of generated signals and the
        strategy instance that produced them.
    """
    preset = dict(
        entry_threshold_upper=15,
        entry_threshold_lower=15,
        exit_threshold_pct=2,
        use_atr_stops=True,
        stop_loss_atr_multiplier=3.5,
        position_sizing_factor=1.0,
        max_position_size=1.0,
        range_threshold=0.65,
    )

    strategy = MeanReversionStrategy(preset if params is None else params)
    return strategy.generate_signals(data), strategy

In [None]:
# Generate signals on the minute-level training frame using run_strategy's built-in parameter preset
signals, strategy = run_strategy(train_data['minute'])


In [None]:
import pandas as pd

# Convert signals to dictionaries
signal_dicts = [signal.to_dict() for signal in signals]

# Create DataFrame. The columns are named explicitly (they are the keys of
# Signal.to_dict) so the frame keeps its schema even when no signals were
# produced; an empty list alone would give a frame without an 'action' column.
signals_df = pd.DataFrame(signal_dicts, columns=['timestamp', 'action', 'price', 'reason'])

# Display first few signals
# print(signals_df.head())

In [None]:
# Keep only the rows where the strategy did something, i.e. action is not NOTHING
is_active = signals_df['action'] != 'NOTHING'
n_signals = signals_df[is_active]
n_signals

In [None]:
# Find problematic SELL trades: shorts that were closed as 'target_reached'
# at a price above their entry price
problematic_trades = []
current_trade = None

for i, row in n_signals.reset_index().iterrows():
    if row['action'] == 'SELL':
        # A short entry starts a fresh trade record
        current_trade = dict(
            entry_idx=i,
            entry_time=row['timestamp'],
            entry_price=row['price'],
        )
        continue

    # Only a CLOSE that follows a tracked SELL is of interest
    if row['action'] != 'CLOSE' or current_trade is None:
        continue

    # Complete the trade info
    current_trade.update(
        exit_idx=i,
        exit_time=row['timestamp'],
        exit_price=row['price'],
        exit_reason=row['reason'],
    )

    # Problematic: price went UP on a short, yet the exit was labelled as a reached target
    closed_higher = current_trade['exit_price'] > current_trade['entry_price']
    if closed_higher and row['reason'] == 'target_reached':
        problematic_trades.append(current_trade)
        print(f"Problem trade found at index {i}")
        print(f"  Entry: SELL at {current_trade['entry_price']} on {current_trade['entry_time']}")
        print(f"  Exit: {row['reason']} at {current_trade['exit_price']} on {current_trade['exit_time']}")
        print(f"  Price change: {(current_trade['exit_price'] - current_trade['entry_price']):.4f}")
        print()

    # Reset for next trade
    current_trade = None