In [4]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas_ta as ta
import numpy as np


#### Train the models

In [None]:
# load the train data
files = {
    '1m': 'USDJPY_Candlestick_1_M_BID_01.01.2025-18.01.2025.csv',
    '1h': 'PG.USUSD_Candlestick_1_Hour_BID_01.09.2024-18.01.2025.csv',
    '4h': 'BRENT.CMDUSD_Candlestick_4_Hour_BID_01.10.2024-18.01.2025.csv',
    '1d': 'USA500.IDXUSD_Candlestick_1_D_BID_01.01.2022-18.01.2025.csv'
}

# Define a function to calculate indicators with interval-specific lengths

def calculate_indicators(df, interval):
    if interval == '1m':  # 1-Minute Data
        ema_length = 9
        rsi_length = 14
        bb_length = 20
        atr_length = 14
    elif interval == '1h':  # 1-Hour Data
        ema_length = 20
        rsi_length = 14
        bb_length = 20
        atr_length = 14
    elif interval == '4h':  # 4-Hour Data
        ema_length = 50
        rsi_length = 14
        bb_length = 20
        atr_length = 14
    elif interval == '1d':  # 1-Day Data
        ema_length = 50
        rsi_length = 14
        bb_length = 20
        atr_length = 14
    else:
        raise ValueError("Unsupported interval")

    # Exponential Moving Average (EMA)
    df['EMA'] = ta.ema(df['Close'], length=ema_length)

    # Relative Strength Index (RSI)
    df['RSI'] = ta.rsi(df['Close'], length=rsi_length)

    # Bollinger Bands
    bb = ta.bbands(df['Close'], length=bb_length, std=2)
    df['BB_upper'] = bb['BBU_20_2.0']
    df['BB_lower'] = bb['BBL_20_2.0']
    df['BB_middle'] = bb['BBM_20_2.0']

    # Average True Range (ATR)
    df['ATR'] = ta.atr(df['High'], df['Low'], df['Close'], length=atr_length)

    # On-Balance Volume (OBV)
    df['OBV'] = ta.obv(df['Close'], df['Volume'])

    return df


results = {}

for interval, file in files.items():
    # Load and preprocess data
    df = pd.read_csv(file)
    df['Local time'] = pd.to_datetime(df['Local time'], format='%d.%m.%Y %H:%M:%S.%f GMT%z', errors='coerce')
    df.dropna(subset=['Local time'], inplace=True)
    df.set_index('Local time', inplace=True)

    # Calculate indicators
    results[interval] = calculate_indicators(df, interval)

# Train Random Forest models for each interval
def train_random_forest(df, interval):
    # Prepare the data
    features = ['Open', 'High', 'Low', 'Volume', 'EMA', 'RSI', 'BB_upper', 'BB_lower', 'BB_middle', 'ATR', 'OBV']
    df.dropna(subset=features, inplace=True)  # Drop rows with missing indicator values

    X = df[features]
    y = df['Close']

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize and train the Random Forest Regressor
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"{interval} Random Forest Model MSE: {mse}")

    return model

# Train models for each interval
models = {}

for interval, df in results.items():
    print(f"Training Random Forest model for {interval} interval...")
    models[interval] = train_random_forest(df, interval)


  df['Local time'] = pd.to_datetime(df['Local time'], format='%d.%m.%Y %H:%M:%S.%f GMT%z', errors='coerce')
  df['Local time'] = pd.to_datetime(df['Local time'], format='%d.%m.%Y %H:%M:%S.%f GMT%z', errors='coerce')
  df['Local time'] = pd.to_datetime(df['Local time'], format='%d.%m.%Y %H:%M:%S.%f GMT%z', errors='coerce')


Training Random Forest model for 1m interval...
1m Random Forest Model MSE: 8.865623884265893e-05
Training Random Forest model for 1h interval...
1h Random Forest Model MSE: 0.027396336228485862
Training Random Forest model for 4h interval...
4h Random Forest Model MSE: 0.029821906328452476
Training Random Forest model for 1d interval...
1d Random Forest Model MSE: 473.826449292038


### In sample testing

In [13]:
# Restrict data to the last 1000 observations
def restrict_data_to_1000(df):
    return df.tail(1000)

for interval in results.keys():
    results[interval] = restrict_data_to_1000(results[interval])

In [None]:

def simulate_trading_with_tuning(df, model, slippage=0.002, commission=0.004, max_positions=10, capital_per_position=0.1, stop_loss=0.03, take_profit=0.05):

    """
        df - dataframe with indicators and price data
        model - trained Random Forest model
        slippage - slippage percentage per trade
        commision - commission percentage per trade
        max_mositions - maximum open positions allowed
        capital_per_position - percentage of capital allocated per position
        stop_loss - stop-loss threshold as a percentage
        take_profit - take-profit threshold as a percentage

    """

    capital = 100000  # Starting capital
    cash = capital
    positions = []  # List to store open positions
    portfolio_value = [capital]  # Portfolio value over time

    for i in range(len(df) - 1):
        current_row = df.iloc[i]
        next_row = df.iloc[i + 1]

        # Make a prediction for the next period
        features = ['Open', 'High', 'Low', 'Volume', 'EMA', 'RSI', 'BB_upper', 'BB_lower', 'BB_middle', 'ATR', 'OBV']
        if current_row[features].isnull().any():
            continue

        # Predict the next closing price
        prediction = model.predict(current_row[features].to_frame().T)[0]
        predicted_change = (prediction - current_row['Close']) / current_row['Close']

        # Open new positions if within constraints
        if predicted_change > 0 and len(positions) < max_positions:
            position_size = capital_per_position * cash
            buy_price = current_row['Close'] * (1 + slippage)
            cash -= position_size * (1 + commission)
            positions.append({
                'buy_price': buy_price,
                'size': position_size / buy_price,
                'stop_loss': buy_price * (1 - stop_loss),
                'take_profit': buy_price * (1 + take_profit)
            })

        # Update existing positions
        for position in positions[:]:
            current_price = next_row['Close'] * (1 - slippage)
            # Check for stop-loss or take-profit
            if current_price <= position['stop_loss'] or current_price >= position['take_profit']:
                sell_value = position['size'] * current_price * (1 - commission)
                cash += sell_value
                positions.remove(position)

        # Calculate portfolio value
        position_value = sum(p['size'] * next_row['Close'] for p in positions)
        portfolio_value.append(cash + position_value)

    # Calculate performance metrics
    portfolio_value = np.array(portfolio_value)
    returns = np.diff(portfolio_value) / portfolio_value[:-1]

    return_rate = (portfolio_value[-1] - capital) / capital
    max_drawdown = np.min(portfolio_value / np.maximum.accumulate(portfolio_value)) - 1
    sharpe_ratio = (np.mean(returns) - 0.03 / 252) / np.std(returns)

    return {
        'return_rate': return_rate,
        'max_drawdown': max_drawdown,
        'sharpe_ratio': sharpe_ratio
    }


# Define stop-loss and take-profit ranges for tuning
stop_loss_range = [0.02, 0.03, 0.04]  # 2%, 3%, 4%
take_profit_range = [0.04, 0.05, 0.06]  # 4%, 5%, 6%

# Iterate through intervals and tune parameters
optimized_results = {}
for interval, model in models.items():
    print(f"Tuning stop-loss and take-profit for {interval} interval...")
    df = results[interval]
    best_metrics = {'return_rate': float('-inf')}
    best_params = None

    for stop_loss in stop_loss_range:
        for take_profit in take_profit_range:
            metrics = simulate_trading_with_tuning(
                df, model,
                stop_loss=stop_loss,
                take_profit=take_profit
            )

            if metrics['return_rate'] > best_metrics['return_rate']:
                best_metrics = metrics
                best_params = {'stop_loss': stop_loss, 'take_profit': take_profit}

    optimized_results[interval] = {
        'best_params': best_params,
        'metrics': best_metrics
    }

# Display optimized results
for interval, result in optimized_results.items():
    print(f"\n{interval} Interval Optimized Performance:")
    print(f"Best Stop-Loss: {result['best_params']['stop_loss']:.2%}")
    print(f"Best Take-Profit: {result['best_params']['take_profit']:.2%}")
    print(f"Return Rate: {result['metrics']['return_rate']:.2%}")
    print(f"Maximum Drawdown: {result['metrics']['max_drawdown']:.2%}")
    print(f"Sharpe Ratio: {result['metrics']['sharpe_ratio']:.2f}")


Tuning stop-loss and take-profit for 1m interval...
Tuning stop-loss and take-profit for 1h interval...
Tuning stop-loss and take-profit for 4h interval...
Tuning stop-loss and take-profit for 1d interval...

1m Interval Optimized Performance:
Best Stop-Loss: 2.00%
Best Take-Profit: 4.00%
Return Rate: -0.39%
Maximum Drawdown: -0.39%
Sharpe Ratio: -3.03

1h Interval Optimized Performance:
Best Stop-Loss: 3.00%
Best Take-Profit: 4.00%
Return Rate: -6.38%
Maximum Drawdown: -7.58%
Sharpe Ratio: -0.18

4h Interval Optimized Performance:
Best Stop-Loss: 2.00%
Best Take-Profit: 6.00%
Return Rate: -0.58%
Maximum Drawdown: -7.77%
Sharpe Ratio: -0.04

1d Interval Optimized Performance:
Best Stop-Loss: 4.00%
Best Take-Profit: 6.00%
Return Rate: 7.03%
Maximum Drawdown: -11.96%
Sharpe Ratio: -0.01


### Out of sample (time series for class)

In [18]:
from itertools import product
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

# Function to simulate trading with given parameters
def simulate_trading(df, model, stop_loss, take_profit, slippage=0.002, commission=0.004, max_positions=10, capital_per_position=0.1, prediction_days=10, open_threshold=0.02):

    capital = 100000  # Starting capital
    cash = capital
    positions = []  # List to store open positions
    portfolio_value = [capital]  # Portfolio value over time

    for i in range(len(df) - prediction_days):
        current_row = df.iloc[i]
        future_rows = df.iloc[i + 1:i + prediction_days + 1]

        # Make a prediction for the next period
        features = ['Open', 'High', 'Low', 'Volume', 'EMA', 'RSI', 'BB_upper', 'BB_lower', 'BB_middle', 'ATR', 'OBV']
        if current_row[features].isnull().any():
            continue

        # Predict the future closing prices
        future_predictions = model.predict(future_rows[features])
        max_predicted_price = max(future_predictions)
        predicted_change = (max_predicted_price - current_row['Close']) / current_row['Close']

        # Open new positions if within constraints and above threshold
        if predicted_change > open_threshold and len(positions) < max_positions and cash >= 0.1 * capital:
            position_size = 0.1 * capital
            buy_price = current_row['Close'] * (1 + slippage)
            cash -= position_size * (1 + commission)
            positions.append({
                'buy_price': buy_price,
                'size': position_size / buy_price,
                'stop_loss': buy_price * (1 - stop_loss),
                'take_profit': buy_price * (1 + take_profit),
                'trailing_stop': buy_price * (1 + take_profit) * 0.98  # Trailing stop starts 2% below the take profit
            })

        # Update existing positions
        next_row = df.iloc[i + 1]
        for position in positions[:]:
            current_price = next_row['Close'] * (1 - slippage)

            # Adjust trailing stop if the price exceeds the current take profit
            if current_price >= position['take_profit']:
                position['trailing_stop'] = max(position['trailing_stop'], current_price * 0.98)

            # Check for stop-loss or trailing stop
            if current_price <= position['stop_loss'] or current_price <= position['trailing_stop']:
                sell_value = position['size'] * current_price * (1 - commission)
                cash += sell_value
                positions.remove(position)

        # Calculate portfolio value
        position_value = sum(p['size'] * next_row['Close'] for p in positions)
        portfolio_value.append(cash + position_value)

    # Calculate performance metrics
    portfolio_value = np.array(portfolio_value)
    returns = np.diff(portfolio_value) / portfolio_value[:-1]
    if len(returns) == 0 or np.std(returns) == 0:  # Handle potential divide by zero
        sharpe_ratio = 0
    else:
        sharpe_ratio = (np.mean(returns) - 0.03 / 252) / np.std(returns)

    return_rate = (portfolio_value[-1] - capital) / capital
    max_drawdown = np.min(portfolio_value / np.maximum.accumulate(portfolio_value)) - 1

    return {
        'return_rate': return_rate,
        'max_drawdown': max_drawdown,
        'sharpe_ratio': sharpe_ratio
    }




In [None]:
# fill the data with file names provided by the teacher
class_files = {
    '1m': '',
    '1h': '',
    '4h': '',
    '1d': ''
}

# import the data and calculate the indicators

class_data = {}

for interval, file in files.items():

    df = pd.read_csv(file)
    df['Local time'] = pd.to_datetime(df['Local time'], format='%d.%m.%Y %H:%M:%S.%f GMT%z', errors='coerce')
    df.dropna(subset=['Local time'], inplace=True)
    df.set_index('Local time', inplace=True)


    class_data[interval] = calculate_indicators(df, interval)


In [None]:
# simulate trading


for interval, file in class_data.items():
    if interval == '1m':  # 1-Minute Data
        results_1m = simulate_trading(file, models[interval], stop_loss=0.02, take_profit=0.04)
        print(results_1m)
    elif interval == '1h':  # 1-Hour Data
        results_1h = simulate_trading(file, models[interval], stop_loss=0.03, take_profit=0.04)
        print(results_1h)
    elif interval == '4h':  # 4-Hour Data
        results_4h = simulate_trading(file, models[interval], stop_loss=0.02, take_profit=0.06)
        print(results_4h)
    elif interval == '1d':  # 1-Day Data
        results_1d = simulate_trading(file, models[interval], stop_loss=0.04, take_profit=0.04)
        print(results_1d)
