In [10]:
'''
Strategy 3: From Literature: buy and hold
'''

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from backtesting import Backtest, Strategy
from datetime import datetime
import talib
import joblib
import matplotlib.pyplot as plt

# Read the price history CSV, parse the 'Time' column into timestamps and
# use it as the index of the frame.
data = (
    pd.read_csv('./EURUSD_D1.csv')
    .assign(Time=lambda frame: pd.to_datetime(frame['Time'], format='%Y-%m-%d %H:%M:%S'))
    .set_index('Time')
)


#### Feature Engineering ####
def multivariateFeatureEngineering(data):
    """Return a copy of *data* extended with technical-indicator columns.

    The input frame must provide 'High', 'Low' and 'Close' columns. The
    following columns are appended, in this order: '50_sma', '200_sma',
    '50_ema', '100_ema', 'MACD_line', 'Signal_line', 'ADX', 'RSI',
    'stoch_k', 'stoch_d' and 'ATR'.

    Rows containing any NaN (e.g. the warm-up period of the rolling and
    talib indicators) are removed from the result.

    The caller's DataFrame is NOT modified: all work happens on a copy.
    Previously the indicator columns were written into the caller's frame
    while a different, cleaned frame was returned, leaving the input in a
    half-processed state.
    """
    # Work on a private copy so the caller's frame keeps its original columns.
    data = data.copy()

    # Simple moving averages of the close.
    data['50_sma'] = data['Close'].rolling(window=50).mean()
    data['200_sma'] = data['Close'].rolling(window=200).mean()

    # Exponential moving averages of the close.
    data['50_ema'] = data['Close'].ewm(span=50, adjust=False).mean()
    data['100_ema'] = data['Close'].ewm(span=100, adjust=False).mean()

    # Temporary 12/26 EMAs; only needed to build the MACD line and dropped below.
    data['12_ema'] = data['Close'].ewm(span=12, adjust=False).mean()
    data['26_ema'] = data['Close'].ewm(span=26, adjust=False).mean()
    data['MACD_line'] = data['12_ema'] - data['26_ema']
    # 9-period EMA of the MACD line.
    data['Signal_line'] = data['MACD_line'].ewm(span=9, adjust=False).mean()

    # talib indicators, all with a 14-period setting.
    data['ADX'] = talib.ADX(data['High'], data['Low'], data['Close'], timeperiod=14)
    data['RSI'] = talib.RSI(data['Close'], timeperiod=14)
    data['stoch_k'], data['stoch_d'] = talib.STOCH(data['High'], data['Low'], data['Close'],
                                                fastk_period=14, slowk_period=3, slowd_period=3)
    data['ATR'] = talib.ATR(data['High'], data['Low'], data['Close'], timeperiod=14)

    data = data.dropna()  # drop rows that have NA
    data = data.drop(columns=['12_ema', '26_ema'])

    return data

###### Generate lag features #######
def multivariateFeatureLagMultiStep(data, n_past, future_steps, target_column,
                                    group_feature_lags=1):
    """Build flattened lag-window features and multi-step targets.

    For every position ``i`` with a full look-back window and all requested
    future rows available, one sample is produced:

    * features: the ``n_past`` rows ``data.iloc[i - n_past:i]`` flattened to 1-D
    * response: ``data.iloc[i + step - 1, target_column]`` for each ``step``
      in ``future_steps``

    Args:
        data: DataFrame of feature columns (assumed numeric so that the
            columns share a common NumPy dtype).
        n_past: number of past rows in each look-back window.
        future_steps: non-empty iterable of horizons; ``step == 1`` is the
            row immediately after the window.
        target_column: integer position of the target column in *data*.
        group_feature_lags: if truthy (default, the previous hard-coded
            behaviour) the flat layout is feature-major, i.e. all lags of
            column 0, then all lags of column 1, ...; if falsy the layout is
            time-major, i.e. all columns of the oldest row, then the next row, ...

    Returns:
        ``(features, response)`` with shapes
        ``(num_samples, n_past * num_features)`` and
        ``(num_samples, len(future_steps))``. When no sample fits, empty
        arrays with these shapes are returned instead of failing in reshape.

    Raises:
        ValueError: if *future_steps* is empty.
    """
    future_steps = list(future_steps)
    if not future_steps:
        raise ValueError("future_steps must contain at least one step")

    # Convert once up front instead of slicing with .iloc for every sample/column.
    values = data.to_numpy()
    target_values = data.iloc[:, target_column].to_numpy()
    num_rows, num_features = values.shape
    max_future_step = max(future_steps)

    features = []
    response = []

    # Stop early enough that the furthest future step stays inside the data.
    for i in range(n_past, num_rows - max_future_step + 1):
        window = values[i - n_past:i, :]  # shape: (n_past, num_features)
        if group_feature_lags:
            # Transpose so that each feature's lags are contiguous when flattened.
            window = window.T
        features.append(window.reshape(-1))
        response.append([target_values[i + step - 1] for step in future_steps])

    if not features:
        # No window fits: return well-formed empty arrays rather than raising.
        return (
            np.empty((0, n_past * num_features), dtype=values.dtype),
            np.empty((0, len(future_steps)), dtype=target_values.dtype),
        )

    features_flat = np.array(features)  # shape: (num_samples, n_past * num_features)
    response = np.array(response)       # shape: (num_samples, len(future_steps))

    return features_flat, response


############################# Load saved Best model information ##################################################

# Saved description of the best model: element 0 is used below as the look-back
# window and element 2 as the list of feature names (element 1 is not read in
# this cell).
# NOTE(review): these feature names ('sma_50', 'ema_50', ...) differ from the
# column names produced by multivariateFeatureEngineering ('50_sma', '50_ema',
# ...) - confirm which spelling the saved model expects.
best_model_info = [
    1,
    1,
    ['Open', 'Low', 'sma_50', 'sma_200', 'ema_50', 'MACD_line', 'Signal_line', 'RSI', 'ATR', 'ema_100', 'High', 'Close'],
]

# NOTE(review): joblib.load unpickles the file - only load artifacts from a trusted source.
best_model_data = joblib.load('./model _weights/best_model_weights_and_scaler.pkl')
scaler, weights, bias = (best_model_data[key] for key in ('scaler', 'weights', 'bias'))

lookback_window, features = best_model_info[0], best_model_info[2]



################################# # Backtesting Strategy using Linear Regression #########################################

class LinearRegressionStrategy(Strategy):
    """Buy-and-hold baseline: open one long position and keep it.

    Despite its name, this class makes no model predictions; it buys once the
    bar counter has reached the module-level ``lookback_window`` and then
    never trades again while a position is open.
    """

    profit_threshold = 130  # not read anywhere in this class

    def init(self):
        # Counter of bars seen so far, starting at 1 for the first bar.
        self.index = 1

    ######### Strategy Execution #############
    def next(self):
        warmed_up = self.index >= lookback_window
        # Enter long only once: skip if a position is already open.
        if warmed_up and not self.position:
            self.buy(size=0.1)

        self.index += 1

    def profit_factor_cal(self):
        """Return gross profit divided by gross loss over the closed trades.

        Returns ``float('inf')`` when there are no losing trades.
        """
        gross_profit = 0
        losses_total = 0
        for closed in self.closed_trades:
            if closed.pl > 0:
                gross_profit += closed.pl
            elif closed.pl < 0:
                losses_total += closed.pl
        gross_loss = abs(losses_total)

        if gross_loss != 0:
            return gross_profit / gross_loss
        return float('inf')
    

# Backtest on the last 481 rows of the price data
# (this assumes X_test is at the end of the dataset).
test_data = data.iloc[-481:]
bt = Backtest(
    test_data,
    LinearRegressionStrategy,
    cash=10000,
    commission=.002,
    margin=.05,
    trade_on_close=True,
)
stats = bt.run()
bt.plot()  # chart of the trade execution

# The strategy instance used for the run is exposed on the stats object;
# use it to compute the profit factor from its closed trades.
strategy_instance = stats._strategy
profit_factor = strategy_instance.profit_factor_cal()

print(profit_factor)

stats  # notebook display of the trade statistics



inf


Start                     2023-03-30 00:00:00
End                       2024-10-11 00:00:00
Duration                    561 days 00:00:00
Exposure Time [%]                     99.5842
Equity Final [$]                 10134.655636
Equity Peak [$]                  10694.287486
Return [%]                           1.346556
Buy & Hold Return [%]                0.286131
Return (Ann.) [%]                    0.703226
Volatility (Ann.) [%]               11.344215
Sharpe Ratio                          0.06199
Sortino Ratio                         0.09101
Calmar Ratio                         0.053127
Max. Drawdown [%]                  -13.236609
Avg. Drawdown [%]                   -4.399463
Max. Drawdown Duration      452 days 00:00:00
Avg. Drawdown Duration       93 days 00:00:00
# Trades                                    1
Win Rate [%]                            100.0
Best Trade [%]                       0.673305
Worst Trade [%]                      0.673305
Avg. Trade [%]                    