In [95]:
# Pairs-trading (two-asset) strategy: it makes a profit when the two assets revert back to their mean.
import pandas as pd
import numpy as np
import MetaTrader5 as mt5
from datetime import datetime, timedelta

import plotly.express as px
from statsmodels.regression import linear_model


In [96]:
# Connect to the MetaTrader 5 terminal. initialize() reports failure through
# its return value, so check it here instead of failing later on a data request.
if not mt5.initialize():
    raise RuntimeError(f"MetaTrader5 initialize() failed: {mt5.last_error()}")

# Hourly bars over a trailing 720-day window. The end of the window is sampled
# once so that the start is exactly 720 days before it.
timeframe = mt5.TIMEFRAME_H1
end_time   = datetime.now()
start_time = end_time - timedelta(days=720)

def get_ohlc(symbol):
    """Fetch price bars for ``symbol`` from MetaTrader 5 as a DataFrame.

    The request uses the notebook-level ``timeframe``, ``start_time`` and
    ``end_time`` settings.

    Parameters
    ----------
    symbol : str
        Symbol name handed to ``mt5.copy_rates_range``.

    Returns
    -------
    pandas.DataFrame
        One row per returned bar; the ``time`` column is converted from
        epoch seconds to pandas datetimes.

    Raises
    ------
    ValueError
        If the terminal returns no bars for ``symbol``.
    """
    rates = mt5.copy_rates_range(symbol, timeframe, start_time, end_time)

    # A failed or empty request would otherwise surface as a confusing
    # KeyError on the missing 'time' column below.
    if rates is None or len(rates) == 0:
        raise ValueError(f"No rates returned for {symbol!r}: {mt5.last_error()}")

    rates_df = pd.DataFrame(rates)
    rates_df['time'] = pd.to_datetime(rates_df['time'], unit='s')

    return rates_df

In [97]:
########### Pairs of financial markets closely related to each other #######################
#DBK vs CBK
#NVDA vs AMD
#us30 vs ustec
#brent_oil vs wti_oil

# The two legs of the pair, as symbol names passed to get_ohlc.
symbol1, symbol2 = 'NVDAm', 'AMDm'

# Price history for each leg.
prices1_df = get_ohlc(symbol1)
prices2_df = get_ohlc(symbol2)

# Show the most recent bars of both legs, one table after the other.
print(prices1_df.tail(), prices2_df.tail(), sep='\n')

                    time    open    high     low   close  tick_volume  spread  \
4907 2025-09-22 12:00:00  175.21  175.66  174.95  175.49          510      64   
4908 2025-09-22 13:00:00  175.48  175.98  174.58  175.38         2632      16   
4909 2025-09-22 14:00:00  175.39  175.99  174.91  175.68         2705      16   
4910 2025-09-22 15:00:00  175.67  175.90  175.20  175.42         1841      16   
4911 2025-09-22 16:00:00  175.42  183.63  175.42  183.18        11435      16   

      real_volume  
4907            0  
4908            0  
4909            0  
4910            0  
4911            0  
                    time    open    high     low   close  tick_volume  spread  \
4907 2025-09-22 12:00:00  156.31  157.09  156.08  157.05          329      96   
4908 2025-09-22 13:00:00  157.04  161.25  156.75  160.77         2048      24   
4909 2025-09-22 14:00:00  160.82  161.64  160.16  161.53         2591      24   
4910 2025-09-22 15:00:00  161.50  161.94  160.85  161.40         1908

In [98]:
# Join the two legs on bar time (inner join: only timestamps present in both
# frames survive) and keep just the open prices, suffixed _1 / _2.
spread_df = pd.merge(
    prices1_df[['time', 'open']],
    prices2_df[['time', 'open']],
    on='time',
    how='inner',
    suffixes=('_1', '_2'),
)
spread_df.tail()

Unnamed: 0,time,open_1,open_2
4907,2025-09-22 12:00:00,175.21,156.31
4908,2025-09-22 13:00:00,175.48,157.04
4909,2025-09-22 14:00:00,175.39,160.82
4910,2025-09-22 15:00:00,175.67,161.5
4911,2025-09-22 16:00:00,175.42,161.39


In [99]:
# Raw open prices of both legs on a shared time axis.
px.line(spread_df, x='time', y=['open_1', 'open_2'], title='Open prices: open_1 vs open_2')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [100]:
# Scatter of one leg against the other with an OLS trendline drawn in red.
# Passing the frame plus column names gives labelled axes, and
# trendline_color_override colours the trendline without reaching into
# fig.data by position.
fig = px.scatter(
    spread_df,
    x='open_1',
    y='open_2',
    trendline='ols',
    trendline_color_override='red',
    title='open_1 vs open_2 with OLS trendline',
)
fig

In [101]:

# Regress leg 1 on leg 2. No constant column is added, so the fit has no
# intercept and its single coefficient serves as the ratio between the legs.
model = linear_model.OLS(endog=spread_df['open_1'], exog=spread_df['open_2'])
result = model.fit()

ratio = result.params.loc['open_2']
ratio

### Prefer pairs whose correlation is approx. 1 for more profit

0.773538833345664

->price(asset1) = price(asset2)*ratio + error
The error term can be called the spread or the residual.

-->The spread is just the ratio-adjusted price difference between the two financial assets.
->spread = price(asset1) - price(asset2)*ratio

In [102]:
# Spread = leg 1 minus leg 2 scaled by the fitted ratio.
spread_df['spread'] = spread_df['open_1'].sub(spread_df['open_2'].mul(ratio))
px.line(spread_df, x='time', y='spread', title='Spread')


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [103]:
# Price plot: leg 1 against leg 2 rescaled by the ratio, so both series sit
# on a comparable level.
spread_df['open_2_adj'] = spread_df['open_2'] * ratio
px.line(spread_df, x='time', y=['open_1', 'open_2_adj'], title='open_1 vs ratio-adjusted open_2')



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



Using the Augmented Dickey-Fuller (ADF) test --> it shows whether the spread has stationary characteristics

In [104]:
from statsmodels.tsa.stattools import adfuller


############ I have to choose more correlated assets ##############
def test_stationarity(time_series, significance_level=0.10):
    """Run ``adfuller`` on ``time_series`` and print a short report.

    The report lists the ADF statistic, the p-value and the critical values,
    followed by a one-line verdict.

    Parameters
    ----------
    time_series : array-like
        Passed to ``adfuller`` unchanged.
    significance_level : float, default 0.10
        The verdict is positive when the p-value is at or below this level.

    Returns
    -------
    int
        1 when the p-value is at or below ``significance_level``, otherwise 0.
    """
    adf_output = adfuller(time_series)

    print("ADF Statistic:", adf_output[0])
    p_value = adf_output[1]
    print("p-value:", p_value)
    print("Critical Values:")
    for level, threshold in adf_output[4].items():
        print(f"   {level}: {threshold}")

    print('---\n')

    # Guard clause for the positive verdict; everything else falls through.
    if p_value <= significance_level:
        print('Stocks are likely cointegrated')
        return 1

    print('Stocks are NOT likely cointegrated')
    return 0

# Test the spread over the full sample; the 1/0 verdict is kept in result_total.
print('Total Series Analysis')
result_total = test_stationarity(spread_df['spread'])

Total Series Analysis
ADF Statistic: -0.6862931588104395
p-value: 0.8502680449535757
Critical Values:
   1%: -3.4316849782334793
   5%: -2.862129913399772
   10%: -2.567084012068626
---

Stocks are NOT likely cointegrated


### Bollinger Bands-> use for mean reversion strategy

In [105]:
# Bollinger bands on the spread: rolling mean +/- a multiple of the rolling
# standard deviation. Rows before the window is full are NaN.
period = 200
band_multiplier = 1.75  # band half-width in rolling standard deviations

# Build the rolling window once and reuse it for both statistics.
spread_window = spread_df['spread'].rolling(period)
spread_df['sma'] = spread_window.mean()
spread_df['std'] = spread_window.std()

band_offset = spread_df['std'] * band_multiplier
spread_df['upper_band'] = spread_df['sma'] + band_offset
spread_df['lower_band'] = spread_df['sma'] - band_offset

px.line(
    spread_df,
    x='time',
    y=['spread', 'sma', 'lower_band', 'upper_band'],
    title='Spread with Bollinger bands',
)


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [106]:
def on_bar(bt, data, strategy_params):
    """Per-bar mean-reversion rule on the spread and its bands.

    With no open positions: buy when the spread is below the lower band, sell
    when it is above the upper band. With open positions: close sell trades
    when the spread is below the SMA and close buy trades when it is above.

    Parameters
    ----------
    bt : backtester object
        Must provide ``get_num_open_positions``, ``open_trade`` and
        ``close_trades``.
    data : mapping
        Current bar; read for ``spread``, ``lower_band``, ``upper_band``
        and ``sma``.
    strategy_params :
        Not used by this strategy.
    """
    # NOTE(review): this label does not name the symbols loaded earlier in
    # the notebook - confirm it is the intended one.
    trade_symbol = 'us30_ut100_sa'
    trade_volume = 100

    # The position count is sampled once, so a trade opened on this bar is
    # never closed on the same bar.
    open_count = bt.get_num_open_positions()
    is_flat = open_count == 0
    in_market = open_count > 0

    spread = data['spread']
    below_lower = spread < data['lower_band']
    above_upper = spread > data['upper_band']
    below_mean = spread < data['sma']
    above_mean = spread > data['sma']

    # entry
    if below_lower and is_flat:
        bt.open_trade(trade_symbol, 'buy', trade_volume)
    if above_upper and is_flat:
        bt.open_trade(trade_symbol, 'sell', trade_volume)

    # exit
    if below_mean and in_market:
        bt.close_trades(action='sell')
    if above_mean and in_market:
        bt.close_trades(action='buy')


In [107]:
from lib.backtester import Backtester

# Configure the backtester with the strategy callback and the prepared
# spread history, then run it.
bt = Backtester(commission=-0.2)
bt.set_strategy(on_bar)
bt.set_historical_data(spread_df)
bt.run()

# Each key of bt.trades becomes a row; add the running total of net profit.
trades_df = (
    pd.DataFrame.from_dict(bt.trades, orient='index')
    .assign(cumulative_profit=lambda trades: trades['net_profit'].cumsum())
)

Backtest finished - duration 0:00:00.712505


In [108]:
def analyze_backtest(trades_df, history_df=None):
    """Print summary statistics for a finished backtest.

    Parameters
    ----------
    trades_df : pandas.DataFrame
        One row per trade. ``symbol`` and ``net_profit`` columns are read
        when the frame is not empty.
    history_df : pandas.DataFrame, optional
        Frame whose ``time`` column defines the backtest window. Defaults to
        the notebook-level ``spread_df`` so existing one-argument calls keep
        working.

    Returns
    -------
    None
        The results are printed only.
    """
    if history_df is None:
        # Fall back to the notebook global the original implementation used.
        history_df = spread_df

    num_trades = len(trades_df.index)

    # An empty trade list has no first row to read the symbol from; label it
    # instead of raising IndexError.
    strategy_label = trades_df.iloc[0]['symbol'] if num_trades else 'n/a (no trades)'
    print('Strategy', strategy_label)

    backtest_time_start = pd.to_datetime(history_df['time'].min())
    backtest_time_end = pd.to_datetime(history_df['time'].max())
    duration = backtest_time_end - backtest_time_start

    if num_trades:
        net_profit = trades_df['net_profit']
        winners = net_profit[net_profit > 0]
        losers = net_profit[net_profit < 0]

        total_profit = net_profit.sum().round(2)
        num_profit_trades = len(winners.index)
        avg_profit = winners.mean()
        avg_loss = losers.mean()
    else:
        # No trades: report zeros / NaN rather than failing on missing columns.
        total_profit = 0.0
        num_profit_trades = 0
        avg_profit = avg_loss = float('nan')

    print('Backtest Time Start', backtest_time_start)
    print('Backtest Time End', backtest_time_end)
    print('Duration', duration)
    print('Total Profit', total_profit)
    print('Num Trades', num_trades)
    print('Num Profit Trades', num_profit_trades)
    print('Avg Profit', avg_profit)
    print('Avg Loss', avg_loss)
    print('======================\n')

# Print the summary statistics for the trades collected in trades_df.
analyze_backtest(trades_df)

Strategy us30_ut100_sa
Backtest Time Start 2023-10-03 17:00:00
Backtest Time End 2025-09-22 16:00:00
Duration 719 days 23:00:00
Total Profit 3246.46
Num Trades 26
Num Profit Trades 19
Avg Profit 636.3438424090241
Avg Loss -1263.4386165867743



In [109]:
# Cumulative net profit plotted against trade open time, one coloured line per symbol.
pnl_fig = px.line(trades_df, x='open_time', y=['cumulative_profit'], color='symbol', width=1200, title='Cumulative PnL')
pnl_fig


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [110]:
# Visualise the backtest, passing the SMA and both band columns as indicators.
bt.visualize_backtest(indicators=['sma', 'upper_band', 'lower_band'])


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



#### 1. A more stationary spread is better for profit on the 1H time frame.
#### 2. Low spread == more profit
####