# Trading Strategies powered by Machine Learning - Random Forest Classifier

## Data Processing

### Download data 

In [1]:
import sys
sys.path.append('../../')

In [2]:
import numpy as np

from util.load_data import *
from config.constants import *

In [3]:
# Load the price data. `load_data` is presumably supplied by the star import
# from util.load_data (TODO confirm); the result is indexed by a timeframe key
# such as 'D1' in the cells below.
XAUUSD = load_data()

In [4]:
# Preview the 'D1' entry: one row per date with Open/High/Low/Close/Volume.
XAUUSD['D1']

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-01-01,635.679,637.103,635.006,635.943,604
2007-01-02,635.791,640.802,635.286,639.143,1282
2007-01-03,639.029,644.663,624.634,626.602,1302
2007-01-04,626.699,630.352,619.600,622.533,1365
2007-01-05,622.268,625.768,601.591,606.841,1284
...,...,...,...,...,...
2023-03-06,1852.255,1858.168,1845.015,1846.255,1380
2023-03-07,1846.225,1851.555,1812.655,1813.865,1380
2023-03-08,1813.855,1824.185,1809.248,1815.175,1379
2023-03-09,1815.195,1835.565,1811.805,1831.415,1366


In [6]:
# Work on a copy so the feature columns added below do not modify XAUUSD['D1'].
df = XAUUSD['D1'].copy()

### Add Features

In [8]:
def get_rsi(df, rsi_period):
    """Return the Relative Strength Index of ``df['Close']``.

    Gains and losses are the non-negative and non-positive parts of the
    one-row change in ``Close``. Each leg is smoothed with an exponentially
    weighted mean (``com=rsi_period - 1``) that stays NaN until
    ``rsi_period`` changes have been observed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``'Close'`` column.
    rsi_period : int
        Look-back period of the indicator.

    Returns
    -------
    pandas.Series
        RSI on a 0-100 scale, aligned to ``df.index``; NaN for warm-up rows.
    """
    def smooth(series):
        # Same smoothing is applied to the gain leg and the loss leg.
        return series.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()

    delta = df['Close'].diff(1)
    ups = delta.clip(lower=0)    # down moves count as 0
    downs = delta.clip(upper=0)  # up moves count as 0; values stay <= 0

    # The loss leg is non-positive, so take the magnitude of the ratio.
    strength = (smooth(ups) / smooth(downs)).abs()
    return 100 - 100 / (1 + strength)

In [9]:
# Momentum: 14-period RSI of the close.
df['rsi14'] = get_rsi(df, 14)

# Trend: short (9-row) and long (180-row) simple moving averages of the close.
df['sma9'] = df['Close'].rolling(window=9).mean()
df['sma180'] = df['Close'].rolling(window=180).mean()

# Relative distance of the close from each average (0 means "on the average").
df['sma9_var'] = df['Close'].div(df['sma9']).sub(1)
df['sma180_var'] = df['Close'].div(df['sma180']).sub(1)

In [10]:
# Inspect the frame with the indicator columns; the leading rows are NaN until
# each rolling/EWM window has enough observations.
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,rsi14,sma9,sma180,sma9_var,sma180_var
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-01-01,635.679,637.103,635.006,635.943,604,,,,,
2007-01-02,635.791,640.802,635.286,639.143,1282,,,,,
2007-01-03,639.029,644.663,624.634,626.602,1302,,,,,
2007-01-04,626.699,630.352,619.600,622.533,1365,,,,,
2007-01-05,622.268,625.768,601.591,606.841,1284,,,,,
...,...,...,...,...,...,...,...,...,...,...
2023-03-06,1852.255,1858.168,1845.015,1846.255,1380,49.002419,1832.593111,1771.087789,0.007455,0.042441
2023-03-07,1846.225,1851.555,1812.655,1813.865,1380,37.081483,1832.948667,1771.306828,-0.010411,0.024026
2023-03-08,1813.855,1824.185,1809.248,1815.175,1379,37.741171,1833.343111,1771.452056,-0.009910,0.024682
2023-03-09,1815.195,1835.565,1811.805,1831.415,1366,45.385938,1834.933111,1771.660094,-0.001917,0.033728


In [13]:
# Drop every row that has a NaN in any column, i.e. the warm-up rows before the
# 180-row moving average is available.
# NOTE(review): this rebinds `df`, so the untrimmed frame is no longer reachable
# under that name; re-run the feature cells from the copy step to get it back.
df = df.dropna()

In [14]:
# Inspect the trimmed frame: the first remaining row should have all indicators set.
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,rsi14,sma9,sma180,sma9_var,sma180_var
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-07-29,660.723,661.748,659.856,660.638,143,44.277086,673.647889,659.119572,-0.019313,0.002304
2007-07-30,660.889,665.498,659.476,664.975,1346,49.273715,672.392889,659.280861,-0.011032,0.008637
2007-07-31,665.045,668.644,662.708,663.438,1346,47.643242,670.226556,659.415833,-0.010129,0.006100
2007-08-01,663.438,667.347,658.353,664.895,1367,49.354097,668.279111,659.628572,-0.005064,0.007984
2007-08-02,665.148,666.919,662.122,665.128,1331,49.637515,666.434889,659.865211,-0.001961,0.007976
...,...,...,...,...,...,...,...,...,...,...
2023-03-06,1852.255,1858.168,1845.015,1846.255,1380,49.002419,1832.593111,1771.087789,0.007455,0.042441
2023-03-07,1846.225,1851.555,1812.655,1813.865,1380,37.081483,1832.948667,1771.306828,-0.010411,0.024026
2023-03-08,1813.855,1824.185,1809.248,1815.175,1379,37.741171,1833.343111,1771.452056,-0.009910,0.024682
2023-03-09,1815.195,1835.565,1811.805,1831.415,1366,45.385938,1834.933111,1771.660094,-0.001917,0.033728


In [16]:
# Preview every column lagged by one row (each row shows the previous row's
# values; the first row becomes NaN). The result is only displayed, not
# assigned, so `df` itself is unchanged.
df.shift(1)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,rsi14,sma9,sma180,sma9_var,sma180_var
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-07-29,,,,,,,,,,
2007-07-30,660.723,661.748,659.856,660.638,143.0,44.277086,673.647889,659.119572,-0.019313,0.002304
2007-07-31,660.889,665.498,659.476,664.975,1346.0,49.273715,672.392889,659.280861,-0.011032,0.008637
2007-08-01,665.045,668.644,662.708,663.438,1346.0,47.643242,670.226556,659.415833,-0.010129,0.006100
2007-08-02,663.438,667.347,658.353,664.895,1367.0,49.354097,668.279111,659.628572,-0.005064,0.007984
...,...,...,...,...,...,...,...,...,...,...
2023-03-06,1854.644,1855.314,1852.125,1852.255,60.0,51.870764,1830.022000,1770.690361,0.012149,0.046064
2023-03-07,1852.255,1858.168,1845.015,1846.255,1380.0,49.002419,1832.593111,1771.087789,0.007455,0.042441
2023-03-08,1846.225,1851.555,1812.655,1813.865,1380.0,37.081483,1832.948667,1771.306828,-0.010411,0.024026
2023-03-09,1813.855,1824.185,1809.248,1815.175,1379.0,37.741171,1833.343111,1771.452056,-0.009910,0.024682


In [None]:
# TODO: define the trading signal (presumably the classifier target — confirm).
# The original statement stopped at `np.`, which is a SyntaxError and halts
# "Restart & Run All". The NaN placeholder keeps the notebook runnable until
# the actual rule is written.
df['signal'] = np.nan

In [None]:
# Flag "inside bars": rows whose High is below the previous row's High and
# whose Low is above the previous row's Low. The first row has no predecessor,
# so its comparisons against NaN evaluate to False.
# The original cell referenced `self.data.df`, which is undefined at notebook
# top level (NameError); it now uses the notebook's `df`. The comparison is
# already boolean, so the np.where(..., True, False) wrapper was dropped.
df['inside_bar'] = (
    (df['High'] < df['High'].shift(1))
    & (df['Low'] > df['Low'].shift(1))
)