# Data preparation for training Machine Learning Models 

* In this notebook we use the price and indicator data prepared in the previous notebook. We select the feature columns and the target column, and then set aside part of the data for testing.

* We will use scikit-learn's `GridSearchCV` to check which model gives the best training and validation scores.

In [1]:
import pandas as pd

### First, read the data prepared in the previous notebook

In [2]:
# Load the price + indicator table produced by the previous notebook.
# `parse_dates=True` asks pandas to parse the `Date` index column into a
# DatetimeIndex. `infer_datetime_format` on its own does not enable date
# parsing (and is deprecated since pandas 2.0), so it is not used here.
df_data = pd.read_csv(
    'Resources/Data_plus_indicators.csv',
    index_col='Date',
    parse_dates=True,
)
df_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,SMA_agg,RSI_ratio,CCI,ADX,ADX_dirn,ATR_ratio,BBands_high,BBands_low,Currency,Daily_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-07-07,347.67,348.87,340.31,342.07,176.011216,0.991309,1.093616,190.073169,21.296837,1.0,0.986212,0.98944,1.047276,ETH/AUD,-0.006506
2020-07-08,345.79,358.66,345.37,356.03,251.112969,0.996916,1.144218,193.832493,23.299165,1.0,1.023409,0.967058,1.090857,ETH/AUD,0.04081
2020-07-09,353.59,355.14,346.51,348.38,255.909438,0.999427,1.080139,112.591647,25.10126,1.0,1.019086,0.99486,1.064898,ETH/AUD,-0.021487
2020-07-10,348.41,348.78,341.66,345.21,396.937187,1.002581,1.053793,58.115197,25.140074,1.0,1.003178,1.008412,1.049545,ETH/AUD,-0.009099
2020-07-11,348.06,348.6,344.56,346.31,9.862143,1.004485,1.060391,56.503935,25.175006,1.0,0.972754,1.008995,1.049899,ETH/AUD,0.003186


# Preparing the data for training the classifier models

In [8]:
# Keep only the rows for the currency pairs listed in `curr_list`.
curr_list = [
    'ETH/AUD',
    'LTC/AUD',
    'ADA/AUD',
]
df_data = df_data.loc[df_data['Currency'].isin(curr_list)]
df_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,SMA_agg,RSI_ratio,CCI,ADX,ADX_dirn,ATR_ratio,BBands_high,BBands_low,Currency,Daily_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-07-07,347.67,348.87,340.31,342.07,176.011216,0.991309,1.093616,190.073169,21.296837,1.0,0.986212,0.989440,1.047276,ETH/AUD,-0.006506
2020-07-08,345.79,358.66,345.37,356.03,251.112969,0.996916,1.144218,193.832493,23.299165,1.0,1.023409,0.967058,1.090857,ETH/AUD,0.040810
2020-07-09,353.59,355.14,346.51,348.38,255.909438,0.999427,1.080139,112.591647,25.101260,1.0,1.019086,0.994860,1.064898,ETH/AUD,-0.021487
2020-07-10,348.41,348.78,341.66,345.21,396.937187,1.002581,1.053793,58.115197,25.140074,1.0,1.003178,1.008412,1.049545,ETH/AUD,-0.009099
2020-07-11,348.06,348.60,344.56,346.31,9.862143,1.004485,1.060391,56.503935,25.175006,1.0,0.972754,1.008995,1.049899,ETH/AUD,0.003186
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-20,247.40,247.72,245.41,247.49,49.005354,1.087711,1.064237,83.970777,42.387776,1.0,1.009217,1.005018,1.062699,LTC/AUD,0.010163
2021-08-21,256.82,256.82,248.02,251.81,33.729445,1.080462,1.077500,117.177052,42.483080,1.0,0.995961,0.995908,1.074777,LTC/AUD,0.017455
2021-08-22,257.32,264.64,250.94,260.41,102.015709,1.078678,1.098181,161.585472,43.298567,1.0,1.002420,0.975167,1.095232,LTC/AUD,0.034153
2021-08-23,260.41,265.40,256.34,260.27,186.925389,1.067754,1.096833,142.648975,44.100214,1.0,0.988888,0.984965,1.093603,LTC/AUD,-0.000538
