# **Model training**

## **1. Data loading**

We have to, first, import our data to work with it.

In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
btc = pd.read_csv('coins_fe_data/btc.csv')
eth = pd.read_csv('coins_fe_data/eth.csv')
sol = pd.read_csv('coins_fe_data/sol.csv')
bnb = pd.read_csv('coins_fe_data/bnb.csv')

sol.head()

Unnamed: 0.1,Unnamed: 0,index,Date,Close,High,Low,Open,Volume,3d-MA,10d-MA,50d-MA,100d-MA,RSI,MACD,Signal,Histogram,Middle Band,Upper Band,Lower Band
0,0,2,2020-04-10,0.951054,1.313487,0.694187,0.832005,87364276,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,3,2020-04-11,0.776819,1.049073,0.76502,0.951054,43862444,0.951054,0.951054,0.951054,0.951054,0.0,-0.013899,-0.00278,-0.011119,0.0,0.0,0.0
2,2,4,2020-04-12,0.882507,0.95667,0.762426,0.785448,38736897,0.863936,0.863936,0.863936,0.863936,0.0,-0.016199,-0.005464,-0.010736,0.0,0.0,0.0
3,3,5,2020-04-13,0.777832,0.891603,0.773976,0.89076,18211285,0.870127,0.870127,0.870127,0.870127,0.0,-0.026167,-0.009604,-0.016563,0.0,0.0,0.0
4,4,6,2020-04-14,0.661925,0.796472,0.628169,0.777832,16747614,0.812386,0.847053,0.847053,0.847053,0.0,-0.042924,-0.016268,-0.026656,0.0,0.0,0.0


## **2. Data separation**

We need to separate our data, between *input* variables out the ones to *predict*.
In this project, we'll try to predict whether the price will go *up* or *down*, and by how much it has done so.
That's why we'll quickly add a new column that stores the *difference between the current and the previous day* pf the *close* price.

In [3]:
btc['Close Diff'] = btc['Close'].diff()
eth['Close Diff'] = eth['Close'].diff()
sol['Close Diff'] = sol['Close'].diff()
bnb['Close Diff'] = bnb['Close'].diff()

btc.loc[0, 'Close Diff'] = 0
eth.loc[0, 'Close Diff'] = 0
sol.loc[0, 'Close Diff'] = 0
bnb.loc[0, 'Close Diff'] = 0

btc.head()

Unnamed: 0.1,Unnamed: 0,index,Date,Close,High,Low,Open,Volume,3d-MA,10d-MA,50d-MA,100d-MA,RSI,MACD,Signal,Histogram,Middle Band,Upper Band,Lower Band,Close Diff
0,0,2,2014-09-17,457.334015,468.174011,452.421997,465.864014,21056800,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,3,2014-09-18,424.440002,456.859985,413.104004,456.859985,34483200,457.334015,457.334015,457.334015,457.334015,0.0,-2.624024,-0.524805,-2.099219,0.0,0.0,0.0,-32.894012
2,2,4,2014-09-19,394.79599,427.834991,384.532013,424.102997,37919700,440.887009,440.887009,440.887009,440.887009,0.0,-7.014744,-1.822793,-5.191951,0.0,0.0,0.0,-29.644012
3,3,5,2014-09-20,408.903992,423.29599,389.882996,394.673004,36863600,425.523336,425.523336,425.523336,425.523336,0.0,-9.249402,-3.308115,-5.941288,0.0,0.0,0.0,14.108002
4,4,6,2014-09-21,398.821014,412.425995,393.181,408.084991,26580100,409.379995,421.3685,421.3685,421.3685,0.0,-11.699137,-4.986319,-6.712818,0.0,0.0,0.0,-10.082977


In [4]:
btc.columns

Index(['Unnamed: 0', 'index', 'Date', 'Close', 'High', 'Low', 'Open', 'Volume',
       '3d-MA', '10d-MA', '50d-MA', '100d-MA', 'RSI', 'MACD', 'Signal',
       'Histogram', 'Middle Band', 'Upper Band', 'Lower Band', 'Close Diff'],
      dtype='object')

In [5]:
class Dataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y.values, dtype=torch.float32).unsqueeze(1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

Separating our input (Xs) and the values to predics (ys)

In [9]:
X_btc = btc[['3d-MA', '10d-MA', '50d-MA', '100d-MA', 'RSI', 'MACD', 'Signal',
       'Histogram', 'Middle Band', 'Upper Band', 'Lower Band', 'Close Diff']]
X_eth = eth[['3d-MA', '10d-MA', '50d-MA', '100d-MA', 'RSI', 'MACD', 'Signal',
       'Histogram', 'Middle Band', 'Upper Band', 'Lower Band', 'Close Diff']]
X_sol = sol[['3d-MA', '10d-MA', '50d-MA', '100d-MA', 'RSI', 'MACD', 'Signal',
       'Histogram', 'Middle Band', 'Upper Band', 'Lower Band', 'Close Diff']]
X_bnb = bnb[['3d-MA', '10d-MA', '50d-MA', '100d-MA', 'RSI', 'MACD', 'Signal',
       'Histogram', 'Middle Band', 'Upper Band', 'Lower Band', 'Close Diff']]

y_btc = btc['Close Diff']
y_eth = eth['Close Diff']
y_sol = sol['Close Diff']
y_bnb = bnb['Close Diff']

X_btc.head()

Unnamed: 0,3d-MA,10d-MA,50d-MA,100d-MA,RSI,MACD,Signal,Histogram,Middle Band,Upper Band,Lower Band,Close Diff
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,457.334015,457.334015,457.334015,457.334015,0.0,-2.624024,-0.524805,-2.099219,0.0,0.0,0.0,-32.894012
2,440.887009,440.887009,440.887009,440.887009,0.0,-7.014744,-1.822793,-5.191951,0.0,0.0,0.0,-29.644012
3,425.523336,425.523336,425.523336,425.523336,0.0,-9.249402,-3.308115,-5.941288,0.0,0.0,0.0,14.108002
4,409.379995,421.3685,421.3685,421.3685,0.0,-11.699137,-4.986319,-6.712818,0.0,0.0,0.0,-10.082977


Now separating the data for traing and for testing:

In [11]:
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.2, random_state=42)
X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.2, random_state=42)
X_sol_train, X_sol_test, y_sol_train, y_sol_test = train_test_split(X_sol, y_sol, test_size=0.2, random_state=42)
X_bnb_train, X_bnb_test, y_bnb_train, y_bnb_test = train_test_split(X_bnb, y_bnb, test_size=0.2, random_state=42)

y_sol_test.head()

1613    5.036102
65     -0.021866
700    -2.194092
938     2.936974
1623    8.844421
Name: Close Diff, dtype: float64

And normalizing our tensors:

In [17]:
scaler = StandardScaler()

X_btc_train_scaled = scaler.fit_transform(X_btc_train)
X_btc_test_scaled = scaler.transform(X_btc_test)

X_eth_train_scaled = scaler.fit_transform(X_eth_train)
X_eth_test_scaled = scaler.transform(X_eth_test)

X_sol_train_scaled = scaler.fit_transform(X_sol_train)
X_sol_test_scaled = scaler.transform(X_sol_test)

X_bnb_train_scaled = scaler.fit_transform(X_bnb_train)
X_bnb_test_scaled = scaler.transform(X_bnb_test)

X_bnb_train_scaled

array([[ 0.51777961,  0.56705493,  0.91602914, ...,  0.77232402,
         0.59530037,  1.4496332 ],
       [ 1.63755562,  1.66095068,  1.56677538, ...,  1.45238363,
         1.79763964,  0.88083906],
       [-0.08912866, -0.07934173, -0.05785906, ..., -0.1635365 ,
         0.02872811,  0.20275867],
       ...,
       [-0.93082596, -0.9246954 , -0.91823304, ..., -0.92760285,
        -0.89641829,  0.05347359],
       [ 0.38894657,  0.72096569,  1.37835777, ...,  2.09924704,
         0.05776662,  2.7335232 ],
       [-1.0117943 , -0.99770721, -0.9623075 , ..., -0.95926375,
        -0.99598185, -0.01546834]], shape=(2152, 12))

Let's make our datasets and dataloaders:

In [19]:
btc_train_dataset = Dataset(X_btc_train_scaled, y_btc_train)
btc_test_dataset = Dataset(X_btc_test_scaled, y_btc_test)

eth_train_dataset = Dataset(X_eth_train_scaled, y_eth_train)
eth_test_dataset = Dataset(X_eth_test_scaled, y_eth_test)

sol_train_dataset = Dataset(X_sol_train_scaled, y_sol_train)
sol_test_dataset = Dataset(X_sol_test_scaled, y_sol_test)

bnb_train_dataset = Dataset(X_bnb_train_scaled, y_bnb_train)
bnb_test_dataset = Dataset(X_bnb_test_scaled, y_bnb_test)

bnb_test_dataset

<__main__.Dataset at 0x79e5e2442650>

In [20]:
btc_train_loader = DataLoader(btc_train_dataset, batch_size=32, shuffle=True)
btc_test_loader = DataLoader(btc_test_dataset, batch_size=32, shuffle=False)

eth_train_loader = DataLoader(eth_train_dataset, batch_size=32, shuffle=True)
eth_test_loader = DataLoader(eth_test_dataset, batch_size=32, shuffle=False)

sol_train_loader = DataLoader(sol_train_dataset, batch_size=32, shuffle=True)
sol_test_loader = DataLoader(sol_test_dataset, batch_size=32, shuffle=False)

bnb_train_loader = DataLoader(bnb_train_dataset, batch_size=32, shuffle=True)
bnb_test_loader = DataLoader(bnb_test_dataset, batch_size=32, shuffle=False)

eth_train_dataset

<__main__.Dataset at 0x79e5e23c68b0>