<a href="https://colab.research.google.com/github/PasechnikDarya/ML-practice/blob/main/lstm_bitcoin_price.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --quiet pytorch-lightning==1.2.5

[K     |████████████████████████████████| 826 kB 8.7 MB/s 
[K     |████████████████████████████████| 118 kB 19.5 MB/s 
[K     |████████████████████████████████| 269 kB 46.8 MB/s 
[K     |████████████████████████████████| 272 kB 42.8 MB/s 
[K     |████████████████████████████████| 829 kB 38.2 MB/s 
[K     |████████████████████████████████| 1.3 MB 42.0 MB/s 
[K     |████████████████████████████████| 294 kB 40.9 MB/s 
[K     |████████████████████████████████| 142 kB 73.5 MB/s 
[?25h  Building wheel for future (setup.py) ... [?25l[?25hdone
  Building wheel for PyYAML (setup.py) ... [?25l[?25hdone


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np

import pytorch_lightning as pl
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from pylab import rcParams
from matplotlib import rc
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, DataLoader

from tqdm.notebook import tqdm


In [None]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set(style='whitegrid', font_scale=1.2)

rcParams['figure.figsize'] = 12, 8

tqdm.pandas()

In [None]:
# Fix the global random seed through PyTorch Lightning so that runs are
# repeatable; the call returns the seed it set (shown as the cell output).
pl.seed_everything(42)

Global seed set to 42


42

## Load data

In [None]:
# Read the minute-level BTC/USDT export, skipping the first line of the file
# and parsing `date` as timestamps, then order the rows by time and renumber
# the index from 0.
df = (
    pd.read_csv('Binance_BTCUSDT_minute.csv', skiprows=1, parse_dates=['date'])
    .sort_values(by='date')
    .reset_index(drop=True)
)

In [None]:
# Sanity check: (number of rows, number of columns) of the loaded frame.
df.shape

(322718, 10)

In [None]:
# Preview the loaded frame (the rendered output is abbreviated to the first
# and last rows).
df

Unnamed: 0,unix,date,symbol,open,high,low,close,Volume BTC,Volume USDT,tradecount
0,1610278800000,2021-01-10 11:40:00,BTC/USDT,39455.96,39471.03,39361.77,39419.97,120.857906,4.764110e+05,
1,1610278860000,2021-01-10 11:41:00,BTC/USDT,39419.96,39419.97,39200.00,39335.77,186.719456,7.340855e+06,3100.0
2,1610278920000,2021-01-10 11:42:00,BTC/USDT,39336.00,39395.97,39133.17,39155.81,165.719334,6.503835e+06,2750.0
3,1610278980000,2021-01-10 11:43:00,BTC/USDT,39181.89,39182.77,38888.00,39072.08,653.444303,2.548905e+07,9243.0
4,1610279040000,2021-01-10 11:44:00,BTC/USDT,39046.42,39303.10,39031.03,39294.34,233.590502,9.151271e+06,3408.0
...,...,...,...,...,...,...,...,...,...,...
322713,1629937860000,2021-08-26 00:31:00,BTC/USDT,49034.00,49077.85,49003.30,49029.00,19.657713,9.641287e+05,945.0
322714,1629937920000,2021-08-26 00:32:00,BTC/USDT,49029.01,49041.79,48990.01,48999.80,14.685761,7.198294e+05,807.0
322715,1629937980000,2021-08-26 00:33:00,BTC/USDT,48999.80,49070.25,48983.09,49053.41,24.355314,1.193922e+06,991.0
322716,1629938040000,2021-08-26 00:34:00,BTC/USDT,49053.40,49053.41,49008.02,49038.72,8.390942,4.114275e+05,634.0


## Preprocessing

In [None]:
# Closing price of the previous row. Only the `close` column is shifted
# (instead of shifting the whole frame and then selecting one column).
# The first row has no predecessor, so its value is NaN.
df['prev_close'] = df['close'].shift(1)
df.head()

Unnamed: 0,unix,date,symbol,open,high,low,close,Volume BTC,Volume USDT,tradecount,prev_close
0,1610278800000,2021-01-10 11:40:00,BTC/USDT,39455.96,39471.03,39361.77,39419.97,120.857906,476411.0,,
1,1610278860000,2021-01-10 11:41:00,BTC/USDT,39419.96,39419.97,39200.0,39335.77,186.719456,7340855.0,3100.0,39419.97
2,1610278920000,2021-01-10 11:42:00,BTC/USDT,39336.0,39395.97,39133.17,39155.81,165.719334,6503835.0,2750.0,39335.77
3,1610278980000,2021-01-10 11:43:00,BTC/USDT,39181.89,39182.77,38888.0,39072.08,653.444303,25489050.0,9243.0,39155.81
4,1610279040000,2021-01-10 11:44:00,BTC/USDT,39046.42,39303.1,39031.03,39294.34,233.590502,9151271.0,3408.0,39072.08


In [None]:
# Close-to-close change for every row, computed column-wise instead of
# calling a Python lambda once per row.
# Rows whose `prev_close` is missing (the first row) get 0 rather than NaN;
# every other row gets close - prev_close, exactly as before.
df['close_change'] = (df['close'] - df['prev_close']).mask(df['prev_close'].isna(), 0.0)

  0%|          | 0/322718 [00:00<?, ?it/s]

In [None]:
# Check the new `close_change` column on the first rows.
df.head()

Unnamed: 0,unix,date,symbol,open,high,low,close,Volume BTC,Volume USDT,tradecount,prev_close,close_change
0,1610278800000,2021-01-10 11:40:00,BTC/USDT,39455.96,39471.03,39361.77,39419.97,120.857906,476411.0,,,0.0
1,1610278860000,2021-01-10 11:41:00,BTC/USDT,39419.96,39419.97,39200.0,39335.77,186.719456,7340855.0,3100.0,39419.97,-84.2
2,1610278920000,2021-01-10 11:42:00,BTC/USDT,39336.0,39395.97,39133.17,39155.81,165.719334,6503835.0,2750.0,39335.77,-179.96
3,1610278980000,2021-01-10 11:43:00,BTC/USDT,39181.89,39182.77,38888.0,39072.08,653.444303,25489050.0,9243.0,39155.81,-83.73
4,1610279040000,2021-01-10 11:44:00,BTC/USDT,39046.42,39303.1,39031.03,39294.34,233.590502,9151271.0,3408.0,39072.08,222.26


In [None]:
# Build one feature record per row of `df`: calendar features derived from
# the `date` timestamp plus the price columns. The columns are computed with
# vectorized `.dt` accessors instead of a Python loop over `iterrows()`.
# `rows` stays a list of dicts (same keys, same order) so it can still be fed
# to `pd.DataFrame(rows)`.
date_parts = df['date'].dt
rows = pd.DataFrame({
    'day_of_week': date_parts.dayofweek,
    'day_of_month': date_parts.day,
    # ISO week number, matching `Timestamp.week`; cast from the nullable
    # UInt32 dtype returned by `isocalendar()` to a plain integer.
    'week_of_year': date_parts.isocalendar().week.astype('int64'),
    'month': date_parts.month,
    'open': df['open'],
    'high': df['high'],
    'low': df['low'],
    'close_change': df['close_change'],
    'close': df['close'],
}).to_dict('records')

  0%|          | 0/322718 [00:00<?, ?it/s]

In [None]:
# Assemble the per-row feature records into a DataFrame (one column per
# record key) and preview the first rows.
features_df = pd.DataFrame(rows)
features_df.head()

Unnamed: 0,day_of_week,day_of_month,week_of_year,month,open,high,low,close_change,close
0,6,10,1,1,39455.96,39471.03,39361.77,0.0,39419.97
1,6,10,1,1,39419.96,39419.97,39200.0,-84.2,39335.77
2,6,10,1,1,39336.0,39395.97,39133.17,-179.96,39155.81
3,6,10,1,1,39181.89,39182.77,38888.0,-83.73,39072.08
4,6,10,1,1,39046.42,39303.1,39031.03,222.26,39294.34


In [None]:
# Sanity check: (number of rows, number of feature columns).
features_df.shape

(322718, 9)

In [None]:
# Number of leading rows that go to the training set: 90% of all rows,
# truncated to an integer.
train_size = int(features_df.shape[0] * 0.9)
train_size

290446

In [None]:
# Positional split without shuffling: the first `train_size` rows are used
# for training, the remaining rows for testing.
train_df = features_df.iloc[:train_size]
test_df = features_df.iloc[train_size:]
(train_df.shape, test_df.shape)

((290446, 9), (32272, 9))

In [None]:
# Scale every feature to the range [-1, 1]. The scaler is fitted on the
# training rows only; `fit` returns the fitted scaler itself.
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train_df)

In [None]:
train_df = 