In [None]:
from gym import Env
from gym.spaces import Discrete, Box, Dict
import numpy as np
import random
import pandas as pd

# Load the merged BTC table (first CSV column becomes the index), replace
# missing values with 0, and mirror the index into a regular 'date' column.
BTC_df = (
    pd.read_csv('BTC_merged.csv', index_col=0)
    .fillna(0)
    .assign(date=lambda frame: frame.index)
)
BTC_df

In [None]:
# Summary statistics, skewness and kurtosis of the daily returns, followed by
# the sum of the daily_up column; one value per output line.
return_serie = BTC_df['daily_return']
print(
    return_serie.describe(),
    return_serie.skew(),
    return_serie.kurt(),
    sum(BTC_df['daily_up']),
    sep='\n',
)


count    2127.000000
mean        0.003135
std         0.039870
min        -0.371695
25%        -0.012830
50%         0.002222
75%         0.018922
max         0.252472
Name: daily_return, dtype: float64
-0.10793119927534614
7.335547285390839
1175


In [None]:
Lookback_period = 10

class ModelEnv(Env):
  """Gym environment that replays a table row by row as an up/down guessing game.

  An observation is the feature block (the first ``n_features`` columns) of the
  ``lookback`` most recent rows.  Each step advances one row; the binary action
  is compared with the ``daily_up`` value of the newly reached row and earns
  +0.01 on a match, -0.01 otherwise.  The episode is flagged done once the last
  row of ``data`` has been reached.

  Parameters
  ----------
  data : pandas.DataFrame
      Rows in replay order (presumably one per day, oldest first -- the code
      only relies on row order).  Needs a ``daily_up`` column and numeric
      feature values in its first ``n_features`` columns.
  lookback : int, optional
      Number of rows per observation.  Defaults to the module-level
      ``Lookback_period`` as it stands when the environment is constructed.
  n_features : int, optional
      Number of leading columns used as features (default 768).

  Raises
  ------
  ValueError
      If ``lookback`` is smaller than 1, or ``data`` has no row beyond the
      initial window (no step could ever be scored).
  """
  metadata = {'render.modes': ['human']}

  def __init__(self, data, lookback=None, n_features=768):
    super(ModelEnv, self).__init__()

    # Resolve the window length once so the observation space and every later
    # window agree, even if the global is edited after construction.
    self.lookback = Lookback_period if lookback is None else int(lookback)
    self.n_features = int(n_features)
    if self.lookback < 1:
      raise ValueError("lookback must be at least 1, got {}".format(self.lookback))

    self.data = data
    self.day = len(data)
    if self.day <= self.lookback:
      raise ValueError(
          "data has {} rows; more than lookback={} are needed to take a step".format(
              self.day, self.lookback))

    # dtype is spelled out here and matched by the arrays built in _load_window.
    self.observation_space = Box(low=-np.inf, high=np.inf,
                                 shape=(self.lookback, self.n_features),
                                 dtype=np.float32)

    self.action_space = Discrete(2)

    # reset() establishes day_left, day_curr, data_curr, state, reward,
    # reward_hist and done.
    self.reset()

  def _load_window(self):
    """Refresh ``data_curr`` and ``state`` for the window that ends at ``day_curr``."""
    first = self.day_curr - (self.lookback - 1)
    # Positional slicing: valid for any index, not only a 0..n-1 integer one.
    self.data_curr = self.data.iloc[first:self.day_curr + 1, :]
    self.state = np.array(self.data_curr.iloc[:, 0:self.n_features], dtype=np.float32)

  def step(self, action):
    """Advance one row and score ``action`` against that row's ``daily_up``.

    Returns the usual ``(observation, reward, done, info)`` tuple; ``info`` is
    always an empty dict.
    """
    self.day_left -= 1
    self.day_curr += 1

    # update stop flag (`<=` so the flag cannot be stepped over)
    if self.day_left <= 0:
      self.done = True

    self._load_window()

    # Label of the row that was just reached (last row of the new window).
    price_move = self.data_curr["daily_up"].iloc[-1]

    self.reward = 0.01 if action == price_move else -0.01
    self.reward_hist.append(self.reward)

    return self.state, self.reward, self.done, {}

  def render(self, mode='human'):
    """No-op.  ``mode`` is accepted so ``env.render(mode=...)`` calls do not fail."""
    pass

  def reset(self):
    """Rewind to the first window, clear the reward bookkeeping, return the first observation."""
    self.day_left = self.day - self.lookback
    self.day_curr = self.lookback - 1
    self._load_window()

    self.reward = 0
    self.reward_hist = []

    self.done = False

    return self.state

In [None]:
# Training window: rows whose index label falls between start and end (inclusive).
start, end = '2015-01-01', '2020-12-31'
train_data = BTC_df.loc[start:end]
# Replace the date labels with integer codes (one per distinct date, in order
# of appearance) so rows can be addressed by number.
train_data.index = pd.factorize(train_data["date"])[0]
print(f"start from {train_data.loc[0, 'date']}")
print(train_data)

start from 2016-01-01
             0         1         2  ...  three_day_up  five_day_up        date
0    -0.065275 -0.076324 -0.003023  ...             1            1  2016-01-01
1     0.067409  0.037402 -0.004317  ...             1            1  2016-01-02
2     0.053023  0.018100 -0.006141  ...             0            0  2016-01-03
3     0.057782 -0.026775 -0.004166  ...             0            1  2016-01-04
4     0.124891 -0.103425 -0.000955  ...             0            1  2016-01-05
...        ...       ...       ...  ...           ...          ...         ...
1818 -0.025810 -0.035738 -0.001328  ...             1            1  2020-12-27
1819  0.035520  0.072237 -0.003250  ...             1            1  2020-12-28
1820  0.037334  0.207177 -0.006020  ...             1            1  2020-12-29
1821  0.008244  0.075200 -0.001997  ...             1            1  2020-12-30
1822  0.054291 -0.022293 -0.003803  ...             1            1  2020-12-31

[1823 rows x 781 columns]


In [None]:
!pip install stable_baselines3
train_env = ModelEnv(train_data)
from stable_baselines3 import A2C
from stable_baselines3 import PPO

train_env.reset()
model1 = PPO("MlpPolicy", train_env, verbose = 1)
#model2 = DQN("MlpPolicy", train_env, verbose = 1)

model1.learn(total_timesteps = 25000)
#model2.learn(total_timesteps = 5000)

Collecting stable_baselines3
  Downloading stable_baselines3-1.3.0-py3-none-any.whl (174 kB)
[K     |████████████████████████████████| 174 kB 5.1 MB/s 
Installing collected packages: stable-baselines3
Successfully installed stable-baselines3-1.3.0
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.81e+03 |
|    ep_rew_mean     | -0.07    |
| time/              |          |
|    fps             | 691      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.81e+03  |
|    ep_rew_mean          | 0.14      |
| time/                   |           |
|    fps                  | 336       |
|    iterations           | 2         |
|    time_elapsed         | 12       

<stable_baselines3.ppo.ppo.PPO at 0x7f8a9db2c750>

In [None]:
# Hold-out window: every row from start_test to the end of the table.
start_test = '2021-01-01'
test_data = BTC_df.loc[start_test:]
# Replace the date labels with integer codes (one per distinct date, in order
# of appearance) so rows can be addressed by number.
test_data.index = pd.factorize(test_data["date"])[0]
print(f"start from {test_data.loc[0, 'date']}")
print(test_data)
# Sum of daily_up after skipping the first 10 rows.
print(sum(test_data["daily_up"].iloc[10:]))

start from 2021-01-01
            0         1         2  ...  three_day_up  five_day_up        date
0    0.067789  0.020765 -0.004491  ...             1            1  2021-01-01
1    0.101169  0.162836 -0.003575  ...             1            1  2021-01-02
2    0.047099 -0.135316 -0.003982  ...             1            1  2021-01-03
3   -0.040619  0.050465 -0.007430  ...             1            1  2021-01-04
4    0.041792  0.160745 -0.004525  ...             1            1  2021-01-05
..        ...       ...       ...  ...           ...          ...         ...
299 -0.025610  0.047300 -0.003622  ...             0            0  2021-10-27
300  0.058332  0.048267 -0.004826  ...             0            0  2021-10-28
301  0.207442  0.014451 -0.004454  ...             1            1  2021-10-29
302  0.005707  0.042404 -0.001614  ...             1            0  2021-10-30
303  0.139110 -0.060127 -0.003872  ...             1            1  2021-10-31

[304 rows x 781 columns]
152


In [None]:
# Roll the trained policy through the test window several times, keeping the
# action sequence of every pass and printing the accumulated reward.
test_env = ModelEnv(test_data)
episodes = 10
prediction = []
for episode in range(1, episodes + 1):
    state, done, score = test_env.reset(), False, 0
    episode_prediction = []

    while not done:
        action, _state = model1.predict(state)
        episode_prediction.append(action)
        state, reward, done, info = test_env.step(action)
        # step() returns the very reward it just appended to reward_hist
        score += reward
    prediction.append(episode_prediction)

    print(f"Episode:{episode} Score:{score}")

# One row per episode, one column per step.
prediction_df = pd.DataFrame(prediction)
prediction_df.to_csv('PPO_pred.csv')
print(prediction_df)

Episode:1 Score:0.08
Episode:2 Score:0.26000000000000006
Episode:3 Score:0.08
Episode:4 Score:0.16
Episode:5 Score:0.20000000000000004
Episode:6 Score:0.26000000000000006
Episode:7 Score:0.16
Episode:8 Score:0.18000000000000002
Episode:9 Score:0.16
Episode:10 Score:0.22000000000000003
   0    1    2    3    4    5    6    ...  287  288  289  290  291  292  293
0    0    0    0    0    0    0    0  ...    0    0    1    1    0    0    0
1    0    1    1    0    1    1    0  ...    0    0    1    1    1    0    0
2    0    0    0    0    1    0    1  ...    0    1    1    1    0    0    0
3    0    1    0    0    0    0    1  ...    0    0    1    1    0    0    0
4    0    0    0    0    0    0    1  ...    0    0    1    1    0    0    0
5    0    0    0    0    0    0    0  ...    0    0    1    1    0    0    0
6    0    0    1    0    0    0    0  ...    0    0    1    1    0    0    0
7    1    1    0    0    0    0    0  ...    0    0    1    1    0    0    1
8    0    0    0    0

In [None]:
# The environment's first scored row is the one right after the initial
# window, i.e. row Lookback_period, so the labels must start there too.
# Using the constant (instead of a literal 10) keeps both in step.
true = np.array(test_data.daily_up[Lookback_period:])
pred = np.array(prediction)[2]
assert len(true) == len(pred), "labels and predictions are not aligned"
print(pred)

[0 0 0 0 1 0 1 1 0 1 0 0 1 1 1 0 1 0 1 0 0 0 0 0 1 1 1 0 0 1 1 1 0 1 1 1 1
 1 1 1 0 1 0 1 1 1 1 1 0 0 1 1 0 1 0 0 1 1 1 0 1 1 1 1 0 0 1 1 0 1 1 1 1 1
 1 1 1 1 1 1 0 1 1 1 0 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 0 0 0
 0 0 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 1 1 0 1 0 0 1 0 1 0 0 1 1 1 1 1 0 1 1
 1 1 0 0 0 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1
 1 0 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 0 1 0 1 0 1 0 0 0 1 0 1 0 1 1 0 1 0
 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 0 0 0
 0 0 0 0 0 1 1 0 1 0 0 0 1 1 1 0 1 0 0 0 0 0 1 1 1 1 0 1 0 1 1 1 0 0 0]


In [None]:
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Score every recorded episode against the true labels.  Iterating the rows
# directly (rather than a literal range(10)) follows however many episodes were
# run and converts the prediction list to an array only once.
for pred in np.array(prediction):
  print('accuracy:{}'.format(accuracy_score(true, pred)))
  print('mcc:{}'.format(matthews_corrcoef(true, pred)))
  print('f1:{}'.format(f1_score(true,pred)))

accuracy:0.5136054421768708
mcc:0.019894444010463443
f1:0.5705705705705706
accuracy:0.54421768707483
mcc:0.08257899081606143
f1:0.6011904761904762
accuracy:0.5136054421768708
mcc:0.018264184349379244
f1:0.5806451612903226
accuracy:0.5272108843537415
mcc:0.04667513778174696
f1:0.592375366568915
accuracy:0.5340136054421769
mcc:0.06214839163442611
f1:0.5861027190332326
accuracy:0.54421768707483
mcc:0.08225857909718148
f1:0.6058823529411765
accuracy:0.5272108843537415
mcc:0.04726197167863438
f1:0.5875370919881305
accuracy:0.5306122448979592
mcc:0.054730726944893754
f1:0.5868263473053892
accuracy:0.5272108843537415
mcc:0.046964657488638206
f1:0.5899705014749264
accuracy:0.5374149659863946
mcc:0.06829128432365814
f1:0.5976331360946746
