In [1]:
import numpy as np
import pandas as pd
import random

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Multiply
from tensorflow.keras.layers import Add
from tensorflow.keras.layers import Subtract
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import RepeatVector
from tensorflow.keras.layers import Lambda
from tensorflow.keras import Model
from tensorflow.keras import backend
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam

import os
import math
from collections import deque

In [40]:
# Load the tick data, drop the columns this notebook does not use, then drop
# every row that still contains a missing value.
# The original drop list named 'open_interest' twice; each label is now listed
# once. Building the frame in a single expression keeps the cell idempotent.
dataset = (
    pd.read_csv('data/jd.csv')
    .drop(['volume', 'open_interest', 'amount', 'datetime'], axis=1)
    .dropna(axis=0)
)

In [41]:
# Sanity check: (rows, columns) of the frame after the column drop and dropna.
dataset.shape

(1038090, 7)

In [42]:
from sklearn.preprocessing import MinMaxScaler

# Scale best-ask and best-bid prices to [0, 1] with one scaler per column.
# The original reused a single scaler: the second fit_transform call replaced
# the min/max learned from ask_price1 with those of bid_price1, so the ask
# column could no longer be inverse-transformed correctly.
buying_scaler = MinMaxScaler(feature_range=(0, 1))
selling_scaler = MinMaxScaler(feature_range=(0, 1))

# Double-bracket selection keeps the 2-D (n_rows, 1) shape the scaler expects
# and raises a clear KeyError if the column is missing.
buying_price = buying_scaler.fit_transform(dataset[['ask_price1']].values)
selling_price = selling_scaler.fit_transform(dataset[['bid_price1']].values)

# Backward-compatible alias: the original cell left `scaler` fitted on bid_price1.
scaler = selling_scaler

In [43]:
# Inspect the scaled 'ask_price1' values returned by fit_transform.
buying_price

array([[0.34421365],
       [0.37388724],
       [0.38278932],
       ...,
       [0.04055391],
       [0.04055391],
       [0.04055391]])

In [44]:
# Inspect the scaled 'bid_price1' values returned by fit_transform.
selling_price

array([[0.34325397],
       [0.34920635],
       [0.37599206],
       ...,
       [0.0406746 ],
       [0.0406746 ],
       [0.0406746 ]])

In [45]:
# Append the scaled bid/ask arrays as two extra columns
# (selling_price first, then buying_price).
dataset = dataset.assign(selling_price=selling_price, buying_price=buying_price)

In [46]:
# Display the frame, which at this point also carries the two scaled price columns.
dataset

Unnamed: 0,last_price,highest,lowest,bid_price1,bid_volume1,ask_price1,ask_volume1,selling_price,buying_price
1,3800.0,3800.0,3800.0,3797.0,2,3800.0,16,0.343254,0.344214
2,3827.0,3830.0,3796.0,3803.0,5,3830.0,8,0.349206,0.373887
3,3830.0,3839.0,3796.0,3830.0,6,3839.0,1,0.375992,0.382789
4,3823.0,3839.0,3796.0,3824.0,1,3837.0,2,0.370040,0.380811
5,3827.0,3839.0,3796.0,3824.0,1,3835.0,2,0.370040,0.378833
...,...,...,...,...,...,...,...,...,...
1048570,3493.0,3502.0,3452.0,3492.0,15,3493.0,169,0.040675,0.040554
1048571,3493.0,3502.0,3452.0,3492.0,18,3493.0,167,0.040675,0.040554
1048572,3493.0,3502.0,3452.0,3492.0,20,3493.0,167,0.040675,0.040554
1048573,3493.0,3502.0,3452.0,3492.0,22,3493.0,157,0.040675,0.040554


In [93]:
# Convert the frame to a plain 2-D array for positional access, then show the
# first row as a sanity check.
# Column order follows `dataset.columns`; the training loop indexes rows
# positionally (3 -> bid_price1, 5 -> ask_price1, 6 -> ask_volume1 for the
# frame displayed earlier) -- re-check those indices if the columns change.
# The bare `data.shape` that used to sit mid-cell was a no-op (only the last
# expression of a cell is displayed), so it has been removed.
data = dataset.values
data[0]

array([3.80000000e+03, 3.80000000e+03, 3.80000000e+03, 3.79700000e+03,
       2.00000000e+00, 3.80000000e+03, 1.60000000e+01, 3.43253968e-01,
       3.44213650e-01])

In [98]:
# 1-D array of last traded prices; get_states() builds its windows from this series.
data_price = dataset.loc[:, 'last_price'].values
data_price

array([3800., 3827., 3830., ..., 3493., 3493., 3493.])

In [90]:
# Number of rows (ticks) in `data`; the training loop iterates up to this bound.
# NOTE: the name `l` is easy to misread as the digit 1 -- take care when using it.
l = data.shape[0]
l

1038090

In [125]:
# Number of features per state: get_states() is called with window_size+1
# prices and returns window_size differences; also passed to Agent as state_size.
window_size = 10
# Number of full passes the training loop makes over the price series.
epochs = 1
# The training loop calls agent.expReplay(batch_size) once the replay memory
# holds more than this many transitions.
batch_size = 32

In [126]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of a scalar ``x``.

    The computation is split by sign so that ``math.exp`` is only ever called
    with a non-positive argument. The naive formula raises ``OverflowError``
    for large negative ``x`` (e.g. ``x = -1000``) because ``exp(-x)`` exceeds
    the float range.

    Args:
        x: a real number.

    Returns:
        A float in the closed interval [0, 1].
    """
    if x >= 0:
        # exp(-x) <= 1 here, so it cannot overflow.
        return 1 / (1 + math.exp(-x))
    # Algebraically identical form for x < 0: e**x / (1 + e**x), with e**x <= 1.
    z = math.exp(x)
    return z / (1 + z)

In [127]:
def get_states(data, t, n):
    """Build the state observed just before index ``t``.

    The state is the sigmoid of the ``n - 1`` consecutive differences of the
    ``n`` values ``data[t-n:t]``. When fewer than ``n`` values precede ``t``,
    the window is left-padded with copies of ``data[0]``.

    Args:
        data: 1-D sequence (list or NumPy array) of numeric values.
        t: index one past the last value included in the window.
        n: window length; the resulting state has ``n - 1`` features.

    Returns:
        ``numpy.ndarray`` of shape ``(1, n - 1)`` with values in [0, 1].
    """
    d = t - n
    if d >= 0:
        block = data[d:t]
    else:
        # The original wrote `-d*[data[0]] + data[0:t+1]`. With a NumPy array
        # that `+` is an elementwise (broadcast) addition, not a list
        # concatenation, so any partially padded window raised and t=0 only
        # worked by accident. It also sliced to t+1 while the branch above
        # slices to t. Pad explicitly and keep the window at exactly n values.
        block = [data[0]] * (-d) + list(data[0:t])

    diffs = np.diff(np.asarray(block, dtype=float))
    # Vectorised logistic function; an overflowing exp() yields inf, which
    # correctly maps to 0.0, so the overflow warning is silenced.
    with np.errstate(over='ignore'):
        res = 1.0 / (1.0 + np.exp(-diffs))
    return np.array([res])

In [128]:
class Agent:

    """Deep Q-learning trading agent with an epsilon-greedy policy.

    Attributes:
        state_size: int, number of features in a state vector (model input size).
        action_size: int, number of discrete actions (the notebook uses
            3: sit, buy, sell).
        memory: bounded deque of ``(state, action, reward, next_state, done)``
            transitions.
        inventory: list of open positions; filled and drained by the caller.
        is_eval: bool; when True a saved model is loaded and ``act`` never
            explores.
        gamma: discount factor applied to the estimated future reward.
        epsolon: current exploration rate (attribute spelling kept for
            backward compatibility).
        epsolon_min: lower bound below which the exploration rate is not decayed.
        epsolon_decay: multiplicative decay applied after every replay.
        model: Keras model mapping a state to one Q-value per action.
    """

    def __init__(self, state_size, action_size, model_name='', is_eval=False):
        """Create the agent; load ``models/<model_name>`` when ``is_eval`` is True."""
        self.state_size = state_size
        self.action_size = action_size      # sit, buy, sell
        self.memory = deque(maxlen=100)
        self.inventory = []
        self.is_eval = is_eval

        self.gamma = 0.95
        self.epsolon = 1.0
        self.epsolon_min = 0.01
        self.epsolon_decay = 0.995

        self.model = load_model("models/" + model_name) if is_eval else self._model()

    def _model(self):
        """Build and compile the fully connected Q-network."""
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_size, activation='relu'))
        model.add(Dense(units=32, activation='relu'))
        model.add(Dense(units=8, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # `lr` is the deprecated spelling of this argument; `learning_rate`
        # is the supported keyword.
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
        return model

    def act(self, state):
        """Return an action index for ``state``.

        While training, a random action is chosen with probability
        ``self.epsolon``; otherwise the action with the highest predicted
        Q-value is returned.
        """
        if not self.is_eval and np.random.rand() <= self.epsolon:
            return random.randrange(self.action_size)

        options = self.model.predict(state)

        return np.argmax(options[0])

    def expReplay(self, batch_size):
        """Fit the model on the most recent ``batch_size`` stored transitions.

        For every replayed transition the Q-value of the action taken is
        regressed towards ``reward`` (terminal step) or
        ``reward + gamma * max_a Q(next_state, a)`` (non-terminal step).
        Afterwards the exploration rate is decayed, provided it is still
        above ``epsolon_min``.

        Args:
            batch_size: maximum number of transitions to replay; if the memory
                holds fewer, all of them are used.
        """
        # The original looped over `range(1-batch_size+1, 1)` and always
        # appended `self.memory[1]` (digit 1 typed instead of `l` / `i`), so
        # the network was fitted on one transition over and over.
        # Take the tail of the memory instead, clamped to its length.
        start = max(len(self.memory) - batch_size, 0)
        mini_batch = list(self.memory)[start:]

        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])

            # Only the Q-value of the action actually taken is changed.
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        if self.epsolon > self.epsolon_min:
            self.epsolon *= self.epsolon_decay

In [129]:
# Fresh training agent: one input feature per windowed price difference and
# three possible actions.
agent = Agent(state_size=window_size, action_size=3)

In [130]:


for e in range(epochs):
    # Every epoch starts from the padded state at t=0 with no open positions.
    state = get_states(data_price, 0, window_size+1)
    print(state.shape)
    total_profit = 0
    agent.inventory = []

    for t in range(window_size+1, l):
        # Periodic progress marker.
        if t%1000 == 0:
            print('Day:', str(t))

        action = agent.act(state)
        next_state = get_states(data_price, t, window_size+1)

        # Only a sell that closes a position can produce a non-zero reward.
        reward = 0

        # Row layout of `data` (per the frame displayed earlier -- re-check if
        # the columns change): 3 = bid_price1, 5 = ask_price1, 6 = ask_volume1.
        if action == 1: #buy
            # Open a position at the current ask, remembering the ask volume.
            agent.inventory.append([data[t][5],data[t][6]])
            print ("buy:", str(data[t][5]))
        elif action == 2 and agent.inventory: #sell
            # Close the oldest open position (FIFO) at the current bid.
            bought_price = agent.inventory.pop(0)[0]
            profit = data[t][3]-bought_price
            # Losses are clipped to zero for the reward but still count in the total.
            reward = max(profit, 0)
            total_profit+= profit
            print ("sell:", str(data[t][3]), "profit:", str(profit))

        # The episode ends on the final row of the series.
        done = (t == l-1)

        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print('---------------------------------------')
            print("Total Profit:", str(total_profit))
            print('---------------------------------------')

        # Train as soon as the replay memory holds more than one batch.
        if len(agent.memory)>batch_size:
            agent.expReplay(batch_size)
            
    
    

(1, 10)
buy: 3823.0
sell: 3811.0 profit: -12.0
buy: 3823.0
sell: 3811.0 profit: -12.0
buy: 3819.0
sell: 3812.0 profit: -7.0
buy: 3811.0
sell: 3807.0 profit: -4.0
buy: 3810.0
sell: 3805.0 profit: -5.0
buy: 3816.0
sell: 3812.0 profit: -4.0
buy: 3818.0
sell: 3812.0 profit: -6.0
buy: 3820.0
buy: 3819.0
sell: 3818.0 profit: -2.0
sell: 3818.0 profit: -1.0
buy: 3823.0
buy: 3826.0
buy: 3830.0
sell: 3826.0 profit: 3.0
buy: 3830.0
sell: 3827.0 profit: 1.0
sell: 3826.0 profit: -4.0
buy: 3830.0
buy: 3827.0
buy: 3827.0
sell: 3827.0 profit: -3.0
sell: 3827.0 profit: -3.0
sell: 3827.0 profit: 0.0
sell: 3827.0 profit: 0.0
buy: 3832.0
sell: 3830.0 profit: -2.0
buy: 3833.0
sell: 3830.0 profit: -3.0
buy: 3833.0
sell: 3830.0 profit: -3.0
buy: 3831.0
sell: 3821.0 profit: -10.0
buy: 3829.0
sell: 3828.0 profit: -1.0
buy: 3831.0
sell: 3830.0 profit: -1.0
buy: 3835.0
sell: 3833.0 profit: -2.0
buy: 3834.0
buy: 3834.0
buy: 3834.0
buy: 3834.0
buy: 3833.0
buy: 3833.0
sell: 3831.0 profit: -3.0
sell: 3831.0 profit: 

sell: 3826.0 profit: 5.0
sell: 3826.0 profit: 5.0
sell: 3826.0 profit: 5.0
sell: 3826.0 profit: 5.0
sell: 3826.0 profit: 5.0
sell: 3825.0 profit: 4.0
sell: 3825.0 profit: 4.0
sell: 3825.0 profit: 4.0
sell: 3825.0 profit: 3.0
sell: 3824.0 profit: -1.0
sell: 3824.0 profit: -1.0
sell: 3824.0 profit: -2.0
sell: 3822.0 profit: -11.0
sell: 3822.0 profit: -11.0
sell: 3822.0 profit: -15.0
sell: 3822.0 profit: -15.0
sell: 3822.0 profit: -16.0
sell: 3822.0 profit: -16.0
sell: 3822.0 profit: -16.0
sell: 3823.0 profit: -16.0
sell: 3822.0 profit: -16.0
sell: 3822.0 profit: -14.0
sell: 3822.0 profit: -14.0
sell: 3822.0 profit: -13.0
sell: 3822.0 profit: -13.0
sell: 3821.0 profit: -9.0
sell: 3821.0 profit: -7.0
sell: 3821.0 profit: -9.0
sell: 3821.0 profit: -9.0
sell: 3821.0 profit: -7.0
sell: 3822.0 profit: -8.0
sell: 3822.0 profit: -8.0
sell: 3822.0 profit: -8.0
sell: 3822.0 profit: -8.0
sell: 3822.0 profit: -9.0
sell: 3822.0 profit: -11.0
sell: 3822.0 profit: -11.0
sell: 3822.0 profit: -11.0
sell:

KeyboardInterrupt: 