In [24]:
import pandas as pd
import numpy as np
import time

In [34]:
df = pd.read_csv("./data/modified_MSFT.csv",)#parse_dates=["Date"]

In [75]:
def get_state(data, t, n):
    print(t,n)
    d = t - n + 1
    print(d)
    block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1]
    print(block)
    res = []
    for i in range(n - 1):
        res.append(block[i + 1] - block[i])
    #return np.array([res])

In [36]:
class Deep_Evolution_Strategy:

    def __init__(self, weights, 
                 reward_function, 
                 population_size, 
                 sigma, learning_rate):
        self.weights = weights
        self.reward_function = reward_function
        self.population_size = population_size
        self.sigma = sigma
        self.learning_rate = learning_rate

    def _get_w_population(
        self, 
        weights, 
        population):
        weights_population = []
        for index, i in enumerate(population):
            jittered = self.sigma * i
            weights_population.append(weights[index] + jittered)
        return weights_population

    def get_weights(self):
        return self.weights

    def train(self, epoch = 100, print_every = 1):
        lasttime = time.time()
        for i in range(epoch):
            population = []
            rewards = np.zeros(self.population_size)
            for k in range(self.population_size):
                x = []
                for w in self.weights:
                    x.append(np.random.randn(*w.shape))
                population.append(x)
            for k in range(self.population_size):
                weights_population = self._get_w_population(self.weights, population[k])
                rewards[k] = self.reward_function(weights_population)
                rewards = (rewards - np.mean(rewards)) / np.std(rewards)
            for index, w in enumerate(self.weights):
                A = np.array([p[index] for p in population])
                self.weights[index] = w + self.learning_rate / (self.population_size * self.sigma) * np.dot(A.T, rewards).T
            
            if (i+1) % print_every == 0:
                print('iter %d. reward: %f' %  (i+1,self.reward_function(self.weights)))        
                print('time taken to train:', time.time()-lasttime, 'seconds')

In [37]:
class Model:
    def __init__(self, input_size, layer_size, output_size):
        self.weights = [np.random.randn(input_size, layer_size), 
                        np.random.randn(layer_size, output_size),
                        np.random.randn(layer_size, 1),
                        np.random.randn(1, layer_size)]
    
    def predict(self, inputs):
        feed = np.dot(inputs, self.weights[0]) + self.weights[-1]
        decision = np.dot(feed, self.weights[1])
        buy = np.dot(feed, self.weights[2])
        return decision, buy
    
    def get_weights(self):
        return self.weights

    def set_weights(self, weights):
        self.weights = weights

In [38]:
class Agent:
    POPULATION_SIZE = 15
    SIGMA = 0.1
    LEARNING_RATE = 0.03
    
    def __init__(self, model, money, max_buy, max_sell, close, window_size, skip):
        self.window_size = window_size
        self.skip = skip
        self.close = close
        self.model = model
        self.initial_money = money
        self.max_buy = max_buy
        self.max_sell = max_sell
        self.es = Deep_Evolution_Strategy(
                  self.model.get_weights(), 
                  self.get_reward, 
                  self.POPULATION_SIZE, 
                  self.SIGMA, 
                  self.LEARNING_RATE)
    
    def act(self, sequence):
        decision, buy = self.model.predict(np.array(sequence))
        return np.argmax(decision[0]), int(buy[0])
    
    def get_reward(self, weights):
        initial_money = self.initial_money
        starting_money = initial_money
        len_close = len(self.close) - 1
        
        self.model.weights = weights
        state = get_state(self.close, 0, self.window_size + 1)
        inventory = []
        quantity = 0
        for t in range(0, len_close, self.skip):
            action, buy = self.act(state)
            next_state = get_state(
                         self.close, 
                         t + 1, 
                         self.window_size + 1)
            if action == 1 and initial_money >= self.close[t]:
                if buy < 0:
                    buy = 1
                if buy > self.max_buy:
                    buy_units = self.max_buy
                else:
                    buy_units = buy
                total_buy = buy_units * self.close[t]
                initial_money -= total_buy
                inventory.append(total_buy)
                quantity += buy_units
            elif action == 2 and len(inventory) > 0:
                if quantity > self.max_sell:
                    sell_units = self.max_sell
                else:
                    sell_units = quantity
                quantity -= sell_units
                total_sell = sell_units * self.close[t]
                initial_money += total_sell
                
            state = next_state
        return ((initial_money - starting_money) / starting_money) \
               * 100
    
    def fit(self, iterations, checkpoint):
        self.es.train(iterations, print_every=checkpoint)
        
    def buy(self):
        initial_money = self.initial_money
        len_close = len(self.close) - 1
        state = get_state(self.close, 0, self.window_size + 1)
        starting_money = initial_money
        states_sell = []
        states_buy = []
        inventory = []
        quantity = 0
        for t in range(0, len_close, self.skip):
            action, buy = self.act(state)
            next_state = get_state(
                         self.close, 
                         t + 1, 
                         self.window_size + 1)
            if action == 1 and initial_money >= self.close[t]:
                if buy < 0:
                    buy = 1
                if buy > self.max_buy:
                    buy_units = self.max_buy
                else:
                    buy_units = buy
                total_buy = buy_units * self.close[t]
                initial_money -= total_buy
                inventory.append(total_buy)
                quantity += buy_units
                states_buy.append(t)
                print(f"day {t}: buy {buy_units} units at price{total_buy}, total balance {initial_money}")
            elif action == 2 and len(inventory) > 0:
                bought_price = inventory.pop(0)
                if quantity > self.max_sell:
                    sell_units = self.max_sell
                else:
                    sell_units = quantity
                if sell_units < 1:
                    continue
                quantity -= sell_units
                total_sell = sell_units * self.close[t]
                initial_money += total_sell
                states_sell.append(t)
                try:
                    invest = ((total_sell - bought_price)/bought_price) * 100
                except:
                    invest = 0
                print('''day %d, sell %d units at price %f,
                         investment %f %%, total balance %f'''%(t, sell_units, total_sell, invest, initial_money))
            state = next_state
        
        invest = ((initial_money - starting_money)/starting_money)* 100
        print(f'total gained {initial_money - starting_money}, total investment {invest}')
        plt.figure(figsize=(20,10))
        plt.plot(close, label='true close',c='g')
        plt.plot(close, 'X', label='predict buy',markevery=states_buy, c='b')
        plt.plot(close, 'o', label='predict sell',markevery=states_sell,c='r')
        plt.legend()
        plt.show()

In [42]:
model = Model(input_size = 30, 
              layer_size = 500, 
              output_size = 3)
agent = Agent(model = model,
             money = 10000,
             max_buy = 5,
             max_sell = 5,
             close = df["Close"].values,
             window_size = 30,
             skip = 1)

In [43]:
agent.fit(iterations = 500, checkpoint = 10)

IndexError: index 30 is out of bounds for axis 0 with size 30

In [54]:
x = np.random.randn(30,500)
x[0][0]

-2.2922814957919826

In [55]:
x[1][0]

-1.677516139922751

In [57]:
y = x[0] - x[1]
y[0]

-0.6147653558692316

In [58]:
x[0][0] - x[1][0]

-0.6147653558692316

In [59]:
print(np.dot(x[0],x[-1]))

-46.347551367793834


In [61]:
print(np.dot(x[0],x[-1]) + np.random.randn(1,500))

[[-46.0806614  -46.9003029  -46.41812442 -46.67468783 -45.46235026
  -47.57266247 -46.21009512 -45.89794066 -47.89042715 -46.51843999
  -43.54002094 -45.25558484 -45.4296808  -44.99445548 -44.83695339
  -46.73633195 -47.29412232 -47.52588115 -46.02672831 -45.56920538
  -44.61870552 -48.2898838  -45.66284825 -45.74158922 -47.33663672
  -46.85503202 -45.01556615 -46.9378362  -47.15176573 -47.83005736
  -45.5050164  -46.36897406 -46.81752779 -47.20530721 -45.55410581
  -44.37668692 -46.69604469 -46.65830765 -46.73205922 -44.98023716
  -47.11410561 -45.40677226 -47.35374397 -47.16371326 -44.47417907
  -44.63220461 -47.1745382  -46.97249218 -46.82450795 -46.30095533
  -47.05663315 -46.82043723 -46.49705108 -44.75190141 -45.66208178
  -46.54863082 -45.94780441 -45.6153115  -46.3870093  -45.96435058
  -46.45136524 -46.83945721 -45.91272944 -46.41059927 -46.69145343
  -47.0907534  -46.5023215  -45.059603   -46.58586118 -48.00152949
  -45.61965664 -46.09609027 -44.69440988 -47.13124742 -47.2953

In [67]:
test_feed = np.dot(x[0],x[-1]) + np.random.randn(1,500)

In [68]:
test_decision = np.dot(test_feed, np.random.randn(500,3))
np.argmax(test_decision[0])

0

In [70]:
test_buy = np.dot(test_feed, np.random.randn(500,1))
int(test_buy[0])

2502

In [73]:
close = df["Close"].values
close[:5]

array([0.110298, 0.110019, 0.105551, 0.106668, 0.107785])

In [76]:
get_state(close, 0, 30+1)

0 31
-30
[0.220596 0.220596 0.220596 0.220596 0.220596 0.220596 0.220596 0.220596
 0.220596 0.220596 0.220596 0.220596 0.220596 0.220596 0.220596 0.220596
 0.220596 0.220596 0.220596 0.220596 0.220596 0.220596 0.220596 0.220596
 0.220596 0.220596 0.220596 0.220596 0.220596 0.220596]


In [92]:
d = 1 - 31 + 1
d

-29

In [93]:
block = -d * [close[1]] + close[0:1 + 1]
block

ValueError: operands could not be broadcast together with shapes (29,) (2,) 

In [88]:
30 * [close[0]] + close[0:1]

array([0.220596, 0.220596, 0.220596, 0.220596, 0.220596, 0.220596,
       0.220596, 0.220596, 0.220596, 0.220596, 0.220596, 0.220596,
       0.220596, 0.220596, 0.220596, 0.220596, 0.220596, 0.220596,
       0.220596, 0.220596, 0.220596, 0.220596, 0.220596, 0.220596,
       0.220596, 0.220596, 0.220596, 0.220596, 0.220596, 0.220596])

In [91]:
n = 31
res = []
for i in range(n - 1):
    print(i+1)
    #res.append(block[i + 1] - block[i])

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
