In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [2]:
%matplotlib notebook

In [3]:
data=pd.read_csv('Downloads/stock_market_data-TCS.csv')

In [4]:
data['Average']=(data['Open']-data['Close'])/2
print(len(data['Average']))

1275


In [5]:
data.head()

Unnamed: 0.1,Unnamed: 0,Date,Low,High,Close,Open,Average
0,1274,2018-11-23,5.62,5.77,5.65,5.69,0.02
1,1273,2018-11-21,5.53,5.77,5.75,5.6,-0.075
2,1272,2018-11-20,5.36,5.64,5.56,5.51,-0.025
3,1271,2018-11-19,5.6,5.84,5.64,5.71,0.035
4,1270,2018-11-16,5.535,5.95,5.73,5.94,0.105


In [6]:
def observations():
    length=len(data['Average'])
    for i in range(0,length-3):
        yield np.array(data.iloc[i:i+3,-1])

In [7]:
def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))

def relu(vector):
    vector[vector < 0] = 0
    return vector

def apply_neural_nets(observation_matrix, weights):
    """ Based on the observation_matrix and weights, compute the new hidden layer values and the new output layer values"""
    hidden_layer_values = np.dot(weights['1'], observation_matrix)
    hidden_layer_values = relu(hidden_layer_values)
    output_layer_values = np.dot(hidden_layer_values, weights['2'])
    output_layer_values = sigmoid(output_layer_values)
    return hidden_layer_values, output_layer_values


In [8]:
def choose_action(probability):
    random_value = np.random.uniform()
    if random_value < probability:
        # signifies up in openai gym
        return 1
    else:
         # signifies down in openai gym
        return 0

In [9]:
def compute_gradient(gradient_log_p, hidden_layer_values, observation_values, weights):
    """ See here: http://neuralnetworksanddeeplearning.com/chap2.html"""
    delta_L = gradient_log_p
    dC_dw2 = np.dot(hidden_layer_values.T, delta_L).ravel()
    delta_l2 = np.outer(delta_L, weights['2'])
    delta_l2 = relu(delta_l2)
    dC_dw1 = np.dot(delta_l2.T, observation_values)
    return {
        '1': dC_dw1,
        '2': dC_dw2
    }

In [10]:
def update_weights(weights, g_dict, learning_rate):
    for layer_name in weights.keys():
        weights[layer_name]=weights[layer_name]-learning_rate*g_dict[layer_name]

In [17]:
num_hidden_layer_neurons = 200
input_dimensions = 3
weights = {
    '1': np.random.randn(num_hidden_layer_neurons, input_dimensions) / np.sqrt(input_dimensions),
    '2': np.random.randn(num_hidden_layer_neurons) / np.sqrt(num_hidden_layer_neurons)
}


In [18]:
max_reward_sum=-999

In [19]:
def main():
    global max_reward_sum
    # hyperparameters
#     num_hidden_layer_neurons = 200
#     input_dimensions = 4
    learning_rate = 1e-4

    reward_sum = 0

    episode_hidden_layer_values, episode_observations, episode_gradient_log_ps, episode_rewards = [], [], [], []

    no_of_epoches=300
    for j in range(no_of_epoches):
        genobject=observations() 
        for i in range(input_dimensions,len(data['Average'])):
            processed_observations=next(genobject)
            hidden_layer_values, up_probability = apply_neural_nets(processed_observations, weights)
            episode_observations.append(processed_observations)
            episode_hidden_layer_values.append(hidden_layer_values)
            action = choose_action(up_probability)

            if (data.iloc[i,-1]>data.iloc[i-1,-1] and action==1) or (data.iloc[i,-1]<data.iloc[i-1,-1] and action==0):
                reward=1
            else:
                reward=-1
            reward_sum += reward

            true_label = 1 if data.iloc[i,-1]>data.iloc[i-1,-1] else 0
            loss_function_gradient=true_label-up_probability
            episode_gradient_log_ps.append(loss_function_gradient)
            episode_hidden_layer_values = np.vstack(episode_hidden_layer_values)
            episode_observations = np.vstack(episode_observations)
            episode_gradient_log_ps = np.vstack(episode_gradient_log_ps)

            gradient = compute_gradient(
              episode_gradient_log_ps,
              episode_hidden_layer_values,
              episode_observations,
              weights
            )

            update_weights(weights,gradient,learning_rate)

            episode_hidden_layer_values, episode_observations, episode_gradient_log_ps, episode_rewards = [], [], [], [] # reset values
        print("total reward after an epoch",reward_sum)
        if reward_sum>max_reward_sum:
            with open('file.txt','wb') as f:
                pickle.dump(weights,f)
            max_reward_sum=reward_sum
        reward_sum=0

In [32]:
main()

total reward after an epoch -616
total reward after an epoch -634
total reward after an epoch -618
total reward after an epoch -632
total reward after an epoch -624
total reward after an epoch -628
total reward after an epoch -626
total reward after an epoch -652
total reward after an epoch -642
total reward after an epoch -626
total reward after an epoch -602
total reward after an epoch -622
total reward after an epoch -622
total reward after an epoch -620
total reward after an epoch -612
total reward after an epoch -632
total reward after an epoch -628
total reward after an epoch -612
total reward after an epoch -628
total reward after an epoch -618
total reward after an epoch -638
total reward after an epoch -632
total reward after an epoch -624
total reward after an epoch -630
total reward after an epoch -624
total reward after an epoch -620
total reward after an epoch -650
total reward after an epoch -632
total reward after an epoch -644
total reward after an epoch -652
total rewa

  


total reward after an epoch -640
total reward after an epoch -648
total reward after an epoch -642
total reward after an epoch -642
total reward after an epoch -642
total reward after an epoch -646
total reward after an epoch -648
total reward after an epoch -642
total reward after an epoch -636
total reward after an epoch -652
total reward after an epoch -644
total reward after an epoch -650
total reward after an epoch -644
total reward after an epoch -642
total reward after an epoch -648
total reward after an epoch -646
total reward after an epoch -636
total reward after an epoch -646
total reward after an epoch -648
total reward after an epoch -644
total reward after an epoch -644
total reward after an epoch -654
total reward after an epoch -646
total reward after an epoch -650
total reward after an epoch -654
total reward after an epoch -652
total reward after an epoch -648
total reward after an epoch -652
total reward after an epoch -648
total reward after an epoch -652
total rewa

KeyboardInterrupt: 

In [33]:
print(max_reward_sum)

56


In [91]:
with open('file.txt','rb') as f:
    w=pickle.load(f)

In [52]:
gen=observations()
results=[]
for i in gen:
    results.append(apply_neural_nets(i,weights)[1])

  


In [53]:
results=[0 if i>=0.5 else 1 for i in results]

In [60]:
x=np.linspace(1,1000,30)
y=np.array(data.iloc[250:280,-1])

plt.plot(x,y)
ax = plt.axes()
for i in range(len(y)):
    if results[250+i]:
        ax.arrow(x[i],y[i],5,0.1, head_width=0.01, head_length=1,color='g')
    else:
        ax.arrow(x[i],y[i],5,-0.1, head_width=0.01, head_length=1,color='r')
plt.show()

<IPython.core.display.Javascript object>