In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn import datasets, linear_model

In [17]:
# Multiple regression
def read_xom_oil_nasdaq_data():
    def readFile(filename):
        data = pd.read_csv(filename, sep=",", usecols=[0, 6], names=['Date', 'Price'], header=0)
        # sort the data in acscending orderof date
        data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')
        data = data.sort_values(['Date'], ascending=[True])
        returns = data[[key for key in dict(data.dtypes) if dict(data.dtypes)[key] in ['float64', 'int64']]].pct_change()
        return np.array(returns['Price'])[1:]
    
    nasdaqData = readFile('data/GSPC.csv')
    oilData = readFile('data/USO.csv')
    xomData = readFile('data/XOM.csv')
    
    return (nasdaqData, oilData, xomData)

In [18]:
nasdaqData, oilData, xomData = read_xom_oil_nasdaq_data()
# implementation with a baseline version
combined = np.vstack((nasdaqData, oilData)).T
xomNasdaqOilModel = linear_model.LinearRegression()
xomNasdaqOilModel.fit(combined, xomData)
xomNasdaqOilModel.score(combined, xomData)

print(xomNasdaqOilModel.coef_)
print(xomNasdaqOilModel.intercept_)

[0.8472181  0.20595714]
0.0026239238339583297


In [29]:
# Implementation using tensorflow
nasdaq_W = tf.Variable(tf.zeros([1,1]), name="nasdaq_W")
oil_W = tf.Variable(tf.zeros([1, 1]), name='oil_W')

b = tf.Variable(tf.zeros([1]), name='b')

nasdaq_x = tf.placeholder(tf.float32, [None, 1], name='nasdaq_x')
oil_x = tf.placeholder(tf.float32, [None, 1], name='oil_x')

nasdaq_Wx = tf.matmul(nasdaq_x, nasdaq_W)
oil_Wx = tf.matmul(oil_x, oil_W)

y = nasdaq_Wx + oil_Wx + b

y_ = tf.placeholder(tf.float32, [None, 1])

cost = tf.reduce_mean(tf.square(y_ - y))

train_step_ftrl = tf.train.FtrlOptimizer(1).minimize(cost)

all_x_nasdaq = nasdaqData.reshape(-1, 1)
all_x_oil = oilData.reshape(-1, 1)
all_ys = xomData.reshape(-1, 1)

dataset_size = len(oilData)

def trainWithMultiplepointsPerEpoch(steps, train_step, batch_size):
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        sess.run(init)
        
        for i in range(steps):
            if dataset_size == batch_size:
                batch_start_idx = 0
            elif dataset_size < batch_size:
                raise ValueError("Dataset Size: %d, must be greater than the batch size: %d" % (dataset_size, batch_size))
            else:
                batch_start_idx = (i * batch_size) % dataset_size
                
            batch_end_idx = batch_start_idx + batch_size
            
            batch_x_nasdaq = all_x_nasdaq[batch_start_idx : batch_end_idx]
            batch_x_oil = all_x_oil[batch_start_idx : batch_end_idx]
            batch_ys = all_ys[batch_start_idx : batch_end_idx]
            
            feed = {nasdaq_x: batch_x_nasdaq, oil_x:batch_x_oil, y_: batch_ys}
            
            sess.run(train_step, feed_dict=feed)
            
            if (i + 1) % 500 == 0:
                print("After %d iteration:" % i)
                print("W1: %f" % sess.run(nasdaq_W))
                print("W2: %f" % sess.run(oil_W))
                print("b: %f" % sess.run(b))
                
                print("cost: %f" % sess.run(cost, feed_dict=feed))
                

trainWithMultiplepointsPerEpoch(5000, train_step_ftrl, len(oilData))

After 499 iteration:
W1: 0.847218
W2: 0.205957
b: 0.002624
cost: 0.030455
After 999 iteration:
W1: 0.847218
W2: 0.205957
b: 0.002624
cost: 0.030455
After 1499 iteration:
W1: 0.847218
W2: 0.205957
b: 0.002624
cost: 0.030455
After 1999 iteration:
W1: 0.847218
W2: 0.205957
b: 0.002624
cost: 0.030455
After 2499 iteration:
W1: 0.847218
W2: 0.205957
b: 0.002624
cost: 0.030455
After 2999 iteration:
W1: 0.847218
W2: 0.205957
b: 0.002624
cost: 0.030455
After 3499 iteration:
W1: 0.847218
W2: 0.205957
b: 0.002624
cost: 0.030455
After 3999 iteration:
W1: 0.847218
W2: 0.205957
b: 0.002624
cost: 0.030455
After 4499 iteration:
W1: 0.847218
W2: 0.205957
b: 0.002624
cost: 0.030455
After 4999 iteration:
W1: 0.847218
W2: 0.205957
b: 0.002624
cost: 0.030455
