In [55]:
import pandas as pd
import numpy as np
import datetime as dt
from sklearn import preprocessing
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

init_notebook_mode(connected=True)

In [56]:
dataDF = pd.read_csv('AAPL.csv')

## Splitting data

In [69]:
'''
training ----> start : 2017/11/1
validation ----> 2017/11/1 : 2018/4/1
test ----> 2018/4/1 : end
'''
train_split_time = pd.datetime(2017,11,1)
val_split_time = pd.datetime(2018,4,1)

train_data = dataDF.loc[pd.to_datetime(dataDF['Date']) <= train_split_time]
val_data = dataDF.loc[(pd.to_datetime(dataDF['Date']) > train_split_time) & (pd.to_datetime(dataDF['Date']) <= val_split_time)]
test_data = dataDF.loc[pd.to_datetime(dataDF['Date']) > val_split_time]

In [70]:
test_data.shape

(92, 7)

## exploring training data

In [4]:
Adj_close = train_data['Adj Close']
trace1 = go.Scatter(
                    x = train_data.Date,
                    y = train_data.Close,
                    mode = "lines",
                    name = "Close",
                    marker = dict(color = '#17BECF'))

trace2 = go.Scatter(
                    x = train_data.Date,
                    y = Adj_close,
                    mode = "lines",
                    name = "Adj Close",
                    marker = dict(color = '#7F7F7F'))
data = [trace1, trace2]
layout = dict(title = 'Close  vs Adj Close prices vs date',
              xaxis= dict(title= 'Date',ticklen= 5,zeroline= True))
fig = dict(data = data, layout = layout)
iplot(fig,filename="Close and Adj Close prices")

## Normalizing data

In [85]:

def generate_data(train_data, test_data, predicted_days):
    '''
    this function takes train data and test data and return splitted normalized data and labels
    '''
    Open = train_data.Open
    Close = train_data.Close 
    close_shifted = Close.shift(-predicted_days) 
    data = pd.concat([Open, Close, close_shifted], axis=1)
    data.columns = ['open','close','close_shifted']
    data = data.dropna()
    y_train = data['close_shifted']
    cols = ['open','close']
    X_train = data[cols]
    
    Open = test_data.Open
    Close = test_data.Close 
    close_shifted = Close.shift(-predicted_days) 
    data = pd.concat([Open, Close, close_shifted], axis=1)
    data.columns = ['open','close','close_shifted']
    data = data.dropna()
    y_test = data['close_shifted']
    cols = ['open','close']
    X_test = data[cols]
    
    scaler_x = preprocessing.MinMaxScaler(feature_range=(-1,1))
    X_train = np.array(X_train).reshape((len(X_train),len(cols)))
    X_train = scaler_x.fit_transform(X_train)
    X_test = np.array(X_test).reshape((len(X_test),len(cols)))
    X_test = scaler_x.fit_transform(X_test)
    
    scaler_y = preprocessing.MinMaxScaler(feature_range=(-1,1))
    y_train = np.array(y_train).reshape(len(y_train),1)
    y_train = scaler_y.fit_transform(y_train)
    y_train = y_train.ravel()
    y_test = np.array(y_test).reshape(len(y_test),1)
    y_test = scaler_y.fit_transform(y_test)
    y_test = y_test.ravel()
    
    return X_train, X_test, y_train, y_test



In [86]:
X_train, X_test, y_train, y_test = generate_data(train_data,test_data, predicted_days=1)


In [87]:
X_test.shape

(91, 2)

## Linear Regression

In [88]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression().fit(X_train,y_train)
print("Training set score: {:.2f}".format(lr.score(X_train, y_train)))
print("Test set score: {:.2f}".format(lr.score(X_test, y_test)))


Training set score: 1.00
Test set score: 0.94


In [89]:
from sklearn.linear_model.stochastic_gradient import SGDRegressor
cln = SGDRegressor()
cln.fit(X_train,y_train)
y_pred = cln.predict(X_test)
print("Test set score: {:.2f}".format(cln.score(X_test, y_test)))

Test set score: 0.93


In [82]:
y_pred

array([-0.79381373, -0.73807509, -0.72826247, -0.54594545, -0.67004252,
       -0.65795683, -0.52806351, -0.56053812, -0.50131706, -0.46108205,
       -0.43349177, -0.35369234, -0.33488794, -0.52176497, -0.73245238,
       -0.81960116, -0.8907796 , -0.93864609, -0.89615905, -0.9377806 ,
       -0.91550171, -0.74864558, -0.41393445, -0.39401877, -0.20240561,
       -0.03298188, -0.0185305 ,  0.0404387 ,  0.12006418,  0.12603798,
        0.10713229,  0.0261888 ,  0.04750424,  0.06252577,  0.03191097,
        0.07571742,  0.07381686,  0.05695231,  0.10221438,  0.10001182,
        0.07308573,  0.06730001,  0.04406834,  0.12930983,  0.23687866,
        0.29668808,  0.32197357,  0.3217064 ,  0.22456772,  0.21856881,
        0.24103097,  0.22957082,  0.21380416,  0.14225656,  0.09613776,
       -0.02287714,  0.01861404,  0.01562046, -0.0186663 , -0.13108761,
       -0.09290619, -0.05256918, -0.04810504, -0.01126636, -0.01921484,
       -0.00505658, -0.02639581,  0.02985587,  0.16726073,  0.18

In [2]:
import matplotlib.pyplot as plt
import datetime as dt