In [1]:
import quandl
import numpy as np 
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [2]:
#Import GOOGLE stock "GOOGL" Quandl dataset
df = quandl.get("WIKI/GOOGL")
print(df.head())

              Open    High     Low    Close      Volume  Ex-Dividend  \
Date                                                                   
2004-08-19  100.01  104.06   95.96  100.335  44659000.0          0.0   
2004-08-20  101.01  109.08  100.50  108.310  22834300.0          0.0   
2004-08-23  110.76  113.48  109.05  109.400  18256100.0          0.0   
2004-08-24  111.24  111.60  103.57  104.870  15247300.0          0.0   
2004-08-25  104.76  108.00  103.88  106.000   9188600.0          0.0   

            Split Ratio  Adj. Open  Adj. High   Adj. Low  Adj. Close  \
Date                                                                   
2004-08-19          1.0  50.159839  52.191109  48.128568   50.322842   
2004-08-20          1.0  50.661387  54.708881  50.405597   54.322689   
2004-08-23          1.0  55.551482  56.915693  54.693835   54.869377   
2004-08-24          1.0  55.792225  55.972783  51.945350   52.597363   
2004-08-25          1.0  52.542193  54.167209  52.100830   53.1

In [3]:
#Adjusted Close Price 
df = df[['Adj. Close']]
print(df.head())

            Adj. Close
Date                  
2004-08-19   50.322842
2004-08-20   54.322689
2004-08-23   54.869377
2004-08-24   52.597363
2004-08-25   53.164113


In [4]:
forecast_out = 15 #for next 15 days
df['Prediction'] = df[['Adj. Close']].shift(-forecast_out) #shift 15 units up
print(df.tail())

            Adj. Close  Prediction
Date                              
2018-03-21     1094.00         NaN
2018-03-22     1053.15         NaN
2018-03-23     1026.55         NaN
2018-03-26     1054.09         NaN
2018-03-27     1006.94         NaN


In [5]:
X = np.array(df.drop(['Prediction'],1))
#Remove the last '15 rows
X = X[:-forecast_out]
print(X)

[[  50.32284179]
 [  54.32268894]
 [  54.86937651]
 ...
 [1084.14      ]
 [1094.76      ]
 [1100.9       ]]


In [6]:
y = np.array(df['Prediction'])
# Get all of the y values except the last 15 rows
y = y[:-forecast_out]
print(y)

[  52.82807521   53.91643487   55.91761231 ... 1026.55       1054.09
 1006.94      ]


In [7]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [8]:
#Using Support Vector Machine (Regressor) 
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1) 
svr_rbf.fit(x_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [9]:
#Checking SVM confidence value
svm_confidence = svr_rbf.score(x_test, y_test)
print("svm confidence: ", svm_confidence)

svm confidence:  0.9782546584515299


In [10]:
#Linear Regression  Model
lr = LinearRegression()
lr.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [11]:
#Checking Linear Regression confidence value
lr_confidence = lr.score(x_test, y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  0.9900122071069996


In [12]:
#Last 15 rows of the original data set from Adj. Close column
x_forecast = np.array(df.drop(['Prediction'],1))[-forecast_out:]
print(x_forecast)

[[1115.04]
 [1129.38]
 [1160.84]
 [1165.93]
 [1139.91]
 [1148.89]
 [1150.61]
 [1134.42]
 [1100.07]
 [1095.8 ]
 [1094.  ]
 [1053.15]
 [1026.55]
 [1054.09]
 [1006.94]]


In [13]:
#Predictions of Linear Regression model for next 15 days
lr_prediction = lr.predict(x_forecast)
print(lr_prediction)

[1124.13215813 1138.57061958 1170.24663055 1175.37157956 1149.17292077
 1158.21457934 1159.94638922 1143.64522528 1109.05937096 1104.76005225
 1102.94769307 1061.81720839 1035.0345672  1062.76366263 1015.28992082]


In [14]:
#Predictions of SVM model for next 15 days
svm_prediction = svr_rbf.predict(x_forecast)
print(svm_prediction)

[1143.46552015 1150.8845355   750.34646687  956.7758958  1029.60121753
  635.35263236  605.16069311  918.73700861  960.82425171 1101.17091114
 1041.61691777 1125.52775585  617.80449594 1116.37962885 1072.87623097]
