In [0]:
pip install quandl

In [0]:
# This program predicts stock prices by using machine learning models

# Import the dependencies
import quandl
import numpy as np 

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [24]:
# Fetch the "WIKI/GOOG" dataset through quandl.get and keep it as `df`
df = quandl.get("WIKI/GOOG")
# Preview the first five rows of the fetched frame
print(df.head(n=5))

               Open    High     Low  ...  Adj. Low  Adj. Close  Adj. Volume
Date                                 ...                                   
2014-03-27  568.000  568.00  552.92  ...    552.92      558.46      13100.0
2014-03-28  561.200  566.43  558.67  ...    558.67      559.99      41100.0
2014-03-31  566.890  567.00  556.93  ...    556.93      556.97      10800.0
2014-04-01  558.710  568.45  558.71  ...    558.71      567.16       7900.0
2014-04-02  565.106  604.83  562.19  ...    562.19      567.00     146700.0

[5 rows x 12 columns]


In [25]:
# Keep only the adjusted close price.
# The explicit .copy() makes the result an independent DataFrame, so adding a
# column to it later does not raise pandas' SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
# Take a look at the new data
print(df.head())

            Adj. Close
Date                  
2014-03-27      558.46
2014-03-28      559.99
2014-03-31      556.97
2014-04-01      567.16
2014-04-02      567.00


In [38]:
# Number of rows ('n') to look ahead when building the prediction target
forecast_out = 100
# Create the target (dependent variable) column: 'Adj. Close' shifted 'n' rows up,
# so each row holds the value found 'n' rows later; the last 'n' rows become NaN.
# The column is selected as a Series (single brackets) so that one column is
# assigned to one column, rather than a whole DataFrame to a single column.
df['Prediction'] = df['Adj. Close'].shift(-forecast_out)
# Print the tail of the new data set
print(df.tail())

            Adj. Close  Prediction
Date                              
2018-03-21     1090.88         NaN
2018-03-22     1049.08         NaN
2018-03-23     1021.57         NaN
2018-03-26     1053.21         NaN
2018-03-27     1005.10         NaN


In [27]:
### Create the independent data set (X)  #######
# Convert every column except 'Prediction' to a numpy array.
# `columns=` is used because passing the axis positionally to DataFrame.drop
# was deprecated in pandas 1.3 and raises a TypeError from pandas 2.0 on.
X = np.array(df.drop(columns=['Prediction']))

# Remove the last 'n' rows (their 'Prediction' target is NaN after the shift)
X = X[:-forecast_out]
print(X)

[[ 558.46  ]
 [ 559.99  ]
 [ 556.97  ]
 [ 567.16  ]
 [ 567.    ]
 [ 569.74  ]
 [ 543.14  ]
 [ 538.15  ]
 [ 554.9   ]
 [ 564.14  ]
 [ 540.95  ]
 [ 530.6   ]
 [ 532.52  ]
 [ 536.44  ]
 [ 556.54  ]
 [ 536.1   ]
 [ 528.62  ]
 [ 534.81  ]
 [ 526.94  ]
 [ 525.16  ]
 [ 516.18  ]
 [ 517.15  ]
 [ 527.7   ]
 [ 526.66  ]
 [ 531.35  ]
 [ 527.93  ]
 [ 527.81  ]
 [ 515.14  ]
 [ 509.96  ]
 [ 511.    ]
 [ 518.73  ]
 [ 529.92  ]
 [ 533.09  ]
 [ 526.65  ]
 [ 519.98  ]
 [ 520.63  ]
 [ 528.86  ]
 [ 529.77  ]
 [ 538.94  ]
 [ 545.06  ]
 [ 552.7   ]
 [ 565.95  ]
 [ 561.68  ]
 [ 560.08  ]
 [ 559.89  ]
 [ 553.93  ]
 [ 544.94  ]
 [ 544.66  ]
 [ 553.9   ]
 [ 556.33  ]
 [ 562.12  ]
 [ 560.55  ]
 [ 558.84  ]
 [ 551.35  ]
 [ 551.76  ]
 [ 544.28  ]
 [ 543.01  ]
 [ 553.37  ]
 [ 554.9   ]
 [ 556.36  ]
 [ 564.95  ]
 [ 564.62  ]
 [ 578.65  ]
 [ 576.    ]
 [ 577.24  ]
 [ 575.28  ]
 [ 582.67  ]
 [ 582.335 ]
 [ 584.73  ]
 [ 582.25  ]
 [ 571.09  ]
 [ 576.08  ]
 [ 571.1   ]
 [ 579.18  ]
 [ 584.87  ]
 [ 584.78  ]
 [ 582.66  ]

In [28]:
### Build the dependent data set (y)  #####
# Take the 'Prediction' column without its last 'n' rows and convert
# what remains to a numpy array
y = np.array(df['Prediction'].iloc[:-forecast_out])
print(y)

[ 518.73    529.92    533.09    526.65    519.98    520.63    528.86
  529.77    538.94    545.06    552.7     565.95    561.68    560.08
  559.89    553.93    544.94    544.66    553.9     556.33    562.12
  560.55    558.84    551.35    551.76    544.28    543.01    553.37
  554.9     556.36    564.95    564.62    578.65    576.      577.24
  575.28    582.67    582.335   584.73    582.25    571.09    576.08
  571.1     579.18    584.87    584.78    582.66    573.7299  595.08
  589.47    594.74    595.98    593.35    589.02    590.6     585.61
  587.42    571.6     566.07    573.15    565.07    566.374   563.36
  568.77    567.88    562.73    574.78    574.65    573.48    582.16
  586.86    584.49    583.37    582.56    580.2     577.86    571.
  569.2     571.6     577.33    577.94    581.98    586.08    589.72
  581.01    583.1     581.35    575.62    573.1     579.95    584.77
  589.27    596.08    587.37    581.13    587.99    575.06    577.1
  576.36    577.36    568.27    570.0

In [0]:

# Split the data into 80% training and 20% testing.
# random_state pins the shuffle, so the split (and the scores computed from it)
# is the same on every run.
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows; confirm that is acceptable here.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
# Build a Support Vector Regressor with an RBF kernel, then fit it on the training split
svr_rbf = SVR(C=1e3, gamma=0.1, kernel='rbf')
svr_rbf.fit(X=x_train, y=y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [31]:
# Evaluate the SVR on the test split. score() returns the coefficient of
# determination (R^2) of the prediction; 1.0 is the best possible score.
svm_confidence = svr_rbf.score(X=x_test, y=y_test)
print("svm confidence: ", svm_confidence)

svm confidence:  0.8658609455242342


In [32]:
# Instantiate the Linear Regression model and fit it on the training split
lr = LinearRegression()
lr.fit(X=x_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [33]:
# Evaluate the linear model on the test split. score() returns the coefficient
# of determination (R^2) of the prediction; 1.0 is the best possible score.
lr_confidence = lr.score(X=x_test, y=y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  0.9232627836214896


In [34]:
# Set x_forecast to the last 'n' (forecast_out) rows of the feature data,
# i.e. every column except 'Prediction'.
# `columns=` replaces the positional axis argument of DataFrame.drop, which was
# deprecated in pandas 1.3 and raises a TypeError from pandas 2.0 on.
x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]
print(x_forecast)

[[1052.1 ]
 [1069.7 ]
 [1089.52]
 [1094.8 ]
 [1102.46]
 [1111.34]
 [1106.63]
 [1126.79]
 [1143.75]
 [1118.29]
 [1104.73]
 [1069.52]
 [1078.92]
 [1090.93]
 [1095.06]
 [1109.64]
 [1126.  ]
 [1160.04]
 [1164.5 ]
 [1138.17]
 [1149.49]
 [1149.58]
 [1135.73]
 [1099.82]
 [1097.71]
 [1090.88]
 [1049.08]
 [1021.57]
 [1053.21]
 [1005.1 ]]


In [37]:

# Linear regression: predict a value for each row of x_forecast and print the result
lr_prediction = lr.predict(X=x_forecast)
print(lr_prediction)

# Support vector regressor: predict for the same x_forecast rows and print the result
svm_prediction = svr_rbf.predict(X=x_forecast)
print(svm_prediction)

[1075.3314782  1093.31443949 1113.56570612 1118.96059451 1126.78726971
 1135.86049109 1131.04800542 1151.64667017 1168.9757056  1142.96171728
 1129.10666301 1093.13052284 1102.73505898 1115.00638654 1119.22625189
 1134.12350051 1150.83948043 1185.62016238 1190.17720825 1163.27428946
 1174.8406032  1174.93256152 1160.7811971  1124.08982551 1121.93391368
 1114.95529859 1072.24576552 1044.13717092 1076.46563087 1027.30879748]
[1005.70059565 1078.80205282  919.38723403  833.32457116 1118.58726121
 1108.50140365 1117.30930274  952.2668467   837.8353129   883.54053683
 1160.12880714 1068.6614147  1137.39094683  861.92265186  833.46599503
 1099.75173292  956.15231223  883.55047155 1123.32955754 1076.77637482
  832.7008042   832.70077554 1050.58698446  931.88485602  850.67887693
  863.27445175 1155.84130214 1117.02423485 1052.39574265  842.45099003]
