In [None]:
# This project predicts stock prices using machine learning modelsabs
# !pip install quandl

In [137]:
# Install dependecies
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [142]:
# Get stock data
df = quandl.get('WIKI/AMZN') # WE use Amazon stock for analysis
print(df.head())

             Open   High    Low  Close     Volume  Ex-Dividend  Split Ratio  \
Date                                                                          
1997-05-16  22.38  23.75  20.50  20.75  1225000.0          0.0          1.0   
1997-05-19  20.50  21.25  19.50  20.50   508900.0          0.0          1.0   
1997-05-20  20.75  21.00  19.63  19.63   455600.0          0.0          1.0   
1997-05-21  19.25  19.75  16.50  17.13  1571100.0          0.0          1.0   
1997-05-22  17.25  17.38  15.75  16.75   981400.0          0.0          1.0   

            Adj. Open  Adj. High  Adj. Low  Adj. Close  Adj. Volume  
Date                                                                 
1997-05-16   1.865000   1.979167  1.708333    1.729167   14700000.0  
1997-05-19   1.708333   1.770833  1.625000    1.708333    6106800.0  
1997-05-20   1.729167   1.750000  1.635833    1.635833    5467200.0  
1997-05-21   1.604167   1.645833  1.375000    1.427500   18853200.0  
1997-05-22   1.437500   1.

In [144]:
# Get adjusted close price
df = df[['Adj. Close']]
print(df.head())

            Adj. Close
Date                  
1997-05-16    1.729167
1997-05-19    1.708333
1997-05-20    1.635833
1997-05-21    1.427500
1997-05-22    1.395833


In [146]:
# A variable for predicting 'n' days out in the future.
forecast_out = 30 # current value of n = 30

In [148]:
# Create a new column (The target or dependent variable) shifted 'n' units up
df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)
print(df.head())
# print(df.tail())

            Adj. Close  Prediction
Date                              
1997-05-16    1.729167    1.541667
1997-05-19    1.708333    1.515833
1997-05-20    1.635833    1.588333
1997-05-21    1.427500    1.911667
1997-05-22    1.395833    2.000000


In [150]:
# CREATE AN INDEPENDENT DATA SET (X)
# COonvert dataframe to numpt array
X = np.array(df.drop(['Prediction'], axis=1))
# Removing last 'n' rows
X = X[:-forecast_out]
print(X)

[[   1.72916667]
 [   1.70833333]
 [   1.63583333]
 ...
 [1350.47      ]
 [1338.99      ]
 [1386.23      ]]


In [152]:
# CREATE A DEPENDENT DATA SET (y)
#COonvert dataframe to numpt array including all 'NaN' values
y = np.array(df['Prediction'])
# get all values for y except last 'n' values
y = y[:-forecast_out]
print(y)

[1.54166667e+00 1.51583333e+00 1.58833333e+00 ... 1.49556000e+03
 1.55586000e+03 1.49705000e+03]


In [154]:
# split data into train set =80% and test set =20%
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)

In [156]:
# Create and train SUpport Vevtor Machine (Regressor)
svr_rbf = SVR(kernel ='rbf', C =1e3, gamma =0.1)
svr_rbf.fit(X_train, y_train)

In [157]:
# Testing SVM Model
svm_confidence = svr_rbf.score(X_test, y_test)
print('Prediction Score: ', svm_confidence)

Prediction Score:  0.9486547299351695


In [160]:
# Create and train Linear Regression Model
lr = LinearRegression()
# Training model
lr.fit(X_train, y_train)

In [162]:
# Testing Linear Regression Model
lr_confidence = lr.score(X_test, y_test)
print('Prediction Score: ', lr_confidence)

Prediction Score:  0.9886397124824949


In [164]:
# set 'X_forecast' = last 30 rows of original data set from 'Adj. Close 'column
X_forecast = np.array(df.drop(['Prediction'], axis=1))[-forecast_out:]
print(X_forecast)

[[1414.51]
 [1451.05]
 [1461.76]
 [1448.69]
 [1468.35]
 [1482.92]
 [1484.76]
 [1500.  ]
 [1521.95]
 [1511.98]
 [1512.45]
 [1493.45]
 [1500.25]
 [1523.61]
 [1537.64]
 [1545.  ]
 [1551.86]
 [1578.89]
 [1598.39]
 [1588.18]
 [1591.  ]
 [1582.32]
 [1571.68]
 [1544.93]
 [1586.51]
 [1581.86]
 [1544.1 ]
 [1495.56]
 [1555.86]
 [1497.05]]


In [166]:
# print Linear Regression Model prediction for next 'n' days
lr_prediction = lr.predict(X_forecast)
print(lr_prediction)

[1502.52669928 1541.42463235 1552.82575067 1538.91234111 1559.84100461
 1575.35120945 1577.30994499 1593.53338506 1616.89982269 1606.28645672
 1606.7867859  1586.56071233 1593.79951761 1618.6669428  1633.60230134
 1641.43724353 1648.73992062 1677.5141716  1698.27251027 1687.40365705
 1690.40563218 1681.16551015 1669.83890895 1641.36272641 1685.62589164
 1680.67582626 1640.47916636 1588.80687103 1652.99804137 1590.39302101]


In [168]:
# print SVm Model prediction for next 'n' days
svm_prediction = svr_rbf.predict(X_forecast)
print(svm_prediction)

[1056.67094972 1550.82829768  676.19783468 1080.62611186  676.16487611
  676.16487611  676.16487611  676.16487611  676.16487611  676.16487611
  676.16487611  676.16487611  676.16487611  676.16487611  676.16487611
  676.16487611  676.16487611  676.16487611  676.16487611  676.16487611
  676.16487611  676.16487611  676.16487611  676.16487611  676.16487611
  676.16487611  676.16487611  676.16487611  676.16487611  676.16487611]
