In [2]:
pip install quandl

Collecting quandl
  Obtaining dependency information for quandl from https://files.pythonhosted.org/packages/c9/cd/54ab3484243431e02dabab1254ffde296c005e0d11346536e5e02ce6c828/Quandl-3.7.0-py2.py3-none-any.whl.metadata
  Downloading Quandl-3.7.0-py2.py3-none-any.whl.metadata (1.3 kB)
Downloading Quandl-3.7.0-py2.py3-none-any.whl (26 kB)
Installing collected packages: quandl
Successfully installed quandl-3.7.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
# Import the Libraries

import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [5]:
# Download the price table for the WIKI/FB dataset from Quandl.
dataset_code = "WIKI/FB"
df = quandl.get(dataset_code)
# Preview the first five rows of the downloaded frame.
print(df.head(5))

             Open   High    Low    Close       Volume  Ex-Dividend  \
Date                                                                 
2012-05-18  42.05  45.00  38.00  38.2318  573576400.0          0.0   
2012-05-21  36.53  36.66  33.00  34.0300  168192700.0          0.0   
2012-05-22  32.61  33.59  30.94  31.0000  101786600.0          0.0   
2012-05-23  31.37  32.50  31.36  32.0000   73600000.0          0.0   
2012-05-24  32.95  33.21  31.77  33.0300   50237200.0          0.0   

            Split Ratio  Adj. Open  Adj. High  Adj. Low  Adj. Close  \
Date                                                                  
2012-05-18          1.0      42.05      45.00     38.00     38.2318   
2012-05-21          1.0      36.53      36.66     33.00     34.0300   
2012-05-22          1.0      32.61      33.59     30.94     31.0000   
2012-05-23          1.0      31.37      32.50     31.36     32.0000   
2012-05-24          1.0      32.95      33.21     31.77     33.0300   

           

In [6]:
# Keep only the adjusted close price column.
# .copy() makes `df` an independent frame instead of a selection taken from
# the downloaded table, so adding columns to it later does not trigger
# pandas' SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
# Take a look at the data.
print(df.head())

            Adj. Close
Date                  
2012-05-18     38.2318
2012-05-21     34.0300
2012-05-22     31.0000
2012-05-23     32.0000
2012-05-24     33.0300


In [18]:
# Number of rows ('n') to look ahead when building the prediction target.
forecast_out = 30

# Create the target (dependent variable) column: the adjusted close shifted
# 'n' rows up, so every row is paired with the price 'n' rows later. The last
# 'n' rows have no later price and therefore hold NaN.
# The shifted column is taken as a Series (single brackets) and attached with
# assign(), which returns a new frame instead of writing into a frame that may
# be a selection of another one (avoids SettingWithCopyWarning).
df = df.assign(Predict=df['Adj. Close'].shift(-forecast_out))

# Print the new dataset.
print(df.head())


            Adj. Close  Predict
Date                           
2012-05-18     38.2318   30.771
2012-05-21     34.0300   31.200
2012-05-22     31.0000   31.470
2012-05-23     32.0000   31.730
2012-05-24     33.0300   32.170


In [22]:
# Inspect the last five rows; their 'Predict' values are NaN because no
# price 'n' rows later exists for them.
print(df.tail(5))


            Adj. Close  Predict
Date                           
2018-03-21      169.39      NaN
2018-03-22      164.89      NaN
2018-03-23      159.39      NaN
2018-03-26      160.06      NaN
2018-03-27      152.19      NaN


In [28]:
# Create the independent dataset (X): every column except the target,
# converted to a numpy array, without the last 'n' rows (whose target is NaN).
# The stop index is computed explicitly because X[:-forecast_out] would return
# an EMPTY array when forecast_out is 0 (since -0 == 0); max() keeps the index
# from going negative when forecast_out exceeds the number of rows.
n_trainable = max(len(df) - forecast_out, 0)
X = np.array(df.drop(columns=['Predict']))[:n_trainable]
print(X)

[[ 38.2318]
 [ 34.03  ]
 [ 31.    ]
 ...
 [171.5499]
 [175.98  ]
 [176.41  ]]


In [29]:
# Create the dependent dataset (y): the 'Predict' column as a numpy array,
# without the last 'n' rows, which hold NaN.
# The stop index is computed explicitly because y[:-forecast_out] would return
# an EMPTY array when forecast_out is 0 (since -0 == 0); max() keeps the index
# from going negative when forecast_out exceeds the number of rows.
n_trainable = max(len(df) - forecast_out, 0)
y = np.array(df['Predict'])[:n_trainable]
print(y)

[ 30.771  31.2    31.47  ... 159.39  160.06  152.19 ]


In [30]:
# Split the data into 80% training and 20% testing.
# random_state pins the shuffle so the split -- and every score computed from
# it below -- is the same on each run of the notebook.
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows; for time-ordered prices this can make
# the scores look better than a chronological hold-out would. Consider
# shuffle=False if an out-of-time evaluation is wanted.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [33]:
# Build an RBF-kernel support vector regressor and fit it on the training split.
svr_params = {'kernel': 'rbf', 'C': 1e3, 'gamma': 0.1}
svr_rbf = SVR(**svr_params)
svr_rbf.fit(X=X_train, y=y_train)

In [36]:
# Evaluate the SVR on the held-out split. score() returns the coefficient of
# determination (R^2) of the prediction; the best possible value is 1.0.
svm_confidence = svr_rbf.score(X=X_test, y=y_test)
print("svm confidence : ", svm_confidence)

svm confidence :  0.9837088291134152


In [37]:
# Build an ordinary linear regression model ...
lr = LinearRegression()
# ... and fit it on the same training split used for the SVR.
lr.fit(X=X_train, y=y_train)

In [38]:
# Evaluate the linear model on the held-out split. score() returns the
# coefficient of determination (R^2) of the prediction; the best possible
# value is 1.0.
lr_confidence = lr.score(X=X_test, y=y_test)
print("lr confidence : ", lr_confidence)

lr confidence :  0.9807820494204704


In [40]:
# Take the feature values (everything except the target column) of the last
# 'n' rows -- the rows that were held back from training because their target
# is NaN -- as the inputs to forecast from.
features_only = df.drop(columns=['Predict'])
x_forecast = np.array(features_only)[-forecast_out:]
print(x_forecast)

[[173.15]
 [179.52]
 [179.96]
 [177.36]
 [176.01]
 [177.91]
 [178.99]
 [183.29]
 [184.93]
 [181.46]
 [178.32]
 [175.94]
 [176.62]
 [180.4 ]
 [179.78]
 [183.71]
 [182.34]
 [185.23]
 [184.76]
 [181.88]
 [184.19]
 [183.86]
 [185.09]
 [172.56]
 [168.15]
 [169.39]
 [164.89]
 [159.39]
 [160.06]
 [152.19]]


In [41]:
# Run the fitted linear regression on the held-back rows and show one
# forecast per input row.
lr_prediction = lr.predict(X=x_forecast)
print(lr_prediction)

[176.78277537 183.20357595 183.64708494 181.02635001 179.6655838
 181.58073625 182.66934922 187.0036416  188.65672056 185.1590474
 181.99400598 179.59502555 180.28044853 184.09059393 183.46564945
 187.42699109 186.04606538 188.95911305 188.48536482 185.58239689
 187.91081908 187.57818734 188.81799656 176.18807014 171.7429005
 172.99278947 168.45690209 162.91303973 163.58838296 155.65561992]


In [42]:
# Run the fitted support vector regressor on the held-back rows and show one
# forecast per input row.
svm_prediction = svr_rbf.predict(X=x_forecast)
print(svm_prediction)

[177.24340942 181.12300527 182.39777099 177.62790881 175.14776591
 178.1072642  179.76566421 184.71671249 179.61514798 186.37971084
 178.55886335 174.96262938 176.59233009 183.69685657 181.86792306
 182.97677236 187.01839135 179.7113717  179.73971938 186.96734342
 181.0818415  182.34627428 179.61832613 178.66996956 171.29298428
 171.92560176 172.29624567 168.84220939 165.83297736 157.75125948]
