# Stock prediction model testing

In [1]:
#Reference: https://youtu.be/EYnC4ACIt2g

import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [4]:
# Stock data extraction: download the "WIKI/FB" dataset from Quandl.
# NOTE(review): this call needs network access at run time, and no API key is configured
# in the visible cells — presumably Quandl's anonymous access limits apply; confirm.

df = quandl.get("WIKI/FB")

# Preview the first rows to see which columns were returned.
print(df.head())

             Open   High    Low    Close       Volume  Ex-Dividend  \
Date                                                                 
2012-05-18  42.05  45.00  38.00  38.2318  573576400.0          0.0   
2012-05-21  36.53  36.66  33.00  34.0300  168192700.0          0.0   
2012-05-22  32.61  33.59  30.94  31.0000  101786600.0          0.0   
2012-05-23  31.37  32.50  31.36  32.0000   73600000.0          0.0   
2012-05-24  32.95  33.21  31.77  33.0300   50237200.0          0.0   

            Split Ratio  Adj. Open  Adj. High  Adj. Low  Adj. Close  \
Date                                                                  
2012-05-18          1.0      42.05      45.00     38.00     38.2318   
2012-05-21          1.0      36.53      36.66     33.00     34.0300   
2012-05-22          1.0      32.61      33.59     30.94     31.0000   
2012-05-23          1.0      31.37      32.50     31.36     32.0000   
2012-05-24          1.0      32.95      33.21     31.77     33.0300   

           

In [5]:
# Keep only the adjusted close price.
# `.copy()` makes the result an independent frame instead of a slice of the downloaded
# data, so adding a column to it later does not raise pandas' SettingWithCopyWarning.

df = df[["Adj. Close"]].copy()
print(df.head())

            Adj. Close
Date                  
2012-05-18     38.2318
2012-05-21     34.0300
2012-05-22     31.0000
2012-05-23     32.0000
2012-05-24     33.0300


In [10]:
# Forecasting setup

forecast_out = 30  # Number of days being forecast (1 = 1 day)

# Create the target (dependent variable) column: "Adj. Close" shifted up by
# `forecast_out` rows, so every row holds the adjusted close from `forecast_out` rows
# LATER (the future price), not an earlier one. The final `forecast_out` rows have no
# future value and therefore become NaN.
# `assign` builds a new frame, so no SettingWithCopyWarning is raised when `df` is a
# slice of another DataFrame, and re-running the cell yields the same result.
df = df.assign(Prediction=df["Adj. Close"].shift(-forecast_out))

print(df.head())

            Adj. Close  Prediction
Date                              
2012-05-18     38.2318      30.771
2012-05-21     34.0300      31.200
2012-05-22     31.0000      31.470
2012-05-23     32.0000      31.730
2012-05-24     33.0300      32.170


In [16]:
# Create the independent data set (X): every column except the target, as a 2-D array.
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# pandas 2.0 no longer accepts.

X = np.array(df.drop(columns=["Prediction"]))

# Remove the last `forecast_out` rows: their "Prediction" target is NaN.

X = X[:-forecast_out]

print(X)  # 2-D NumPy array, one inner row per remaining date

[[ 38.2318]
 [ 34.03  ]
 [ 31.    ]
 ...
 [171.5499]
 [175.98  ]
 [176.41  ]]


In [17]:
# Build the dependent data set (y) from the "Prediction" column.
# The column is converted to a NumPy array (NaN entries included) and the trailing
# `forecast_out` entries are sliced off in the same expression.

y = np.array(df["Prediction"])[:-forecast_out]
print(y)  # 1-D NumPy array of targets

[ 30.771  31.2    31.47  ... 159.39  160.06  152.19 ]


In [22]:
# Split the data into 80% training and 20% testing.
# `random_state` pins the shuffle so the split — and every score and prediction
# derived from it — is identical on each run.
# NOTE(review): train_test_split shuffles rows by default, so test rows are interleaved
# in time with training rows; for a date-ordered price series this presumably inflates
# the R^2 scores — consider a chronological split (shuffle=False) and confirm.

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Support Vector Machine model

In [23]:
# Create and train the Support Vector Machine regressor (SVR) with an RBF kernel.
# NOTE(review): C=1e3 and gamma=0.1 are hard-coded; no tuning is visible in this notebook.

svr_rbf = SVR(kernel = "rbf", C=1e3, gamma=0.1)

# Fit on the training split; as the cell's last expression, the estimator is echoed.
svr_rbf.fit(x_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [25]:
# Test the model: score() returns the coefficient of determination R^2 of the
# predictions on the held-out test split.
# The best possible score is 1.0. Despite the variable name, this value is a
# goodness-of-fit measure, not a confidence level.

svm_confidence = svr_rbf.score(x_test, y_test)
print("svm confidence: ", svm_confidence)

svm confidence:  0.9754508816370286


## Linear Regression model

In [26]:
# Create the Linear Regression model with its default settings.

lr = LinearRegression()

# Train the model on the training split; as the cell's last expression, the estimator is echoed.
lr.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [27]:
# Test the model: score() returns the coefficient of determination R^2 of the
# predictions on the held-out test split.
# The best possible score is 1.0. Despite the variable name, this value is a
# goodness-of-fit measure, not a confidence level.

lr_confidence = lr.score(x_test, y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  0.9816510679921611


In [29]:
# Set x_forecast to the last `forecast_out` rows of the "Adj. Close" column — the rows
# whose "Prediction" target is NaN because the future price is not in the data set.
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# pandas 2.0 no longer accepts.

x_forecast = np.array(df.drop(columns=["Prediction"]))[-forecast_out:]

print(x_forecast)  # Last `forecast_out` rows of data

[[173.15]
 [179.52]
 [179.96]
 [177.36]
 [176.01]
 [177.91]
 [178.99]
 [183.29]
 [184.93]
 [181.46]
 [178.32]
 [175.94]
 [176.62]
 [180.4 ]
 [179.78]
 [183.71]
 [182.34]
 [185.23]
 [184.76]
 [181.88]
 [184.19]
 [183.86]
 [185.09]
 [172.56]
 [168.15]
 [169.39]
 [164.89]
 [159.39]
 [160.06]
 [152.19]]


In [30]:
# Print the Linear Regression model's predictions for the next `forecast_out` days:
# one predicted value per row of x_forecast.

lr_prediction = lr.predict(x_forecast)

print(lr_prediction)

[177.30688177 183.76065644 184.20644308 181.57224933 180.20449489
 182.12948263 183.22368618 187.58023737 189.24180574 185.72617024
 182.54487472 180.13357429 180.82251727 184.65222971 184.02407582
 188.00576098 186.61774351 189.54575117 189.06956999 186.15169384
 188.49207367 188.1577337  189.40390997 176.70912242 172.24112457
 173.49743235 168.93825087 163.36591795 164.04472941 156.07122758]


In [33]:
# Print the Support Vector Machine model's predictions for the next `forecast_out` days:
# one predicted value per row of x_forecast.

svm_prediction = svr_rbf.predict(x_forecast)

print(svm_prediction)

[178.81557226 182.01541839 183.99514268 175.98045275 171.51546069
 177.11674435 179.85592435 181.02966516 178.31645972 187.52422943
 177.9396442  171.29227933 173.72225289 185.78346826 183.18779842
 179.23765274 185.44671525 179.18432347 177.98653284 186.90354163
 177.96709401 178.73100687 178.74011741 179.96219739 172.72262826
 172.77454234 172.20801231 167.98404839 166.16406319 158.1481542 ]
