In [1]:
# this program predicts stock prices by using machine learning models

# import dependencies
import pandas_datareader as web 
import numpy as np 
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [2]:
# Download the daily price history for Facebook (ticker 'FB') from Yahoo Finance.
# NOTE(review): this needs network access, and the ticker / 'yahoo' source may
# no longer resolve — confirm before re-running.
df = web.DataReader('FB',
                    data_source='yahoo',
                    start='2010-01-01',
                    end='2020-10-17')
# list the columns that came back
df.keys()

Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')

In [3]:
# keep only the adjusted close price
# .copy() makes df an independent frame, so adding a column to it later
# cannot trigger pandas' SettingWithCopyWarning
df = df[['Adj Close']].copy()
df.tail()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2020-10-12,275.75
2020-10-13,276.140015
2020-10-14,271.820007
2020-10-15,266.720001
2020-10-16,265.929993


In [4]:
# we want to predict the price n rows (days of data) into the future; here n=30
forecast_out = 30

# add a 'Prediction' column holding the adjusted close n rows later;
# the last n rows have no later value, so they become NaN.
# assign() returns a new frame, so re-running this cell gives the same result
# and no chained-assignment warning is raised.
df = df.assign(Prediction=df['Adj Close'].shift(-forecast_out))
df.tail()

Unnamed: 0_level_0,Adj Close,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-10-12,275.75,
2020-10-13,276.140015,
2020-10-14,271.820007,
2020-10-15,266.720001,
2020-10-16,265.929993,


In [5]:
# create the independent dataset:
# every column except 'Prediction', converted to a numpy array.
# `columns=` is used because passing the axis positionally (drop([...], 1))
# is rejected by pandas 2.x.
X = np.array(df.drop(columns=['Prediction']))
# remove the last n rows (their target value is not known yet)
X = X[:-forecast_out]

# create the dependent dataset:
# the 'Prediction' column, converted to a numpy array
Y = np.array(df['Prediction'])
# keep all of the Y values except the last n rows, which are NaN
Y = Y[:-forecast_out]

print('X: \n', X)
print('\nY: \n', Y)

X: 
 [[ 38.22999954]
 [ 34.02999878]
 [ 31.        ]
 ...
 [295.44000244]
 [302.5       ]
 [291.11999512]]

Y: 
 [ 30.77000046  31.20000076  31.46999931 ... 271.82000732 266.72000122
 265.92999268]


In [6]:
# split the data into a train set (80%) and a test set (20%)
# random_state pins the shuffle so the scores are reproducible across runs
# NOTE(review): a shuffled split mixes earlier and later rows of a time series,
# which can make the test score optimistic — consider a chronological split.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [7]:
# build a support vector regressor with a radial-basis-function kernel,
# then fit it on the training split
svr_rbf = SVR(C=1000.0, gamma=0.1, kernel='rbf')
svr_rbf.fit(X=x_train, y=y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [8]:
# evaluate the SVR on the held-out test split;
# score() returns the R^2 coefficient of the prediction
svm_r2 = svr_rbf.score(X=x_test, y=y_test)
print('SVM confidence:', svm_r2)

SVM confidence: 0.9509775725553585


In [9]:
# build a linear regression model and fit it on the same training split
lr = LinearRegression()
lr.fit(X=x_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [10]:
# evaluate the linear regression on the held-out test split;
# score() returns the R^2 coefficient of the prediction
lr_r2 = lr.score(X=x_test, y=y_test)
print('Linear Regression confidence:', lr_r2)

Linear Regression confidence: 0.9463111780954463


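The two models score almost identically on this test split (R^2 of about 0.951 for the SVR versus about 0.946 for the linear regression), so the held-out score alone does not show that the Linear Regression model works better.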

In [11]:
# set x_forecast equal to the last n rows (n = forecast_out) of the
# 'Adj Close' column; these are the inputs the models forecast from.
# `columns=` is used because passing the axis positionally (drop([...], 1))
# is rejected by pandas 2.x.
x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]
print(x_forecast)

[[282.73001099]
 [271.16000366]
 [273.72000122]
 [268.08999634]
 [266.60998535]
 [266.1499939 ]
 [272.42001343]
 [263.51998901]
 [254.82000732]
 [252.52999878]
 [248.1499939 ]
 [254.75      ]
 [249.02000427]
 [249.52999878]
 [254.82000732]
 [256.82000732]
 [261.79000854]
 [261.8999939 ]
 [266.63000488]
 [259.94000244]
 [264.6499939 ]
 [258.66000366]
 [258.11999512]
 [263.76000977]
 [264.45001221]
 [275.75      ]
 [276.14001465]
 [271.82000732]
 [266.72000122]
 [265.92999268]]


In [16]:
# run both fitted models on the most recent n rows ...
lr_prediction = lr.predict(x_forecast)
svm_prediction = svr_rbf.predict(x_forecast)

# ... then print each model's predictions for the next n days
print('Linear regression predictions for the next {} days: \n'.format(forecast_out), lr_prediction)
print('\nSVM predictions for the next {} days: \n'.format(forecast_out), svm_prediction)

Linear regression predictions for the next 30 days: 
 [286.54191964 274.94266903 277.50913701 271.86490224 270.38115052
 269.91999643 276.20586348 267.28334419 258.56137322 256.26557666
 251.87450128 258.49118896 252.74671062 253.25799414 258.56137322
 260.56642824 265.54899118 265.65925452 270.40122065 263.69430917
 268.41620517 262.41107518 261.86970176 267.52397159 268.21571802
 279.54426662 279.93526704 275.60434086 270.49144445 269.69943915]

SVM predictions for the next 30 days: 
 [226.84068097 253.07223315 185.60391287 260.06524326 264.54913954
 262.17484176 220.81698837 249.21781097 259.76003991 272.03952799
 291.44655292 260.13996239 271.77352876 265.27921051 259.76003991
 238.0380773  261.22160891 261.88243173 264.5927227  221.87193657
 246.15870645 208.91801739 214.02191375 246.93755514 245.18539502
 167.41971317 169.53843881 238.11146745 264.72868573 260.17017417]
