# Simple Linear Regression on Stock Prices Using scikit-learn


In [1]:
# Core data / plotting stack.
# NOTE(review): numpy, matplotlib.pyplot and seaborn are not referenced by any cell visible here.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import seaborn as sns
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Suppress every warning for the rest of the session (this also hides deprecation notices).
import warnings
warnings.filterwarnings("ignore")

# Yahoo Finance downloader used by yf.download below.
# NOTE(review): pdr_override() presumably patches pandas_datareader's Yahoo reader;
# the visible cells only call yf.download directly -- confirm it is still needed.
import fix_yahoo_finance as yf
yf.pdr_override()

In [2]:
# Ticker and date window for the price history requested from Yahoo Finance.
stock = 'AAPL'
start, end = '2016-01-01', '2018-01-01'

# Fetch the price table for that window and preview the first rows.
data = yf.download(stock, start=start, end=end)
data.head()

[*********************100%***********************]  1 of 1 downloaded


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-04,102.610001,105.370003,102.0,105.349998,100.274513,67649400
2016-01-05,105.75,105.849998,102.410004,102.709999,97.761681,55791000
2016-01-06,100.559998,102.370003,99.870003,100.699997,95.848511,68457400
2016-01-07,98.68,100.129997,96.43,96.449997,91.803276,81094400
2016-01-08,98.550003,99.110001,96.760002,96.959999,92.288696,70798000


In [3]:
# Move the Date index into an ordinary column and fall back to a 0..n-1 integer index.
df = data.reset_index(drop=False)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-01-04,102.610001,105.370003,102.0,105.349998,100.274513,67649400
1,2016-01-05,105.75,105.849998,102.410004,102.709999,97.761681,55791000
2,2016-01-06,100.559998,102.370003,99.870003,100.699997,95.848511,68457400
3,2016-01-07,98.68,100.129997,96.43,96.449997,91.803276,81094400
4,2016-01-08,98.550003,99.110001,96.760002,96.959999,92.288696,70798000


In [4]:
# Features: every column except the date and the unadjusted close.
# drop(..., inplace=True) returns None, which would leave X unusable, so build a
# new frame instead; df stays untouched and the cell can be re-run safely.
X = df.drop(['Date', 'Close'], axis=1)

# Target: adjusted close, kept 2-D (double brackets) because a later cell indexes
# regression_model.intercept_[0].
# NOTE(review): 'Adj Close' is still a column of X, so the model can reproduce the
# target exactly (R^2 of 1.0) -- this is target leakage; reconsider the feature set.
y = df[['Adj Close']]

In [5]:
# DataFrame.as_matrix() was deprecated and later removed from pandas; .values is the
# documented replacement and yields the same ndarray view of the frame.
# NOTE(review): no visible cell reads df after this point -- this conversion may be removable.
df = df.values

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
split_options = dict(test_size=0.25, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [7]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split. fit() returns the estimator itself,
# so the fitted model is bound in one step and then echoed as the cell output.
regression_model = LinearRegression().fit(X_train, y_train)
regression_model

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [8]:
# The target was passed as a 2-D frame, so intercept_ is an array; take its only entry.
intercept = regression_model.intercept_[0]

message = "The intercept for our model is {}"
print(message.format(intercept))

The intercept for our model is -1.2047109976265347e-09


In [9]:
# Coefficient of determination (R^2) of the fitted model on the held-out split.
regression_model.score(X=X_test, y=y_test)

1.0

In [10]:
from sklearn.metrics import mean_squared_error

# Predictions for the held-out rows.
y_predict = regression_model.predict(X_test)

# The metric's signature is (y_true, y_pred). MSE is symmetric, so the value does not
# depend on the order, but passing by keyword keeps each argument in its proper role.
regression_model_mse = mean_squared_error(y_true=y_test, y_pred=y_predict)

regression_model_mse

2.8264629110010686e-19

In [11]:
# Root-mean-squared error: the square root of the test-set MSE computed above.
regression_model_rmse = math.sqrt(regression_model_mse)
regression_model_rmse

5.316448919157475e-10

In [12]:
# Feature order must match the training columns: Open, High, Low, Adj Close, Volume.
# The fourth value is the adjusted close, not the raw close (which is dropped from the features).
# NOTE(review): the target is the same row's 'Adj Close', so this returns an estimate
# for the day described by the inputs, not for the following trading day.
latest_features = pd.DataFrame(
    [[167.81, 171.75, 165.19, 166.48, 37232900]],
    columns=['Open', 'High', 'Low', 'Adj Close', 'Volume'],
)
regression_model.predict(latest_features)

array([[166.48]])