# Multiple Linear Regression with Normalize Data

In [1]:
# Importing the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

# fix_yahoo_finance is used to fetch data 
import fix_yahoo_finance as yf
yf.pdr_override()

In [2]:
# input
symbol = 'AMD'
start = '2014-01-01'
end = '2018-08-27'

# Read data 
dataset = yf.download(symbol,start,end)

# View columns 
dataset.head()

[*********************100%***********************]  1 of 1 downloaded


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-02,3.85,3.98,3.84,3.95,3.95,20548400
2014-01-03,3.98,4.0,3.88,4.0,4.0,22887200
2014-01-06,4.01,4.18,3.99,4.13,4.13,42398300
2014-01-07,4.19,4.25,4.11,4.18,4.18,42932100
2014-01-08,4.23,4.26,4.14,4.18,4.18,30678700


In [3]:
X = dataset.iloc[ : , 0:4].values
Y = np.asanyarray(dataset[['Adj Close']])

In [4]:
from sklearn import preprocessing

# normalize the data attributes
normalized_X = preprocessing.normalize(X)

In [5]:
X = normalized_X[: , 1:]

In [6]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)

In [7]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [8]:
y_pred = regressor.predict(X_test)

In [9]:
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score
ex_var_score = explained_variance_score(Y_test, y_pred)
m_absolute_error = mean_absolute_error(Y_test, y_pred)
m_squared_error = mean_squared_error(Y_test, y_pred)
r_2_score = r2_score(Y_test, y_pred)

print("Explained Variance Score: "+str(ex_var_score))
print("Mean Absolute Error "+str(m_absolute_error))
print("Mean Squared Error "+str(m_squared_error))
print("R Squared Error "+str(r_2_score))

Explained Variance Score: 0.0145762414645
Mean Absolute Error 4.3559157043
Mean Squared Error 22.546676437
R Squared Error 0.0145752513278


In [10]:
print ('Coefficients: ', regressor.coef_)
print("Residual sum of squares: %.2f"
      % np.mean((y_pred - Y_test) ** 2))

# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % regressor.score(X_test, y_pred))

Coefficients:  [[-79.79361894 -53.18582378  15.74315198]]
Residual sum of squares: 22.55
Variance score: 1.00


In [12]:
print('Multiple Linear Score:', regressor.score(X_test, y_pred))

Multiple Linear Score: 0.0145752513278
