
## Linear Regression Implementation using Python


Importing the libraries

In [None]:
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd

Reading the Data

In [None]:
# Load the head-size / brain-weight dataset from a CSV file located relative
# to the notebook's working directory.
csv_path = "headbrain.csv"
data = pd.read_csv(csv_path)

Preview of the uploaded csv file

In [None]:
# Print the table's dimensions as (rows, columns), then let the cell display
# the first rows of the table as its output.
n_rows_cols = data.shape
print(n_rows_cols)
data.head()

Storing the head size and brain weight values in variables X and Y, respectively

In [None]:
# Pull the predictor (head size) and the response (brain weight) out of the
# DataFrame as plain NumPy arrays.
HEAD_SIZE_COL = 'Head Size(cm^3)'
BRAIN_WEIGHT_COL = 'Brain Weight(grams)'

X = data[HEAD_SIZE_COL].values
Y = data[BRAIN_WEIGHT_COL].values


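Manually implementing Linear Regression using statistical formulae: the slope is $b_1 = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sum_i (x_i - \bar{x})^2}$ and the intercept is $b_0 = \bar{y} - b_1 \bar{x}$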

In [None]:
# Fit the simple linear regression Y = b0 + b1 * X by ordinary least squares,
# using the closed-form solution:
#   b1 = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x) ** 2)
#   b0 = mean_y - b1 * mean_x

# Work on flattened copies/views so this cell yields the same scalar
# coefficients even if X has already been reshaped to an (m, 1) column vector
# by another cell (e.g. when cells are re-run out of order).
x_flat = nm.ravel(X)
y_flat = nm.ravel(Y)

# Mean of X and Y
mean_x = nm.mean(x_flat)
mean_y = nm.mean(y_flat)

# Total number of values
m = len(x_flat)

# Deviations from the means, computed once for the whole array
dev_x = x_flat - mean_x
dev_y = y_flat - mean_y

# Vectorized sums replace the element-by-element Python loop
numer = nm.sum(dev_x * dev_y)
denom = nm.sum(dev_x ** 2)

# A zero denominator means X has no spread (or there is no data), so the slope
# is undefined; fail loudly instead of propagating nan/inf downstream.
if denom == 0:
    raise ValueError("Cannot fit a regression line: X has no variation")

b1 = numer / denom
b0 = mean_y - (b1 * mean_x)

# Printing the regression coefficients: slope first, then intercept
print(b1, b0)

Graphical Representation of the regression line and scatter points

In [None]:
#plotting the values and Regression line
max_x = nm.max(X) + 100
min_x = nm.min(X) - 100

# Calculating line values x and y
x = nm.linspace(min_x, max_x, 1000)
y = b0 + b1*x

#plotting line
mtp.plot(x, y, color='#58b970', label='Regression Line')
#plotting scatter points
mtp.scatter(X, Y, color='#ef5423', label='Scatter Plot')

mtp.xlabel('Head Size in cm3')
mtp.ylabel('Brain Weight in grams')
mtp.legend()
mtp.show()


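Manually finding the coefficient of determination using statistical formulae: $r^2 = \frac{\sum_i (\hat{y}_i - \bar{y})^2}{\sum_i (y_i - \bar{y})^2}$, where $\hat{y}_i = b_0 + b_1 x_i$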

In [43]:
# Coefficient of determination r^2 of the fitted line, computed as
#   r^2 = sum((y_pred - mean_y) ** 2) / sum((y - mean_y) ** 2)
# i.e. the explained sum of squares divided by the total sum of squares.

# Flatten the inputs so the result is a scalar whether X is still 1-D or has
# already been reshaped to an (m, 1) column vector by another cell.
y_pred = b0 + b1 * nm.ravel(X)

numer = nm.sum((y_pred - mean_y) ** 2)
denom = nm.sum((nm.ravel(Y) - mean_y) ** 2)
r2 = numer / denom

# r2 is a scalar here. Indexing it (r2[0]) only worked when X had been made
# 2-D by a later cell and raises IndexError on a fresh top-to-bottom run, so
# print the value directly.
print(r2)

0.6393117199570001


Implementation of Simple Linear Regression using scikit learn library in python

In [42]:
from sklearn.linear_model import LinearRegression
# NOTE(review): mean_squared_error is not used in the cells visible here;
# kept in case a later cell relies on it.
from sklearn.metrics import mean_squared_error

# scikit-learn expects a 2-D feature matrix, so turn X into an (m, 1) column
X = nm.reshape(X, (m, 1))

# Create the model and fit it to the data in one step
# (fit() hands back the fitted estimator)
reg = LinearRegression().fit(X, Y)

# Predictions for the same inputs the model was fitted on
Y_pred = reg.predict(X)

# r^2 of the fitted model, evaluated on the data it was fitted to
r2_score = reg.score(X, Y)

print(r2_score)

0.639311719957
