## Coding Linear Regression

This notebooks contains the code for implementing Linear Regression inbuilt functions and calculation of the error.

In [1]:
import numpy as np

In [2]:
data = np.loadtxt("Datasets/data.csv", delimiter = ",")

In [3]:
x = data[:, 0]
y = data[:, 1]

In [4]:
x.shape

(100,)

In [5]:
from sklearn import model_selection
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(x, y, test_size = 0.3)

In [6]:
X_train.shape

(70,)

In [7]:
def fit(x_train, y_train):
    num = (x_train * y_train).mean() - x_train.mean() * y_train.mean()
    den = (x_train ** 2).mean() - x_train.mean()**2
    m = num / den
    c = y_train.mean() - m * x_train.mean()
    return m, c

In [8]:
def predict(x, m, c):
    return m*x + c

In [9]:
def score(y_truth, y_pred):
    u = ((y_truth - y_pred)**2).sum()
    v = ((y_truth - y_truth.mean())**2).sum()
    return 1 - u/v

In [10]:
def cost(x, y, m, c):
    return ((y - m * x - c)**2).mean() 

In [11]:
m, c = fit(X_train, Y_train)
# test data
Y_test_pred = predict(X_test, m, c)
print("Test Score: ", score(Y_test, Y_test_pred))

# train data
Y_train_pred = predict(X_train, m, c)
print("Train Score: ", score(Y_train, Y_train_pred))
print("M, C : ", m, c)

Test Score:  0.49836604616341706
Train Score:  0.6166336681129596
M, C :  1.4114494123127326 3.969137322881977


In [12]:
# Cost on training data

print("Cost on training data: ", cost(X_train, Y_train, m, c));

Cost on training data:  122.91421103002581


In [13]:
# Inbuilt Linear Regression

from sklearn.linear_model import LinearRegression 

In [14]:
alg1 = LinearRegression()
X_train = X_train.reshape(-1, 1)
X_test = X_test.reshape(-1, 1)
alg1.fit(X_train, Y_train)

LinearRegression()

In [15]:
Y_pred = alg1.predict(X_test)

In [16]:
print("Training Score: ", alg1.score(X_train, Y_train))
print("Testing Score: ", alg1.score(X_test, Y_pred))

Training Score:  0.6166336681129599
Testing Score:  1.0
