In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
data = pd.read_csv('headbrain.csv')

In [None]:
data.head()

In [None]:
X = data['Head Size(cm^3)'].values
Y = data['Brain Weight(grams)'].values

In [None]:
mean_x = np.mean(X)
mean_y = np.mean(Y)

In [None]:
m = len(X)
numer = 0
denom = 0
for i in range(m):
    numer += (X[i]-mean_x)*(Y[i]-mean_y)
    denom += (X[i]-mean_x)**2
b1 = numer/denom
b0 = mean_y - b1*mean_x

print(b1,b0)

In [None]:
max_x = np.max(X) + 100
min_x = np.min(X) - 100

x = np.linspace(min_x, max_x, 1000)
y = b0 + b1*x

In [None]:
plt.plot(x,y)
plt.scatter(X,Y,c="red")
plt.show()

In [None]:
rmse = 0
for i in range(m):
    y_pred = b0 + b1*X[i]
    rmse += (Y[i]-y_pred)**2
rmse = np.sqrt(rmse/m)
print(rmse)

## using sci-kit

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

reg = LinearRegression()
reg.fit(X,Y)
Y_pred = reg.predict(X)
rmse = np.sqrt(mean_squared_error(Y,Y_pred))
print(rmse)

# Multiple Linear Regression

In [None]:
from mpl_toolkits.mplot3d import Axes3D
data = pd.read_csv('student.csv')
print(data.shape)
plt.rcParams['figure.figsize'] = (20.0, 10.0)
data.head()

In [None]:
math = data['Math'].values
reading = data['Reading'].values
y_writing = data['Writing'].values

In [None]:
fig = plt.figure()
ax = Axes3D(fig)
ax.scatter(math,reading,y_writing)
plt.show()

In [None]:
m = len(math)
x0 = np.ones(m)
X = np.array([x0, math, reading]).T
B = np.array([0,0,0])
Y = np.array(y_writing)
alpha = 0.0001

In [None]:
def cost_func(X,Y,B):
    m = len(Y)
    J = np.sum((X.dot(B)-Y)**2)/(2*m)
    return J

In [None]:
initial_cost = cost_func(X,Y,B)
print(initial_cost)

In [None]:
def gradient_descent(X:np.array,Y,B,alpha,iterations):
    cost_history = [0]*iterations
    m = len(Y)
    
    for iteration in range(iterations):
        h = X.dot(B)
        loss = h - Y
        gradient = X.T.dot(loss)/m
        B = B - alpha*gradient
        cost = cost_func(X, Y, B)
        cost_history[iteration] = cost
        
    return B, cost_history

In [None]:
newB, cost_history = gradient_descent(X, Y, B, alpha, 100000)

# New Values of B
print(newB)

# Final Cost of new B
print(cost_history)

In [None]:
def rmse(Y, Y_pred):
    rmse = np.sqrt(sum((Y - Y_pred) ** 2) / len(Y))
    return rmse

# Model Evaluation - R2 Score
def r2_score(Y, Y_pred):
    mean_y = np.mean(Y)
    ss_tot = sum((Y - mean_y) ** 2)
    ss_res = sum((Y - Y_pred) ** 2)
    r2 = 1 - (ss_res / ss_tot)
    return r2

Y_pred = X.dot(newB)

print(rmse(Y, Y_pred))

## using sci-kit

In [None]:
reg2 = LinearRegression()
X = np.array([math,reading]).T
Y = np.array(y_writing)
reg2.fit(X,Y)
Y_pred = reg2.predict(X)
rmse = np.sqrt(mean_squared_error(Y,Y_pred))
print(rmse)

In [107]:
newData = pd.read_csv('headbrain.csv')
newData

Unnamed: 0,Gender,Age Range,Head Size(cm^3),Brain Weight(grams)
0,1,1,4512,1530
1,1,1,3738,1297
2,1,1,4261,1335
3,1,1,3777,1282
4,1,1,4177,1590
...,...,...,...,...
232,2,2,3214,1110
233,2,2,3394,1215
234,2,2,3233,1104
235,2,2,3352,1170


In [121]:
head = newData['Head Size(cm^3)'].values
weight = newData['Brain Weight(grams)'].values
head

array([4512, 3738, 4261, 3777, 4177, 3585, 3785, 3559, 3613, 3982, 3443,
       3993, 3640, 4208, 3832, 3876, 3497, 3466, 3095, 4424, 3878, 4046,
       3804, 3710, 4747, 4423, 4036, 4022, 3454, 4175, 3787, 3796, 4103,
       4161, 4158, 3814, 3527, 3748, 3334, 3492, 3962, 3505, 4315, 3804,
       3863, 4034, 4308, 3165, 3641, 3644, 3891, 3793, 4270, 4063, 4012,
       3458, 3890, 4166, 3935, 3669, 3866, 3393, 4442, 4253, 3727, 3329,
       3415, 3372, 4430, 4381, 4008, 3858, 4121, 4057, 3824, 3394, 3558,
       3362, 3930, 3835, 3830, 3856, 3249, 3577, 3933, 3850, 3309, 3406,
       3506, 3907, 4160, 3318, 3662, 3899, 3700, 3779, 3473, 3490, 3654,
       3478, 3495, 3834, 3876, 3661, 3618, 3648, 4032, 3399, 3916, 4430,
       3695, 3524, 3571, 3594, 3383, 3499, 3589, 3900, 4114, 3937, 3399,
       4200, 4488, 3614, 4051, 3782, 3391, 3124, 4053, 3582, 3666, 3532,
       4046, 3667, 2857, 3436, 3791, 3302, 3104, 3171, 3572, 3530, 3175,
       3438, 3903, 3899, 3401, 3267, 3451, 3090, 34

In [132]:
m = len(head)
x0 = np.ones(m)
X = np.array([x0,head]).T
X /= 100
Y = np.array(weight)/100
B = np.array([0,0])


In [133]:
def calc_cost(X,Y,B):
    m = len(Y)
    J = np.sum((X.dot(B)-Y)**2)/(2*m)
    return J

In [135]:
def gradient_descent(X,Y,B,alpha,itrs):
    history = [0]*itrs

    for itr in range(itrs):
     h = X.dot(B)
     loss = (h-Y)
     gradient = X.T.dot(loss)/m
     B = B - alpha*gradient
     cost = calc_cost(X,Y,B)
     history[itr] = cost
    
    return B , history
newB, cost_history = gradient_descent(X, Y, B, alpha, 100000)

# New Values of B
print(newB)

# Final Cost of new B
print(cost_history[-1])


def rmse(Y, Y_pred):
    rmse = np.sqrt(sum((Y - Y_pred) ** 2) / len(Y))
    return rmse

# Model Evaluation - R2 Score
def r2_score(Y, Y_pred):
    mean_y = np.mean(Y)
    ss_tot = sum((Y - mean_y) ** 2)
    ss_res = sum((Y - Y_pred) ** 2)
    r2 = 1 - (ss_res / ss_tot)
    return r2

Y_pred = X.dot(newB)

print(rmse(Y, Y_pred))
print(r2_score(Y, Y_pred))




[0.0033386  0.35212724]
0.3128547690647896
0.7910180390671122
0.5661037602694838
