# Regression From Scratch

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the header-less two-column CSV and append a constant column of ones
# that later serves as the intercept term of the design matrix.
df = pd.read_csv(
    'Data/ex1-Copy1.csv',
    header=None,
    names=['population', 'profit'],
    delimiter=',',
).assign(intercept=1)
df.head()


Unnamed: 0,population,profit,intercept
0,6.1101,17.592,1
1,5.5277,9.1302,1
2,8.5186,13.662,1
3,7.0032,11.854,1
4,5.8598,6.8233,1


## Setting the Gradients Equal to 0 (Normal Equations)

In [3]:
from numpy.linalg import inv

# Design matrix: intercept column first, then population.
x = df[['intercept', 'population']].values
# Response (profit) as a column vector.
y = df['profit'].values.reshape(-1, 1)

In [4]:
# Sanity-check the dimensions of the design matrix and the response vector.
x.shape,y.shape

((97, 2), (97, 1))

In [5]:
# First factor of the normal-equation solution: (X^T X)^{-1}.
gram = x.T @ x
part1 = inv(gram)
part1.shape

(2, 2)

In [6]:
# Second factor of the normal-equation solution: X^T y.
part2 = x.T @ y
part2.shape

(2, 1)

In [7]:
# Closed-form least-squares coefficients: (X^T X)^{-1} X^T y.
part1.dot(part2)

array([[-3.89578088],
       [ 1.19303364]])

In [8]:
# y^T X is the transpose of X^T y; transposing it back yields a column vector.
part3 = y.T @ x
part3.T

array([[ 566.3961    ],
       [6336.89842532]])

In [9]:
# Same coefficients, built from the transposed product (y^T X)^T.
part1.dot(part3.T)

array([[-3.89578088],
       [ 1.19303364]])

### Using Covariance 

In [10]:
# Predictor and response as pandas Series (first and second column of df).
x_new = df['population']
y_new = df['profit']

In [11]:
# Sample means used by both the covariance and the correlation formulas.
mean_x = x_new.mean()
mean_y = y_new.mean()

In [12]:
# Slope = sum of cross-deviations / sum of squared x-deviations;
# the intercept then follows from the two means.
x_dev = x_new - mean_x
y_dev = y_new - mean_y
b1 = np.sum(x_dev * y_dev) / np.sum(x_dev ** 2)
b0 = mean_y - (b1 * mean_x)
print(b0, b1)

-3.89578087831186 1.1930336441895937


### Using Correlation

In [13]:
# Slope = r * (spread of y / spread of x), where r is the
# profit/population entry of the correlation matrix.
r_xy = df.corr().loc['profit', 'population']
y_spread = np.sqrt(np.sum((y_new - mean_y) ** 2))
x_spread = np.sqrt(np.sum((x_new - mean_x) ** 2))
b1 = r_xy * (y_spread / x_spread)
b0 = mean_y - (b1 * mean_x)
print(b0, b1)

-3.8957808783118635 1.1930336441895941


## Gradient Descent

In [14]:
# Plain 1-D numpy arrays for the gradient-descent routines.
xi = df['population'].to_numpy()
yi = df['profit'].to_numpy()

In [15]:
def gradientdescent(b0,b1,yi,xi,tolerance,eta,verbose=False,max_iter=None):
    """Fit ``y = b0 + b1 * x`` by batch gradient descent on the residual sum of squares.

    Parameters
    ----------
    b0, b1 : float
        Starting values for the intercept and the slope.
    yi, xi : array-like, 1-D
        Observed responses and the matching predictor values.
    tolerance : float
        Iteration stops once the magnitude of ``derv`` (the gradient without
        its constant factor) measured at the start of a step is <= this value.
    eta : float
        Step size; each step moves the coefficients by ``2 * eta * derv``.
    verbose : bool, optional
        Print the coefficients, the previous magnitude and the current
        ``derv`` before every step.
    max_iter : int or None, optional
        Upper bound on the number of steps. ``None`` (default) keeps the
        original unbounded behaviour.

    Returns
    -------
    tuple
        ``(b0, b1)`` after the final step. When ``eta`` is large enough for
        the iteration to diverge the values overflow to ``nan``; the loop
        then stops because ``nan > tolerance`` is false.
    """
    yi = np.asarray(yi, dtype=float)
    xi = np.asarray(xi, dtype=float)
    # Start from +inf instead of a magic 100: with the old sentinel any
    # tolerance >= 100 skipped the loop and returned the starting point
    # even when the gradient was still larger than the tolerance.
    mag = np.inf
    steps = 0
    while mag > tolerance and (max_iter is None or steps < max_iter):
        error = yi - (b0 + (b1 * xi))
        # derv is minus one half of the RSS gradient w.r.t. (b0, b1).
        derv = np.array([np.sum(error), np.sum(error * xi)])
        if verbose:
            print(b0, b1, mag, derv)
        b0, b1 = b0 + (2 * eta) * derv[0], b1 + (2 * eta) * derv[1]
        # Magnitude of the gradient at the point we just stepped away from.
        mag = np.sqrt(derv[0] ** 2 + derv[1] ** 2)
        steps += 1
    return b0, b1


In [16]:
# Try three step sizes on a small toy problem to see how the learning rate
# affects convergence, starting from (b0, b1) = (2, 3).
toy_y = np.array([1,3,7,13,21])
toy_x = np.array([0,1,2,3,4])
for step in (0.1, 0.01, 0.0001):
    print(gradientdescent(2, 3, toy_y, toy_x, 0.01, step))

(nan, nan)
(-0.9939597615566699, 4.997881198760072)
(-0.9936783876089295, 4.99778249810861)


In [17]:
# Same three step sizes on the population/profit data, starting from zeros.
for step in (0.1, 0.01, 0.0001):
    print(gradientdescent(0, 0, yi, xi, 0.01, step))

(nan, nan)
(nan, nan)
(-3.8952142727726105, 1.1929767225173094)


In [18]:
# Toy problem from a zero start, with the settings spelled out by keyword.
gradientdescent(
    0, 0,
    np.array([1,3,7,13,21]),
    np.array([0,1,2,3,4]),
    tolerance=0.01,
    eta=0.01,
)

(-0.9939158298014424, 4.997865788332434)

In [19]:
# Population/profit data from a zero start with the small step size.
gradientdescent(0, 0, yi, xi, tolerance=0.01, eta=0.0001)

(-3.8952142727726105, 1.1929767225173094)

In [20]:
# Fit once, then unpack the (b0, b1) pair into the report string.
fitted = gradientdescent(0, 0, yi, xi, tolerance=0.01, eta=0.0001)
print('The b0 is {0} and b1 is {1}'.format(*fitted))

The b0 is -3.8952142727726105 and b1 is 1.1929767225173094


## Gradient Descent - General

In [21]:
def gradientdesc(*b,yi,xi,tolerane=None,eta,verbose=False,tolerance=None):
    """Batch gradient descent for linear regression with any number of predictors.

    A column of ones is prepended to ``xi`` so the first coefficient acts as
    the intercept.

    Parameters
    ----------
    *b : float
        Starting coefficients, intercept first. If omitted, all coefficients
        start at zero.
    yi : array-like
        Observed responses (any shape that flattens to one value per row).
    xi : array-like
        Predictors, either 1-D (single feature) or 2-D (rows x features).
    tolerane : float, optional
        Legacy (misspelled) name of the stopping threshold, kept so existing
        calls keep working.
    eta : float
        Step size; each step moves the coefficients by ``2 * eta * derv``.
    verbose : bool, optional
        Print the updated coefficients after every step.
    tolerance : float, optional
        Correctly spelled stopping threshold; takes precedence over
        ``tolerane`` when both are supplied.

    Returns
    -------
    numpy.ndarray
        1-D array of fitted coefficients, intercept first.

    Raises
    ------
    TypeError
        If neither ``tolerance`` nor ``tolerane`` is given.
    ValueError
        If the number of starting coefficients does not match the number of
        columns of the design matrix.
    """
    if tolerance is None:
        tolerance = tolerane
    if tolerance is None:
        raise TypeError("gradientdesc() requires 'tolerance' (or the legacy 'tolerane')")

    yi = np.asarray(yi, dtype=float).reshape(-1, 1)
    xi = np.asarray(xi, dtype=float)
    design = np.column_stack((np.ones(xi.shape[0]), xi))

    if b:
        coef = np.array(b, dtype=float).reshape(-1, 1)
    else:
        coef = np.zeros((design.shape[1], 1))
    if coef.shape[0] != design.shape[1]:
        raise ValueError(
            "expected {0} starting coefficients, got {1}".format(design.shape[1], coef.shape[0])
        )

    # Start from +inf instead of a magic 100 so that the first step is always
    # taken; the old sentinel returned the untouched input tuple whenever the
    # tolerance was >= 100.
    mag = np.inf
    while mag > tolerance:
        error = yi - design.dot(coef)
        # One matrix product replaces the per-column Python loop; entry j is
        # sum(error * column_j), i.e. minus half the RSS gradient.
        derv = design.T.dot(error)
        coef = coef + (2 * eta) * derv
        if verbose:
            print(*coef)
        mag = np.sqrt(np.sum(derv ** 2))
    return coef.flatten()

In [22]:
# General routine on the toy problem, starting from zero coefficients.
gradientdesc(0,0,yi=np.array([1,3,7,13,21]),xi=np.array([0,1,2,3,4]),tolerane=0.01,eta=0.01)

array([-0.99391583,  4.99786579])

In [23]:
# General routine on the population/profit arrays, starting from zero coefficients.
gradientdesc(0,0,yi=yi,xi=xi,tolerane=0.01,eta=0.0001)

array([-3.89521427,  1.19297672])

## Coordinate Descent

In [44]:
# Toy problem used to check the coordinate-descent routine.
x1 = np.array([0,1,2,3,4])
y1 = np.array([1,3,7,13,21])
# Population/profit data as plain 1-D arrays.
xi = df['population'].to_numpy()
yi = df['profit'].to_numpy()

In [45]:
def coordinate_descent(X,y,alpha = .03, num_iters=20,normalize=False,verbose=False):
    """Fit a linear model by cyclic coordinate descent.

    A column of ones is prepended to ``X`` so ``theta[0]`` is the intercept.

    Parameters
    ----------
    X : array-like
        Predictors, 1-D (single feature) or 2-D (rows x features).
    y : array-like
        Responses; reshaped internally to a column vector, so both ``(m,)``
        and ``(m, 1)`` inputs are accepted.
    alpha : float, optional
        Step size for the un-normalised update. Ignored when ``normalize``
        is truthy.
    num_iters : int, optional
        Number of full sweeps over all coordinates.
    normalize : bool, optional
        If truthy, every column of the design matrix (including the ones
        column) is divided by its Euclidean norm and each coordinate is set
        directly to ``X_j . (y - prediction without coordinate j)``. The
        returned coefficients then refer to the *normalised* columns, not to
        the original feature scale.
    verbose : bool, optional
        Print the coefficient vector at the start and after every
        coordinate update.

    Returns
    -------
    numpy.ndarray
        Coefficients as a column vector of shape ``(n_features + 1, 1)``.
    """
    X = np.asarray(X, dtype=float)
    # A 1-D y would broadcast (m, 1) - (m,) into an (m, m) matrix below.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    X = np.column_stack((np.ones(X.shape[0]), X))

    if normalize:
        norms = np.linalg.norm(X, axis=0)
        # An all-zero column has norm 0; leave it unscaled instead of
        # turning the whole fit into NaN via 0/0.
        norms[norms == 0] = 1.0
        X = X / norms

    n = X.shape[1]
    theta = np.zeros((n, 1))
    # The starting vector is part of the trace, so only show it on request.
    if verbose:
        print(theta.flatten())

    for _ in range(num_iters):
        for j in range(n):
            if normalize:
                # Prediction from every coordinate except j.
                h = (X[:, :j] @ theta[:j]) + (X[:, j + 1:] @ theta[j + 1:])
                theta[j] = X[:, j] @ (y - h)
            else:
                gradient = X[:, j] @ ((X @ theta) - y)
                theta[j] = theta[j] - alpha * gradient
            if verbose:
                print(theta.flatten())

    return theta

In [46]:
# Normalised coordinate descent on the toy problem, printing every update.
coordinate_descent(x1, y1[:, np.newaxis], num_iters=100, normalize=True, verbose=True)

[0. 0.]
[20.1246118  0.       ]
[20.1246118   9.12870929]
[12.67105187  9.12870929]
[12.67105187 15.21451549]
[ 7.70201192 15.21451549]
[ 7.70201192 19.27171962]
[ 4.38931862 19.27171962]
[ 4.38931862 21.97652237]
[ 2.18085642 21.97652237]
[ 2.18085642 23.7797242 ]
[ 0.70854829 23.7797242 ]
[ 0.70854829 24.98185876]
[-0.27299047 24.98185876]
[-0.27299047 25.7832818 ]
[-0.92734964 25.7832818 ]
[-0.92734964 26.31756382]
[-1.36358908 26.31756382]
[-1.36358908 26.67375184]
[-1.65441538 26.67375184]
[-1.65441538 26.91121052]
[-1.84829958 26.91121052]
[-1.84829958 27.0695163 ]
[-1.97755571 27.0695163 ]
[-1.97755571 27.17505349]
[-2.06372647 27.17505349]
[-2.06372647 27.24541162]
[-2.12117364 27.24541162]
[-2.12117364 27.29231704]
[-2.15947175 27.29231704]
[-2.15947175 27.32358732]
[-2.18500383 27.32358732]
[-2.18500383 27.34443417]
[-2.20202521 27.34443417]
[-2.20202521 27.35833207]
[-2.2133728  27.35833207]
[-2.2133728  27.36759734]
[-2.22093786 27.36759734]
[-2.22093786 27.37377418]
[-2.22

array([[-2.23606798],
       [27.38612788]])

In [47]:
# Un-normalised coordinate descent on the toy problem with an explicit step size.
coordinate_descent(x1, y1[:, np.newaxis], alpha=0.01, num_iters=500, normalize=False)

[0. 0.]


array([[-0.99893356],
       [ 4.99963105]])

In [48]:
# Normalised coordinate descent on the population/profit data;
# the coefficients refer to the unit-norm columns.
coordinate_descent(xi, yi[:, np.newaxis], num_iters=10000, normalize=True)

[0. 0.]


array([[-38.3689919 ],
       [106.01352582]])

In [49]:
# Un-normalised coordinate descent on the population/profit data with a small step size.
coordinate_descent(xi, yi[:, np.newaxis], alpha=0.0001, num_iters=5500, normalize=False)

[0. 0.]


array([[-3.89553762],
       [ 1.19300925]])

In [57]:
from sklearn.linear_model import LinearRegression

# Cross-check against scikit-learn: build the design matrix (ones column plus
# population), scale every column to unit Euclidean norm, and fit without a
# separate intercept because the ones column already plays that role.
yi = df['profit'].values
population = df['population'].values.reshape(-1, 1)
xi = np.column_stack((np.ones(population.shape[0]), population))
xi = xi / np.linalg.norm(xi, axis=0)

reg_1 = LinearRegression(fit_intercept=False)
reg_1.fit(xi, yi)
print(reg_1.intercept_, reg_1.coef_)
reg_1.score(xi, yi)

0.0 [-38.3689919  106.01352582]


0.7020315537841397