### Importing Necessary Libraries 

In [3]:
import numpy as np
import pandas as pd


### Reading CSV files

In [26]:
# Load both datasets from CSV files addressed relative to the notebook's working directory.
# data_linear feeds the least-squares experiment, data_logistic the logistic-loss one.
data_linear=pd.read_csv("linear_regression.csv")
data_logistic=pd.read_csv("logistic regression dataset-Social_Network_Ads.csv")

In [27]:
# Peek at the first rows of the linear-regression data as loaded (head() defaults to 5 rows).
data_linear.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [99]:
# Peek at the first rows of the logistic-regression data.
# NOTE(review): the saved output of this cell already shows encoded values (Purchased = -1),
# so it was last executed after the categorical-encoding cell further down; re-run the
# notebook top to bottom to see the raw values here.
data_logistic.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,0,19,19000,-1
1,15810944,0,35,20000,-1
2,15668575,1,26,43000,-1
3,15603246,1,27,57000,-1
4,15804002,0,19,76000,-1


### Defining Numerical Gradient Function

In [29]:
def numericalGrad(funObj,w,epsilon):
    """Approximate the gradient of ``funObj`` at ``w`` with central differences.

    For every index ``i`` along the first axis of ``w``, the entry ``w[i]`` is
    shifted by ``+epsilon`` and ``-epsilon`` and the slope
    ``(funObj(w_plus) - funObj(w_minus)) / (2 * epsilon)`` is stored in
    ``grad[i]``.

    Parameters
    ----------
    funObj : callable
        Objective that accepts an array shaped like ``w`` and returns a scalar
        (or a single-element array).
    w : array_like
        Point at which the gradient is estimated. Perturbation happens along
        the first axis only, so the result is the element-wise gradient when
        ``w`` is 1-D or a column vector of shape ``(n, 1)``.
    epsilon : float
        Half-width of the finite-difference step.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``w``.
    """
    # Work on a float copy: with an integer-typed ``w`` the perturbed copies
    # would truncate ``w[i] +/- epsilon`` back to integers and the difference
    # quotient would be meaningless.
    w = np.array(w, dtype=float)
    grad = np.zeros(w.shape)
    for i in range(w.shape[0]):
        wp = w.copy()
        wn = w.copy()
        wp[i] += epsilon
        wn[i] -= epsilon
        grad[i] = (funObj(wp) - funObj(wn)) / (2 * epsilon)
    return grad

### Defining Logistic Loss function

In [83]:
def log_loss(w,x,y):
    """Mean logistic loss ``log(1 + exp(-y * w^T x))`` over the columns of ``x``.

    Parameters
    ----------
    w : numpy.ndarray
        Weight vector; ``np.dot(w.T, x)`` must be valid (e.g. ``w`` of shape
        ``(d, 1)`` with ``x`` of shape ``(d, N)``).
    x : numpy.ndarray
        Feature matrix with one sample per column; ``x.shape[1]`` is used as
        the sample count.
    y : numpy.ndarray
        Labels broadcastable against ``np.dot(w.T, x)``. The margin form of
        the loss presumes labels in {-1, +1}.

    Returns
    -------
    numpy.ndarray
        The per-sample losses summed along axis 1 and divided by the sample
        count, so a ``(1, N)`` label row yields an array of shape ``(1,)``.
    """
    margin = y * np.dot(w.T, x)
    # log(1 + exp(-m)) is evaluated as logaddexp(0, -m): the naive form
    # overflows to inf as soon as exp(-m) exceeds the float range, whereas
    # logaddexp stays finite (it returns ~ -m for very negative margins).
    loss = np.sum(np.logaddexp(0, -margin), axis=1) / x.shape[1]
    return loss


### Defining Least Square Loss

In [63]:
def square_loss(w,x,y):
    """Mean squared error of the linear model ``w^T x`` against ``y``.

    ``x`` holds one sample per column, so the summed squared residuals are
    divided by ``x.shape[1]``. Returns a scalar.
    """
    n_samples = x.shape[1]
    residual = y - np.dot(w.T, x)
    squared_error = np.sum(residual ** 2)
    return squared_error / n_samples

### Defining Gradient of Logistic Loss Function


In [88]:
def grad_log(w,x,y):
    """Analytic gradient of the mean logistic loss with respect to ``w``.

    Parameters
    ----------
    w : numpy.ndarray
        Weight vector; ``np.dot(w.T, x)`` must be valid (e.g. shape ``(d, 1)``).
    x : numpy.ndarray
        Feature matrix of shape ``(d, N)`` with one sample per column.
    y : numpy.ndarray
        Labels broadcastable against ``np.dot(w.T, x)``; the margin form
        presumes labels in {-1, +1}.

    Returns
    -------
    numpy.ndarray
        Column vector of shape ``(d, 1)`` holding
        ``-(1/N) * sum_j sigma(-m_j) * y_j * x_j`` with ``m_j = y_j * w^T x_j``.
    """
    margin = y * np.dot(w.T, x)
    # sigma(-m) = exp(-m) / (1 + exp(-m)) is computed as exp(-logaddexp(0, m)).
    # The naive quotient turns into inf / inf = NaN once exp(-m) overflows;
    # this form stays within [0, 1] for any margin.
    z = np.exp(-np.logaddexp(0, margin)) / x.shape[1]
    a = np.sum(-1 * z * y * x, axis=1)
    return np.asarray(a).reshape((-1, 1))
    

### Defining Gradient of Least Square Loss Function

In [64]:
def grad_square(w,x,y):
    """Analytic gradient of the mean squared error with respect to ``w``.

    ``x`` holds one sample per column. The per-sample terms
    ``-2 * (y - w^T x) * x`` are summed over the samples (axis 1), divided by
    ``x.shape[1]``, and returned as a column vector.
    """
    n_samples = x.shape[1]
    residual = y - np.dot(w.T, x)
    per_feature = np.sum(-2 * residual * x, axis=1) / n_samples
    return per_feature.reshape((-1, 1))

### Defining Gradient Descent

In [52]:
def grad_descent(shape,grad_func,lr=0.0001,num_itr=10000):
    """Run fixed-step gradient descent starting from the zero vector.

    Parameters
    ----------
    shape : int
        Number of weights; the iterate is a column vector of shape ``(shape, 1)``.
    grad_func : callable
        Maps the current weight vector to the gradient at that point.
    lr : float
        Step size applied to every update.
    num_itr : int
        Number of updates to perform; there is no early stopping.

    Returns
    -------
    numpy.ndarray
        The weights after ``num_itr`` updates.
    """
    weights = np.zeros((shape, 1))
    for _step in range(num_itr):
        # Move against the gradient evaluated at the current iterate.
        weights = weights - lr * grad_func(weights)
    return weights

### Handling Categorical Values

In [35]:
# Encode the categorical columns as integer codes so that `.values` yields a numeric matrix.
# The explicit astype("int64") matters: `replace` on an object column only produces an
# integer column through pandas' silent downcasting, which pandas 2.2 deprecates; without
# the cast the columns can stay object-typed and break the numpy math downstream.
# The cell stays safe to re-run: already-encoded values pass through `replace` unchanged.
# NOTE(review): `region` is nominal but gets ordered codes 0..3 here - confirm that an
# ordinal encoding (rather than one-hot) is intended.
data_linear["sex"] = data_linear["sex"].replace({"male": 0, "female": 1}).astype("int64")
data_linear["smoker"] = data_linear["smoker"].replace({"yes": 1, "no": 0}).astype("int64")
data_linear["region"] = data_linear["region"].replace({"northwest": 0, "northeast": 1, "southeast": 2, "southwest":3}).astype("int64")
data_logistic["Gender"] = data_logistic["Gender"].replace({"Male":0, "Female":1}).astype("int64")
# Map label 0 to -1 so the labels are -1/+1, the form the margin-based log_loss / grad_log multiply by.
data_logistic["Purchased"]=data_logistic["Purchased"].replace({0:-1})

In [36]:
# Preview again to confirm sex / smoker / region now hold integer codes.
data_linear.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,1,27.9,0,1,3,16884.924
1,18,0,33.77,1,0,2,1725.5523
2,28,0,33.0,3,0,2,4449.462
3,33,0,22.705,0,0,0,21984.47061
4,32,0,28.88,0,0,0,3866.8552


In [40]:
# Show 8 rows so that both Purchased labels (-1 and 1) appear in the preview.
data_logistic.head(8)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,0,19,19000,-1
1,15810944,0,35,20000,-1
2,15668575,1,26,43000,-1
3,15603246,1,27,57000,-1
4,15804002,0,19,76000,-1
5,15728773,0,27,58000,-1
6,15598044,1,27,84000,-1
7,15694829,1,32,150000,1


### Extracting X and Y values in form of numpy array for linear_regression data

In [92]:
# Feature matrix: every column except the last one, transposed to (n_features, n_samples)
# so that each sample is a column, as the loss / gradient functions expect.
linear_x=data_linear.iloc[:,:-1].values
linear_x=linear_x.T
# NOTE(review): .min() / .max() are called without an axis, so they reduce over the whole
# matrix and every feature is scaled by the same global range instead of its own range.
# Confirm this is intended; per-feature min-max scaling would use axis=1, keepdims=True.
linear_x = (linear_x - linear_x.min())/(linear_x.max() - linear_x.min())
linear_x.shape

(6, 1338)

In [93]:
# Target row vector: the last column reshaped to (1, n_samples) so it lines up with the
# sample columns of linear_x.
linear_y = data_linear.iloc[:, -1].values.reshape((1, -1))
linear_y.shape

(1, 1338)

### Verifying Least Square Loss using both gradients

In [121]:
# calculating derivative gradient
gradientFunc = lambda w: grad_square(w, linear_x, linear_y)
w_derivative = grad_descent(linear_x.shape[0], gradientFunc)

# calculating numerical gradient
funcObj = lambda w: square_loss(w, linear_x, linear_y)
gradientFunc = lambda w: numericalGrad(funcObj, w, 0.00001)
w_numerical = grad_descent(linear_x.shape[0], gradientFunc)

# making DataFrame for comparing weights
data=pd.DataFrame(pd.DataFrame({"Derivative approach": w_derivative.T[0], "Numerical Gradient approach": w_numerical.T[0]}),columns=["Derivative approach","Numerical Gradient approach"])
print(data)

   Derivative approach  Numerical Gradient approach
0         10042.605755                 10042.605623
1           103.353506                   103.353672
2          7433.127557                  7433.127560
3           281.532366                   281.532556
4           167.890072                   167.890392
5           352.358655                   352.358679


### Extracting X and Y values in form of numpy array for logistic_regression data

In [122]:
# Feature matrix: every column except the last one, transposed to (n_features, n_samples).
# NOTE(review): per the previews above, "every column except the last" includes the
# `User ID` identifier, which is then treated as a feature - confirm it should not be dropped.
logistic_x=data_logistic.iloc[:,:-1].values
logistic_x=logistic_x.T
# NOTE(review): .min() / .max() are called without an axis, so one global range scales all
# features. With the ~1.5e7-sized User ID values in the matrix, the other features are
# squeezed towards 0; per-feature scaling would use axis=1, keepdims=True.
logistic_x = (logistic_x - logistic_x.min())/(logistic_x.max() - logistic_x.min())
logistic_x.shape


(4, 400)

In [123]:
# Label row vector: the last column reshaped to (1, n_samples) so it lines up with the
# sample columns of logistic_x.
logistic_y = data_logistic.iloc[:, -1].values.reshape((1, -1))
logistic_y.shape

(1, 400)

### Verifying Logistic Loss using both gradients

In [124]:
# calculating derivative gradient
gradientFunc = lambda w: grad_log(w, logistic_x, logistic_y)
w_derivative = grad_descent(logistic_x.shape[0], gradientFunc)

# calculating numerical gradient
funcObj = lambda w: log_loss(w, logistic_x, logistic_y)
gradientFunc = lambda w: numericalGrad(funcObj, w, 0.00001)
w_numerical = grad_descent(logistic_x.shape[0], gradientFunc)

# making DataFrame for comparing weights
data=pd.DataFrame(pd.DataFrame({"Derivative approach": w_derivative.T[0], "Numerical Gradient approach": w_numerical.T[0]}),columns=["Derivative approach","Numerical Gradient approach"])
print(data)

   Derivative approach  Numerical Gradient approach
0        -1.253286e-01                -1.253286e-01
1        -3.430467e-09                -3.430530e-09
2        -1.032999e-07                -1.033000e-07
3        -1.834312e-04                -1.834312e-04
