<a href="https://colab.research.google.com/github/RogueRock/IDC410-ML/blob/main/ML_exercise2_logistic_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""generate an m+1 dimensional data set, of size n, consisting of m continuous independent
variables (X) and one dependent binary variable (Y) """
import numpy as np
def dataset_generator(n, m, theta):
  """Draw a random data set for a binary logistic-regression exercise.

  Parameters
  ----------
  n : int
      Number of samples (rows).
  m : int
      Number of continuous independent variables (feature columns).
  theta : float
      Success probability of the Bernoulli draw that decides, per sample,
      whether its probability p is mirrored to 1 - p before thresholding.

  Returns
  -------
  tuple
      ``(X_bias, beta, Y)``: the ``(n, m + 1)`` design matrix whose first
      column is all ones, the ``(m + 1,)`` random coefficient vector, and
      the ``(n,)`` integer array of 0/1 labels.

  NOTE: features and coefficients are both drawn with ``np.random.rand``
  (uniform on [0, 1)), so the unflipped probability is never below 0.5;
  a label is 0 essentially only when its sample is flipped.
  """
  features = np.random.rand(n, m)
  # Leading column of ones acts as the bias (intercept) term.
  design = np.column_stack([np.ones(n), features])
  # Random coefficients of the linear relationship between X and Y.
  beta = np.random.rand(m + 1)
  prob = 1 / (1 + np.exp(-design.dot(beta)))
  # Bernoulli(theta) mask: 1 means "mirror this sample's probability".
  flip = np.random.binomial(1, theta, size=n)
  scores = np.where(flip == 1, 1 - prob, prob)
  labels = (scores > 0.5).astype(int)
  return design, beta, labels
# Demo: draw a small data set (50 samples, 3 features, 50% flip rate) and show it.
n, m, theta = 50, 3, 0.5
X_bias, beta, Y = dataset_generator(n, m, theta)
print("The independent variable (X): ")
print(X_bias)
print(45 * "-")
print("The random coefficients (beta) : ", beta)
print("The dependent variable (Y): ", Y)

In [None]:
""" Function to learn the inputs of the logistic regression
and implementing the gradient descent algorithm for the cost function """
# Z holds the linear scores, i.e. the dot product of each row X_i with the coefficients beta
def sigmoid(Z):
  """Apply the logistic function 1 / (1 + exp(-Z)) element-wise to Z."""
  decay = np.exp(-Z)
  return 1 / (decay + 1)
# the cost function: log loss (binary cross-entropy)
def log_loss(Y_hat, Y):
  """Return the mean binary cross-entropy between predictions and labels.

  Parameters
  ----------
  Y_hat : array-like
      Predicted probabilities of the positive class.
  Y : array-like
      True labels (0 or 1), same shape as ``Y_hat``.

  Returns
  -------
  float
      ``-mean(Y * log(Y_hat) + (1 - Y) * log(1 - Y_hat))``.
  """
  # Keep probabilities strictly inside (0, 1): log(0) is -inf and
  # 0 * -inf is nan, which would otherwise poison the mean whenever a
  # prediction saturates at exactly 0 or 1.
  eps = 1e-15
  Y_hat = np.clip(Y_hat, eps, 1 - eps)
  Y = np.asarray(Y)
  return -np.mean(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))
def gradient_descent(X, Y, learn_rate, tau, epoch):
  """Fit logistic-regression coefficients by batch gradient descent on the log loss.

  Parameters
  ----------
  X : ndarray, shape (n_samples, n_coefficients)
      Design matrix (including the bias column, if one is wanted).
  Y : ndarray, shape (n_samples,)
      Binary labels (0 or 1).
  learn_rate : float
      Step size applied to the gradient.
  tau : float
      Convergence threshold: iteration stops once the cost changes by
      less than ``tau`` between two consecutive epochs.
  epoch : int
      Maximum number of gradient steps.

  Returns
  -------
  tuple
      ``(beta, final_cost)``: the learned coefficients and the log loss
      evaluated at those coefficients.
  """
  # One coefficient per column of X, rather than relying on a global `m`.
  beta_initialized = np.random.rand(X.shape[1])
  print("initial betas : " , beta_initialized)
  prev_cost = np.inf
  for _ in range(epoch):
    Y_hat = sigmoid(np.dot(X, beta_initialized))
    cost = log_loss(Y_hat, Y)
    # Compare against the previous epoch's cost; prev_cost starts at inf,
    # so the first epoch can never trigger the stop.
    if np.abs(prev_cost - cost) < tau:
      break
    prev_cost = cost
    derivative = np.dot(X.T, (Y_hat - Y)) / Y.size
    beta_initialized = beta_initialized - learn_rate * derivative
  # Evaluate the cost at the coefficients actually returned. This runs
  # after the loop, so every exit path (convergence, epoch limit,
  # epoch == 0) returns a proper (beta, cost) pair.
  final_cost = log_loss(sigmoid(np.dot(X, beta_initialized)), Y)
  return beta_initialized, final_cost
# Demo: regenerate the data with a 90% flip rate and fit it with plain gradient descent.
n, m, theta = 50, 3, 0.9
X_bias, beta_original, Y = dataset_generator(n, m, theta)
# Optimizer settings: step size, convergence threshold, iteration cap.
learn_rate, tau, epoch = 0.001, 1 * 10**-6, 1000
learned_beta, final_cost = gradient_descent(X_bias, Y, learn_rate, tau, epoch)
print("Final betas : ", learned_beta)
print("The final cost : ", final_cost)


In [None]:
#adding L1 and L2 regularization to the cost function
def l1_regularization(Y_hat, Y, penalty_l1, params):
  """Return the log loss plus an L1 penalty on the coefficients.

  Parameters
  ----------
  Y_hat : array-like
      Predicted probabilities of the positive class.
  Y : array-like
      True labels (0 or 1), same shape as ``Y_hat``.
  penalty_l1 : float
      Weight of the L1 term.
  params : array-like
      Coefficient vector being penalized.

  Returns
  -------
  float
      ``-mean(Y * log(Y_hat) + (1 - Y) * log(1 - Y_hat))
      + penalty_l1 * mean(|params|)``.
  """
  # Keep probabilities strictly inside (0, 1) so log() never returns
  # -inf and 0 * -inf never turns the whole cost into nan.
  eps = 1e-15
  Y_hat = np.clip(Y_hat, eps, 1 - eps)
  Y = np.asarray(Y)
  data_term = -np.mean(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))
  return data_term + penalty_l1 * np.mean(np.abs(params))
def l2_regularization(Y_hat, Y, penalty_l2, params):
  """Return the log loss plus an L2 penalty on the coefficients.

  Parameters
  ----------
  Y_hat : array-like
      Predicted probabilities of the positive class.
  Y : array-like
      True labels (0 or 1), same shape as ``Y_hat``.
  penalty_l2 : float
      Weight of the L2 term.
  params : array-like
      Coefficient vector being penalized.

  Returns
  -------
  float
      ``-mean(Y * log(Y_hat) + (1 - Y) * log(1 - Y_hat))
      + penalty_l2 * mean(params ** 2)``.
  """
  # Keep probabilities strictly inside (0, 1) so log() never returns
  # -inf and 0 * -inf never turns the whole cost into nan.
  eps = 1e-15
  Y_hat = np.clip(Y_hat, eps, 1 - eps)
  Y = np.asarray(Y)
  data_term = -np.mean(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))
  return data_term + penalty_l2 * np.mean(np.square(params))
#gradient descent with L1 regularization
def gradient_descent_l1(X, Y, params, learn_rate, tau, epoch, penalty=None):
  """Fit logistic-regression coefficients by gradient descent on the L1-penalized log loss.

  Parameters
  ----------
  X : ndarray, shape (n_samples, n_coefficients)
      Design matrix.
  Y : ndarray, shape (n_samples,)
      Binary labels (0 or 1).
  params : array-like, shape (n_coefficients,)
      Starting coefficients; the caller's array is not modified.
  learn_rate : float
      Step size applied to the gradient.
  tau : float
      Convergence threshold on the change in cost between two
      consecutive epochs.
  epoch : int
      Maximum number of gradient steps.
  penalty : float, optional
      Weight of the L1 term. When omitted, the module-level
      ``penalty_l1`` is used, as in the original implementation.

  Returns
  -------
  tuple
      ``(params, final_cost_l1)``: the learned coefficients and the
      penalized cost evaluated at those coefficients.
  """
  if penalty is None:
    penalty = penalty_l1
  params = np.asarray(params, dtype=float)
  prev_cost = np.inf
  for _ in range(epoch):
    Y_hat = sigmoid(np.dot(X, params))
    cost = l1_regularization(Y_hat, Y, penalty, params)
    # Compare against the previous epoch's cost; prev_cost starts at inf,
    # so the first epoch can never trigger the stop.
    if np.abs(prev_cost - cost) < tau:
      break
    prev_cost = cost
    # The cost penalizes mean(|params|), so its (sub)gradient is
    # sign(params) / params.size, not sign(params).
    derivative = (np.dot(X.T, (Y_hat - Y)) / Y.size
                  + penalty * np.sign(params) / params.size)
    params = params - learn_rate * derivative
  # Evaluated after the loop so every exit path returns a (params, cost)
  # pair that belongs together.
  final_cost_l1 = l1_regularization(sigmoid(np.dot(X, params)), Y, penalty, params)
  return params, final_cost_l1
#gradient descent with L2 regularization
def gradient_descent_l2(X, Y, params, learn_rate, tau, epoch, penalty=None):
  """Fit logistic-regression coefficients by gradient descent on the L2-penalized log loss.

  Parameters
  ----------
  X : ndarray, shape (n_samples, n_coefficients)
      Design matrix.
  Y : ndarray, shape (n_samples,)
      Binary labels (0 or 1).
  params : array-like, shape (n_coefficients,)
      Starting coefficients; the caller's array is not modified.
  learn_rate : float
      Step size applied to the gradient.
  tau : float
      Convergence threshold on the change in cost between two
      consecutive epochs.
  epoch : int
      Maximum number of gradient steps.
  penalty : float, optional
      Weight of the L2 term. When omitted, the module-level
      ``penalty_l2`` is used, as in the original implementation.

  Returns
  -------
  tuple
      ``(params, final_cost_l2)``: the learned coefficients and the
      penalized cost evaluated at those coefficients.
  """
  if penalty is None:
    penalty = penalty_l2
  params = np.asarray(params, dtype=float)
  prev_cost = np.inf
  for _ in range(epoch):
    Y_hat = sigmoid(np.dot(X, params))
    cost = l2_regularization(Y_hat, Y, penalty, params)
    # Compare against the previous epoch's cost; prev_cost starts at inf,
    # so the first epoch can never trigger the stop.
    if np.abs(prev_cost - cost) < tau:
      break
    prev_cost = cost
    # The cost penalizes mean(params ** 2), so its gradient is
    # 2 * params / params.size, not 2 * params.
    derivative = (np.dot(X.T, (Y_hat - Y)) / Y.size
                  + penalty * 2 * params / params.size)
    params = params - learn_rate * derivative
  # Evaluated after the loop so every exit path returns a (params, cost)
  # pair that belongs together.
  final_cost_l2 = l2_regularization(sigmoid(np.dot(X, params)), Y, penalty, params)
  return params, final_cost_l2
# Demo: fit the L1- and L2-penalized models from the same random starting point.
params = np.random.randn(m + 1)
penalty_l1, penalty_l2 = 2.12, 1
learned_params_l1, final_cost_l1 = gradient_descent_l1(X_bias, Y, params, learn_rate, tau, epoch)
learned_params_l2, final_cost_l2 = gradient_descent_l2(X_bias, Y, params, learn_rate, tau, epoch)
print("Learned beta values (L1 regularization) : ", learned_params_l1)
print(final_cost_l1)
print("Learned beta values (L2 regularization) : ", learned_params_l2)
print(final_cost_l2)
