In [None]:
import numpy as np
from matplotlib import pyplot as plt
from random import gauss

In [None]:
def gaussian_generator(mu, sigma, n = 100):
  """Draw `n` independent samples from a normal distribution.

  Args:
    mu: mean of the distribution.
    sigma: standard deviation of the distribution.
    n: number of samples to draw. Defaults to 100.

  Returns:
    A list of `n` floats produced by `random.gauss(mu, sigma)`.
  """
  return [gauss(mu, sigma) for _ in range(n)]

In [None]:
# Mean and standard deviation of each 1-D Gaussian class.
cls_0_mu, cls_0_sigma = 5, 0.6
cls_1_mu, cls_1_sigma = 8, 0.5

# Class 0 is sampled before class 1.
class_0 = gaussian_generator(mu = cls_0_mu, sigma = cls_0_sigma)
class_1 = gaussian_generator(mu = cls_1_mu, sigma = cls_1_sigma)

# Overlay the two histograms on a single figure.
plt.hist(class_0, bins = 100, color = "skyblue")
plt.hist(class_1, bins = 100, color = "red")
plt.title("data distribution")
plt.show()

In [None]:
# Stack both classes into one (N, 1) feature column; labels are 0 for
# class_0 samples and 1 for class_1 samples, also shaped (N, 1).
x = np.array(class_0 + class_1).reshape(-1, 1)
y = np.concatenate([np.zeros(len(class_0)), np.ones(len(class_1))]).reshape(-1, 1)

print(x.shape, y.shape, sep = "\n")

(200, 1)
(200, 1)


In [None]:
class LogisticRegression:
  """Binary logistic regression fitted with full-batch gradient descent.

  The model is `h = sigmoid(x @ w + b)` and the cost is the summed
  binary cross-entropy between `h` and the labels.
  """

  def __init__(self, train_data, label_data, lr = 1e-2, delta = 1e-7, max_iter = 10000, cost_threshold = 1e-2, class_threshold = 0.5, verbose = True):
    """Store the training set and hyper-parameters.

    Args:
      train_data: feature array of shape (N, n_features); a 1-D array of
        length N is treated as a single feature column by `train`.
      label_data: 0/1 labels of shape (N, 1) or (N,).
      lr: gradient-descent step size.
      delta: small constant added inside the logarithms of the cost so that
        log(0) is never evaluated.
      max_iter: maximum number of gradient steps.
      cost_threshold: training stops early once the cost drops below this.
      class_threshold: probability above which `predict` returns class 1.
      verbose: when truthy, print the cost every 100000 iterations.
    """
    self.x_data = train_data
    self.y_data = label_data
    self.lr = lr
    self.max_iter = max_iter
    self.delta = delta
    self.cost_threshold = cost_threshold
    self.class_threshold = class_threshold
    self.verbose = verbose

  def sigmoid(self, x):
    """Element-wise logistic function 1 / (1 + exp(-x))."""
    return 1/(1 + np.exp(-x))

  def loss_func(self, h, y):
    """Summed binary cross-entropy between probabilities `h` and labels `y`.

    `self.delta` is added inside both logarithms to avoid log(0).
    """
    return - np.sum(y * np.log(h + self.delta) + (1 - y)*np.log((1-h) + self.delta))

  def train(self, plot = True):
    """Fit `self.w` and `self.b` by batch gradient descent.

    The value of `w` before every update is recorded in `self.w_history`
    (shape (n_iterations_run, n_features)).

    Args:
      plot: when true (default), plot the recorded weight trajectory with
        matplotlib after training finishes.

    Returns:
      False if training stopped early because the cost fell below
      `cost_threshold`, otherwise None.
    """
    x = np.asarray(self.x_data, dtype = float)
    if x.ndim == 1:
      x = x.reshape(-1, 1)
    # Force labels to a column so `h - y` cannot broadcast to (N, N).
    y = np.asarray(self.y_data, dtype = float).reshape(-1, 1)
    n_samples, n_features = x.shape

    self.w = np.random.rand(n_features, 1)
    self.b = np.random.rand(1)

    # Preallocated value history: `self.w` is updated in place, so storing
    # the array object itself would make every entry alias the final weights.
    w_history = np.empty((self.max_iter, n_features))
    n_done = 0
    converged = False

    for i in range(self.max_iter):

      w_history[i] = self.w.ravel()
      n_done = i + 1

      z = np.dot(x, self.w) + self.b    # (N, 1)
      h = self.sigmoid(z)               # predicted probabilities, (N, 1)

      diff = h - y                      # (N, 1)

      cost = self.loss_func(h, y)

      gradient = np.dot(x.transpose(), diff)  # (n_features, 1)

      self.w -= self.lr * gradient / n_samples
      # np.mean already divides by N; do not divide a second time.
      self.b -= self.lr * np.mean(diff)

      if cost < self.cost_threshold:
        converged = True
        break

      if self.verbose and i % 100000 == 0:
        print('cost of {} iteration: {}'.format(i, cost))

    self.w_history = w_history[:n_done]

    if plot:
      # Iterations on the x-axis, one line per weight on the y-axis.
      plt.plot(np.arange(n_done), self.w_history)
      plt.xlabel("iteration")
      plt.ylabel("w")
      plt.title("weight trajectory")
      plt.show()

    return False if converged else None

  def predict(self, x):
    """Predict probability and class for `x`.

    Args:
      x: a scalar (single-feature model) or an array whose last dimension
        matches the number of features.

    Returns:
      (h, h_class): `h` is the array of predicted probabilities. `h_class`
      is a Python int (0 or 1) when there is a single prediction, otherwise
      an int array of 0/1 with the same shape as `h`.
    """
    z = np.dot(x, self.w) + self.b
    h = self.sigmoid(z) # predicted probability of x

    flags = np.asarray(h > self.class_threshold).astype(int)
    h_class = int(flags.item()) if flags.size == 1 else flags

    return h, h_class

  def getwb(self):
    """Return the current parameters as the tuple (w, b)."""
    return self.w, self.b

In [None]:
if __name__ == "__main__":
  # Fit the model on the generated two-class data set.
  LR = LogisticRegression(train_data = x, label_data = y, lr = 0.001, max_iter = 10000000)
  LR.train()

  # Query one point on each side of the two class means (5 and 8).
  for query in (2, 8):
    predicted_prob, predicted_class = LR.predict(query)
    print(predicted_prob, predicted_class)

In [None]:
# Show the fitted weight and bias.
w, b = LR.getwb()
print(w, b)