In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from random import seed
from random import randrange

# Seed Python's built-in `random` generator (the `seed` imported from `random` above).
# NumPy's random state is separate and is not affected by this call.
seed(10)

In [2]:
def is_similar(X, Y):
    '''Return whether X and Y agree within an absolute tolerance of 1e-7.

    When both arguments are lists they are compared element by element
    (recursively, so nested lists work); lists of different length are never
    similar. Any other pair is compared numerically through ``abs(X - Y)``.
    '''
    both_lists = isinstance(X, list) and isinstance(Y, list)
    if not both_lists:
        return abs(X - Y) <= 1e-7
    if len(X) != len(Y):
        return False
    # A list (not a generator) is built on purpose so every pair is evaluated.
    return all([is_similar(left, right) for left, right in zip(X, Y)])

#### Hypothesis Function

$$ h(x) = w^Tx $$

In [15]:
def h(x):
    '''Linear hypothesis: the product of ``x`` with the notebook-level weights ``w``.

    ``w`` is looked up in the module namespace at call time, so callers must
    bind it before calling. It is only read here, never rebound.
    '''
    weights = w  # plain global read; no `global` declaration is needed for that
    return np.matmul(x, weights)

In [16]:
# this code checks whether your implementation of the function h is correct
# h reads the module-level `w`, so each case rebinds `w` before asserting
w, x = [10,11,12], [9,8,7]
assert is_similar(h(x),262)

w, x = [-3,-6,-3], [1,6,9]
assert is_similar(h(x),-66)

#### Linear Cost/Loss Function

$$J(w) = \frac{1}{2}\sum_{i=1}^m (h_w(x^{(i)}) - y^{(i)})^2 $$

In [31]:
def cost_func_linreg(X, y):
    '''Half the sum of squared errors between ``h(X[i])`` and ``y[i]``.

    Iterates over ``len(y)`` samples and relies on the notebook-level
    hypothesis function ``h`` for the predictions.
    '''
    squared_errors = (np.power(h(X[idx]) - y[idx], 2) for idx in range(len(y)))
    # sum() starts from 0 and adds left to right, like an explicit accumulator.
    return sum(squared_errors) / 2

In [32]:
# this code checks whether your implementation of the cost function is correct
# cost_func_linreg calls h(), which reads the module-level `w` bound on each line below
w, X, y = [-1, 0], [[1,1],[0,1]], [-1,0]
assert is_similar(cost_func_linreg(X,y),0)

w, X, y = [1, 1, 2], [[1,1,1],[0,0,0]], [0,0]
assert is_similar(cost_func_linreg(X,y),8)

#### Optimization with Stochastic Gradient Descent (SGD)

$$w_j = w_j + \alpha(y^{(i)} - h_w(x^{(i)}))x_j^{(i)}$$

In [23]:
def SGD(X, y, lr=1e-4):
    '''Run one pass of per-sample gradient updates over the notebook-level ``w``.

    For every sample ``i`` and every weight ``j`` the weight is moved by
    ``lr * (y[i] - h(X[i])) * X[i][j]``. The weights are modified in place and
    the same object is returned.
    '''
    weights = w  # same object as the global, so in-place updates are visible to h()
    n_samples, n_weights = len(y), len(weights)
    for i in range(n_samples):
        for j in range(n_weights):
            # The residual is recomputed for every weight, so it already
            # reflects the weights updated earlier for this same sample.
            residual = y[i] - h(X[i])
            weights[j] += lr * residual * X[i][j]
    return weights

In [24]:
# this code checks whether your implementation of the SGD function is correct
# SGD updates the module-level `w` in place and returns it
w, X, y = [-1, 0], [[1,2],[0,1]], [-1,0]
assert is_similar(SGD(X,y),[-1.0, 0.0])

# the expected weights below correspond to h() being re-evaluated after each
# single-weight update within a sample (e.g. 0.99960004 = 1 - 1e-4 * 3.9996)
w, X, y = [1, 1, 2], [[1,1,1],[0,0,0]], [0,0]
assert is_similar(SGD(X,y),[0.9996, 0.99960004, 1.999600079996])

#### Convergence

In [33]:
def is_convergen(history_cost, error=1e-5):
    '''Tell whether the last two recorded costs differ by less than ``error``.

    Returns ``False`` while fewer than two costs have been recorded.
    '''
    if len(history_cost) < 2:
        return False
    latest, previous = history_cost[-1], history_cost[-2]
    return np.linalg.norm(latest - previous) < error

In [12]:
# PLAYGROUND
# you can do anything here as long as not adding any new import
# NOTE(review): `data` and `age` are not defined in any visible cell, so this
# cell raises NameError on a fresh kernel until the dataset is loaded first.
# Confirm which cell is supposed to create them.
features = data.iloc[:,:8].copy().values
# copy the appropriate target variable
target = age.copy()
target = target.values
w = [1, 1, 1, 1, 1, 1, 1, 1]

history = []
print('cost func before', cost_func_linreg(features, target))
print('weight before', w)

number_of_iteration = 0
# Safety cap on the number of passes: without it a run that never satisfies
# the convergence test below would loop forever.
MAX_ITERATION = 100000

# this part may be modified to suit the is_convergen function defined above
while number_of_iteration < MAX_ITERATION and not is_convergen(history, 1):
    current_cost = cost_func_linreg(features, target)
    history.append(current_cost)
    if not np.isfinite(current_cost):
        # SGD diverged: an inf/NaN cost makes every later cost delta NaN, and
        # a NaN delta never compares below the threshold, so stop here.
        break
    w = SGD(features, target, lr=0.001)
    number_of_iteration += 1

print('cost func after ' + str(number_of_iteration) + ' iteration ' + str(cost_func_linreg(features, target)))
print('weight after ' + str(number_of_iteration) + ' iteration ' + str(w))

print('\ncost/loss func plot')
plt.figure(figsize=(10,5))
plt.plot(history)
plt.xlabel('number of iteration')
plt.ylabel('loss')
plt.grid(True)
plt.show()

NameError: name 'data' is not defined

In [None]:
class LinearRegression:
    '''Linear regression ``y ~ X.w + b`` trained with per-sample gradient updates.

    Parameters
    ----------
    lr : float
        Step size used for every weight and bias update.
    max_iter : int
        Upper bound on the number of passes over the training set in ``fit``.
    tol : float
        ``fit`` stops once the cost changes by less than ``tol`` between two
        consecutive passes.

    Attributes
    ----------
    w : weights, one per feature (set by ``fit``).
    b : bias term (set by ``fit``).
    history : cost recorded before each pass of the most recent ``fit``.
    '''

    def __init__(self, lr=1e-4, max_iter=100000, tol=1e-7):
        self.lr = lr
        self.max_iter = max_iter
        self.tol = tol
        self.w = []
        self.b = 0
        self.history = []

    def __h(self, x):
        '''Hypothesis for a single sample: ``w . x + b``.'''
        return np.dot(self.w, x) + self.b

    def __cost_func_linreg(self, X, y):
        '''Half the sum of squared residuals over all samples.'''
        residuals = np.matmul(X, self.w) + self.b - y
        return 0.5 * float(np.sum(np.square(residuals)))

    def __SGD(self, X, y, lr):
        '''Run one pass of per-sample updates of ``w`` and ``b``, in place.'''
        for i in range(len(y)):
            x_i, y_i = X[i], y[i]
            for j in range(len(self.w)):
                # The residual is re-evaluated after every single-weight
                # update, the same scheme the notebook's standalone SGD uses.
                self.w[j] += lr * (y_i - self.__h(x_i)) * x_i[j]
            # Bias step uses the residual computed with the updated weights.
            self.b += lr * (y_i - self.__h(x_i))

    def __is_convergen(self, history_cost, error=1e-5):
        '''True when the last two recorded costs differ by less than ``error``.'''
        if len(history_cost) < 2:
            return False
        return abs(history_cost[-1] - history_cost[-2]) < error

    def fit(self, X, y):
        '''Build a linear regression model from the training set (X, y)

        ``X`` must be two-dimensional (samples x features) and ``y`` must hold
        one target per sample. Both are converted to float NumPy arrays so
        that lists, arrays and pandas objects are indexed positionally.

        Raises ValueError when ``X`` is not two-dimensional or when the number
        of targets differs from the number of samples.
        '''
        features = np.asarray(X, dtype=float)
        target = np.asarray(y, dtype=float).ravel()
        n_samples, n_features = features.shape
        if target.shape[0] != n_samples:
            raise ValueError(
                'X has %d samples but y has %d targets' % (n_samples, target.shape[0])
            )

        # Start every fit from a clean state. A history left over from an
        # earlier fit would already look converged and skip training.
        self.w = np.ones(n_features)
        self.b = 0.0
        self.history = []

        for _ in range(self.max_iter):
            if self.__is_convergen(self.history, self.tol):
                break
            cost = self.__cost_func_linreg(features, target)
            self.history.append(cost)
            if not np.isfinite(cost):
                # Diverged: an inf/NaN cost can never pass the convergence
                # test, so further passes would only waste time.
                break
            self.__SGD(features, target, self.lr)

    def predict(self, X):
        '''Predict value for X'''
        # One dot product per row of X against the weights, plus the bias.
        return np.matmul(X, np.asarray(self.w)) + self.b