## Class를 이용하여 선형회귀 구현

In [1]:
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt

np.random.seed(0)

In [2]:
# 수치미분

def numerical_derivative(f, x, delta_x=1e-4):
    """Estimate the gradient of ``f`` with respect to ``x`` by central differences.

    Every element of ``x`` is, in turn, overwritten *in place* with
    ``value + delta_x`` and ``value - delta_x``; ``f(x)`` is evaluated for
    both and the element is then put back. Because the perturbation is done
    in place, a callable ``f`` that ignores its argument and instead reads the
    same array object through another reference still sees the perturbed
    values.

    Args:
        f: Callable invoked as ``f(x)``; its result is used as a scalar.
        x: NumPy array of any shape. It should have a floating-point dtype:
            the perturbed value is stored back into ``x``, so an integer
            array would truncate it.
        delta_x: Half-width of the difference step. Defaults to ``1e-4``.

    Returns:
        An array created with ``np.zeros_like(x)`` holding
        ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)`` per element.
    """
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])

    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]  # scalar copy of the original value, used to restore it

        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = tmp_val - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always undo the perturbation, even if f raises, so the caller's
            # array is never left in a modified state.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)
        it.iternext()

    return grad

In [3]:
class LinearRegressionTest:
    """Linear model ``y = xdata . W + b`` fitted with gradient descent.

    ``W`` and ``b`` are created by :meth:`train`; the other methods read
    them, so :meth:`train` has to be called before they are used.
    """

    # constructor
    def __init__(self):

        print("LinearRegressionTest Object is created")

    @staticmethod
    def _report_interval(iteration_count):
        """Return the number of steps between progress lines (5% of the run, at least 1)."""
        # int(iteration_count * 0.05) is 0 for fewer than 20 iterations, which
        # would make the modulo in train() divide by zero.
        return max(1, int(iteration_count * 0.05))

    # obtain current W and current b
    def get_W_b(self):
        """Return the current ``(W, b)`` pair."""
        return self.W, self.b

    # loss function
    def loss_func(self, xdata, tdata):
        """Return the sum of squared residuals divided by ``len(xdata)``.

        Args:
            xdata: Input array that is multiplied with ``W`` via ``np.dot``.
            tdata: Target values compared against the prediction.
        """
        y = np.dot(xdata, self.W) + self.b

        return (np.sum((tdata - y) ** 2)) / (len(xdata))

    # display current error value
    def error_val(self, xdata, tdata):
        """Return the current error; identical to :meth:`loss_func`."""
        return self.loss_func(xdata, tdata)

    # predict method
    def predict(self, test_data):
        """Return ``np.dot(test_data, W) + b`` for the given input."""
        y = np.dot(test_data, self.W) + self.b

        return y

    # train method
    def train(self, xdata, tdata, learning_rate, iteration_count):
        """Fit ``W`` and ``b`` by gradient descent on :meth:`loss_func`.

        ``W`` and ``b`` are re-initialised with ``np.random.rand`` on every
        call. The error is printed at step 0 and then every 5% of
        ``iteration_count`` steps, followed by the elapsed wall-clock time.

        Args:
            xdata: Training inputs; ``xdata.shape[-1]`` sets the number of
                rows of ``W``.
            tdata: Training targets.
            learning_rate: Factor applied to each gradient before subtracting.
            iteration_count: Number of update steps to run.
        """
        self.W = np.random.rand(xdata.shape[-1], 1)
        self.b = np.random.rand(1)

        # The learning rate and the data are passed in per call instead of
        # being stored on the object: the input and target arrays are only
        # referenced for the duration of training and then released, which
        # is friendlier to memory.

        # The argument is ignored on purpose: numerical_derivative perturbs
        # self.W / self.b in place, and loss_func reads them from self.
        def f(_unused):
            return self.loss_func(xdata, tdata)

        report_every = self._report_interval(iteration_count)

        start_time = datetime.now()

        for step in range(iteration_count):

            self.W -= learning_rate * numerical_derivative(f, self.W)

            self.b -= learning_rate * numerical_derivative(f, self.b)

            if step % report_every == 0:
                print("step = ", step, "error value = ", self.error_val(xdata, tdata))

        end_time = datetime.now()

        print("")
        print("Elapsed Time => ", end_time - start_time)

In [4]:
try:
    # Load the CSV as float32: column 0 is the target, the remaining
    # columns are the input features.
    loaded_data = np.loadtxt(
        './(200302)sps.csv',
        delimiter=',',
        dtype=np.float32,
    )

    t_data = loaded_data[:, [0]]
    x_data = loaded_data[:, 1:]

    # Check the dimensionality and shape of the loaded data
    print("x_data.ndim = ", x_data.ndim, ", x_data.shape = ", x_data.shape)
    print("t_data.ndim = ", t_data.ndim, ", t_data.shape = ", t_data.shape)

except Exception as exc:
    # A missing file (FileNotFoundError) and any other failure are reported
    # the same way: print the message and carry on.
    print(str(exc))

x_data.ndim =  2 , x_data.shape =  (50, 4)
t_data.ndim =  2 , t_data.shape =  (50, 1)


### learning_rate = 1e-3, 반복 20,001회(step 0 ~ 20,000)를 수행하는 obj1

In [5]:
# Create the model and fit it on the loaded data
# (20001 iterations, i.e. steps 0 through 20000).
obj1 = LinearRegressionTest()

obj1.train(x_data, t_data, learning_rate=1e-3, iteration_count=20001)

LinearRegressionTest Object is created
step =  0 error value =  60.60477607233993
step =  1000 error value =  0.004047845907162692
step =  2000 error value =  0.0004872236912823996
step =  3000 error value =  5.8645248081851033e-05
step =  4000 error value =  7.058903711209303e-06
step =  5000 error value =  8.496531813550114e-07
step =  6000 error value =  1.0226949652818599e-07
step =  7000 error value =  1.2309787275169963e-08
step =  8000 error value =  1.481681908152528e-09
step =  9000 error value =  1.7834437166247197e-10
step =  10000 error value =  2.14666283829319e-11
step =  11000 error value =  2.583855772641046e-12
step =  12000 error value =  3.1100881488736944e-13
step =  13000 error value =  3.743493890471245e-14
step =  14000 error value =  4.505900113932455e-15
step =  15000 error value =  5.423579216066238e-16
step =  16000 error value =  6.52815459867362e-17
step =  17000 error value =  7.857690008217823e-18
step =  18000 error value =  9.458000113628037e-19
step = 

In [6]:
# Samples to run through the trained model, one row of four values each.
test_data = np.array([
    [4, 4, 4, 4],
    [-3, 0, 9, -1],
    [-7, -9, -2, 8],
    [1, -2, 3, -2],
    [19, -12, 0, -76],
    [2001, -1, 109, 31],
    [-1, 102, -200, 1000],
])

# Print the prediction for every row separately.
for data in test_data:
    print(obj1.predict(data))

[-1.68596503e-11]
[7.]
[-8.]
[8.]
[107.]
[2079.99999996]
[-1303.00000001]


In [7]:
# Show the fitted parameters as the (W, b) tuple returned by get_W_b().
print(obj1.get_W_b())

(array([[ 1.],
       [-1.],
       [ 1.],
       [-1.]]), array([1.58863341e-10]))
