## 다항 변수에 대한 regression. 다항 회귀 분석

In [0]:
def prn_shape(*args):
    """Print the ``.shape`` of every argument on one space-separated line."""
    # print() applies str() to each item and joins with a single space,
    # which matches an explicit ' '.join over the stringified shapes.
    print(*(item.shape for item in args))

In [0]:
import numpy as np
from sklearn import datasets
# Load the iris dataset through scikit-learn; the cells below read its
# ``.data`` (features) and ``.target`` (labels) attributes.
iris = datasets.load_iris()

In [0]:
# Feature matrix as provided by the dataset.
X = iris.data
# Labels turned into a single-column matrix so they line up row-wise with X.
Y = iris.target.reshape((-1, 1))

In [13]:
# The first NUM rows are used for training, the remainder for testing.
# NOTE(review): the rows are taken in their stored order without shuffling;
# if the dataset is ordered by class, the test split may contain a single
# class only (which would explain an R2 of 0) - confirm.
NUM = 100
Xtr, Xte = X[:NUM], X[NUM:]
Ytr, Yte = Y[:NUM], Y[NUM:]
print(Xtr.shape, Xte.shape, sep='\n')

(100, 4)
(50, 4)


In [0]:
def rmse(arr1, arr2):
    """Return the root-mean-square error between two array-likes.

    Both inputs are converted to floating-point arrays first, so that
    plain Python sequences are accepted and unsigned-integer inputs do not
    wrap around when subtracted.

    Args:
        arr1: Array-like of reference values.
        arr2: Array-like of values to compare; must be broadcastable
            against ``arr1``.

    Returns:
        The square root of the mean squared element-wise difference.
    """
    diff = np.asarray(arr1, dtype=float) - np.asarray(arr2, dtype=float)
    return np.sqrt(np.mean(diff ** 2))

In [0]:

# 주어진 모델로 실행하고, 결과 성능 출력하기
from sklearn.metrics import mean_squared_error, r2_score

def exe_regression(model, Xtr, Ytr, Xte, Yte):
    """Fit ``model`` on the training split and print test-split metrics.

    Prints the shapes of the four arrays, then RMSE, MSE and R2 of the
    model's predictions for ``Xte`` measured against ``Yte``.

    Args:
        model: Object exposing ``fit(X, Y)`` and ``predict(X)``.
        Xtr: Training features.
        Ytr: Training targets.
        Xte: Test features.
        Yte: Test targets.
    """
    prn_shape(Xtr, Ytr, Xte, Yte)

    model.fit(Xtr, Ytr)
    predicted = model.predict(Xte)

    # Each metric is computed and reported in turn: RMSE, MSE, then R2.
    print("RMSE: %.4f" % rmse(Yte, predicted))
    mse_v = mean_squared_error(Yte, predicted)
    print("MSE: %.4f" % mse_v)
    r2_v = r2_score(Yte, predicted)
    print('R2: %.4f' % r2_v)

In [0]:
def my_regrssion_with_gr(X, Y, debug=False, *, lr=0.003, epochs=30000,
                         alpha=.5, tol=0.00004, seed=None):
    """Fit linear weights by gradient descent with an L1 (sign) penalty.

    Each epoch computes the mean-squared-error gradient ``2/m * X.T @ (X @ W - Y)``,
    adds ``alpha * sign(W)`` to it, and takes one step of size ``lr``.
    Iteration stops early once the sum of absolute gradient entries drops
    below ``tol``.

    Args:
        X: 2-D array of shape (m, dim). No bias column is added here.
        Y: Targets with ``m`` rows; a 1-D array is treated as a column so
            that the residual does not broadcast into an (m, m) matrix.
        debug: When true, print the gradient magnitude every 1000 epochs.
        lr: Learning rate (step size).
        epochs: Maximum number of gradient steps.
        alpha: Weight of the sign penalty added to the gradient.
        tol: Early-stop threshold on the summed absolute gradient.
        seed: Optional seed for the random initial weights. ``None`` keeps
            using NumPy's global random state.

    Returns:
        Weight matrix of shape (dim, 1) when ``Y`` has a single column.
    """
    X = np.asarray(X)
    m, dim = X.shape[0], X.shape[1]
    Y = np.asarray(Y).reshape((m, -1))

    # A dedicated generator is used only when a seed is requested, so the
    # default call still honours a caller's np.random.seed().
    rng = np.random if seed is None else np.random.RandomState(seed)
    W = rng.randn(dim, 1)

    scale = float(2 / m)  # factor turning the summed gradient into a mean
    for i in range(epochs):
        y2 = np.matmul(X, W)  # predicted y
        err = y2 - Y  # residual
        gradients = scale * np.matmul(X.T, err)
        # Vectorised sign of every weight (-1, 0 or 1) for the penalty term.
        gradients += alpha * np.sign(W)
        W = W - lr * gradients

        sum_of_each_grad = np.sum(np.abs(gradients))
        if debug and i % 1000 == 0:
            print('DEBUG epoch', i, 'sum|grad|', sum_of_each_grad)
        if sum_of_each_grad < tol:
            print('Stop at epoch', i)
            break
    return W


In [0]:
### 내 regression 클래스 만들기
class MyRegressWithGradientDescent(object):
    """Linear regressor trained with ``my_regrssion_with_gr``.

    A constant column of ones is appended to the features so that the last
    learned weight acts as the bias term. The learned weights are stored in
    ``self.w`` after ``fit``.
    """

    @staticmethod
    def _with_bias(X):
        """Return X as a 2-D matrix with a trailing column of ones."""
        X = np.asarray(X)
        rows = X.shape[0]  # number of samples
        X = X.reshape((rows, -1))  # promote a 1-D vector to a column matrix
        return np.append(X, np.ones((rows, 1)), axis=1)

    def fit(self, X, Y):
        """Learn the weights from features ``X`` and targets ``Y``.

        Args:
            X: Features with one row per sample (1-D input is treated as a
                single feature).
            Y: Targets with one row per sample.

        Returns:
            This instance, so calls can be chained.
        """
        Xb = self._with_bias(X)
        Y = np.asarray(Y).reshape((Xb.shape[0], -1))
        self.w = my_regrssion_with_gr(Xb, Y)
        print('W', self.w)
        return self

    def predict(self, X):
        """Return predictions for ``X`` using the weights learned by ``fit``.

        The input goes through the same reshaping as in ``fit``, so a 1-D
        feature vector is accepted here as well.
        """
        return np.matmul(self._with_bias(X), self.w)

In [18]:
from sklearn.linear_model import LinearRegression

# Hand-written gradient-descent model.
print('=== My Regression')
mymodel = MyRegressWithGradientDescent()
exe_regression(mymodel, Xtr, Ytr, Xte, Yte)

# scikit-learn reference model, followed by its fitted coefficients.
print('=== scikit-learn Regression')
skmodel = LinearRegression()
exe_regression(skmodel, Xtr, Ytr, Xte, Yte)
print('W', skmodel.coef_, skmodel.intercept_)

  

=== My Regression
(100, 4) (100, 1) (50, 4) (50, 1)
W [[-0.00144631]
 [-0.00104962]
 [ 0.18429087]
 [ 0.00169068]
 [-0.00052528]]
RMSE: 0.9916
MSE: 0.9833
R2: 0.0000
=== scikit-learn Regression
(100, 4) (100, 1) (50, 4) (50, 1)
RMSE: 0.6191
MSE: 0.3833
R2: 0.0000
W [[-0.02848968 -0.16819751  0.20313089  0.28785017]] [0.36970342]
