# 단변수 선형회귀

In [73]:
import numpy as np

x= np.array([30.0, 100.0])
y= np.array([50.0, 160.0])

n_samples, n_features= len(x), 2
w = 0
b = 1

def predict(x, w, b):
    pred_y= np.zeros(n_samples)
    for i in range(n_samples):
        pred_y[i]= w * x[i]+b
    return pred_y
    #return w.T@X + b

print(predict(x,w,b))

[1. 1.]


In [75]:
def train(x, y):
    w, b= 0.0, 1.0
    lr, epoch= 0.01, 10
    for _ in range(epoch):
        for i in range(n_samples):
            pred_y= w * x[i] + b
            dw= ((pred_y-y[i])*x[i])/ n_samples
            db= (pred_y-y[i])/ n_samples
            w = w - lr * dw
            b = b - lr * db
            print(f'pred_y:{pred_y}, dw:{dw}, db:{db}, w:{w}, b:{b}')
    return w, b

w,b = train(x,y)
print(predict(x,w,b))

pred_y:1.0, dw:-735.0, db:-24.5, w:7.3500000000000005, b:1.245
pred_y:736.245, dw:28812.25, db:288.1225, w:-280.7725, b:-1.636225
pred_y:-8424.811225, dw:-127122.168375, db:-4237.4056125, w:990.44918375, b:40.73783112499999
pred_y:99085.656206125, dw:4946282.81030625, db:49462.8281030625, w:-48472.3789193125, b:-453.890449905625
pred_y:-1454625.2580292805, dw:-21820128.870439205, db:-727337.6290146402, w:169728.90978507957, b:6819.485840240777
pred_y:16979710.4643482, dw:848977523.2174101, db:8489775.2321741, w:-8320046.322389021, b:-78078.26648150024
pred_y:-249679467.9381521, dw:-3745192769.0722814, db:-124839758.96907605, w:29131881.368333794, b:1170319.3232092601
pred_y:2914358456.1565886, dw:145717914807.82944, db:1457179148.0782943, w:-1428047266.7099607, b:-13401472.157573683
pred_y:-42854819473.45639, dw:-642822292851.8458, db:-21427409761.728195, w:5000175661.808497, b:200872625.45970827
pred_y:500218438806.30945, dw:25010921932315.473, db:250109219323.15472, w:-245109043661.3

# 다변수 선형회귀

In [60]:
import numpy as np

X_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

n_samples, n_features= X_train.shape
print(f'n_samples:{n_samples}, n_features:{n_features}')

n_samples:3, n_features:4


In [61]:
w= np.random.rand(n_features)
b= np.random.rand(1)#np.ones(n_features)

In [62]:
def predict(x, w, b):
    #return np.dot(w.T,x) + b
    return np.dot(x, w) + b
print(predict(X_train,w,b))

[1465.13666794  992.39501878  603.30999021]


In [63]:
def train(epoch, x, y, w, b, lr=0.01):
    for _ in range(epoch):
        y_pred=np.dot(x, w) + b
        error= y-y_pred
        dw= (np.dot(x.T,error))/n_samples
        db= np.sum(error)/n_samples
        w = w - lr*dw
        b = b - lr*db
    return w, b

# w,b = train(10, X_train,y_train, w, b)
# print(predict(X_train,w,b))

# Polynomial Regression
- Feature scaling
- Feature engineering

In [64]:
def minmax_scale(x):
    mn, mx = np.min(x), np.max(x)
    x= (x-mn)/mx
    return x

def zscore_scale(x):
    mean, var= np.mean(x), np.var(x)
    x= (x-mean)/var
    return x

In [65]:
#print(minmax_scale(X_train))

In [66]:
#print(zscore_scale(X_train))

In [67]:
x= np.arange(0,20,1)
y= x**2

x_aug = np.c_[x, x**2, x**3]
X_train = zscore_scale(x_aug)
X_train= X_train.reshape(-1, 1)

y_train= np.c_[y, y, y]
y_train=y_train.reshape(-1, 1)

In [68]:
n_samples, n_features= X_train.shape
w= np.random.rand(n_features)
b= np.random.rand(1)

w,b = train(10, X_train, y_train, w, b)
print(predict(X_train,w,b))

[[-13428.66842909 -13428.66842909 -13428.66842909 ... -13428.66842909
  -13428.66842909 -13428.66842909]
 [-13428.66842909 -13428.66842909 -13428.66842909 ... -13428.66842909
  -13428.66842909 -13428.66842909]
 [-13428.66842909 -13428.66842909 -13428.66842909 ... -13428.66842909
  -13428.66842909 -13428.66842909]
 ...
 [-13428.66842597 -13428.66842597 -13428.66842597 ... -13428.66842597
  -13428.66842597 -13428.66842597]
 [-13428.66836996 -13428.66836996 -13428.66836996 ... -13428.66836996
  -13428.66836996 -13428.66836996]
 [-13428.6673057  -13428.6673057  -13428.6673057  ... -13428.6673057
  -13428.6673057  -13428.6673057 ]]


In [72]:
from sklearn.linear_model import SGDRegressor

model = SGDRegressor(loss='squared_error', learning_rate='constant', eta0=0.01, max_iter=10, shuffle=True, random_state=777)

model.fit(X_train, y_train)
print(model.predict(X_train))
#print(model.score(X_train, y_train)) 

[123.25815223 123.25815223 123.25815223 123.25815233 123.25815233
 123.25815233 123.25815244 123.25815266 123.2581531  123.25815255
 123.25815321 123.25815519 123.25815266 123.25815398 123.25815925
 123.25815277 123.25815497 123.25816595 123.25815288 123.25815618
 123.25817593 123.25815299 123.2581576  123.25818987 123.2581531
 123.25815925 123.25820843 123.25815321 123.25816112 123.25823224
 123.25815332 123.2581632  123.25826199 123.25815343 123.25816551
 123.25829832 123.25815354 123.25816803 123.2583419  123.25815365
 123.25817078 123.25839338 123.25815376 123.25817374 123.25845342
 123.25815387 123.25817692 123.25852268 123.25815398 123.25818033
 123.25860183 123.25815409 123.25818395 123.2586915  123.2581542
 123.25818779 123.25879238 123.25815431 123.25819185 123.25890511]


  y = column_or_1d(y, warn=True)
