In [None]:
from random import random
from statistics import linear_regression

import matplotlib.pyplot as plt
import mglearn
import numpy as np
import pandas as pd
from IPython.display import display
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

In [None]:
# Draw mglearn's built-in k-NN regression illustration with a single neighbor.
# NOTE(review): the data and styling are chosen inside mglearn, not in this notebook.
mglearn.plots.plot_knn_regression(n_neighbors=1)

In [None]:
# Same mglearn k-NN regression illustration, now with three neighbors.
# NOTE(review): the data and styling are chosen inside mglearn, not in this notebook.
mglearn.plots.plot_knn_regression(n_neighbors=3)

In [None]:
# KNeighborsRegressor is already imported in the notebook's import cell,
# so it is not imported a second time here.

# Build the 40-sample wave dataset and split it into train/test parts
# (random_state=0 keeps the split reproducible).
X, y = mglearn.datasets.make_wave(n_samples=40)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit a 3-neighbor regressor on the training split. The bare fit() call is kept
# as the cell's last expression so the notebook displays the fitted estimator.
reg = KNeighborsRegressor(n_neighbors=3)
reg.fit(X_train, y_train)

In [None]:
# Predict targets for the held-out test split with the fitted k-NN regressor.
test_predictions = reg.predict(X_test)
print("테스트 세트 예측 : \n", test_predictions)

In [None]:
# Score the fitted k-NN regressor on the test split (reported below as R^2).
knn_test_r2 = reg.score(X_test, y_test)
print("테스트 세트 R^2 : ", knn_test_r2)

In [None]:
# Compare k-NN regression fits for 1, 3 and 9 neighbors, one panel per setting.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
# 1,000 evenly spaced points on [-3, 3], reshaped to a single column for predict().
line = np.linspace(-3, 3, 1000).reshape(-1, 1)
for n_neighbors, ax in zip([1, 3, 9], axes):
    # NOTE: this rebinds the notebook-level `reg`; after the cell runs it holds
    # the last model fitted here (n_neighbors=9).
    reg = KNeighborsRegressor(n_neighbors=n_neighbors)
    reg.fit(X_train, y_train)
    # Model prediction curve, then the training and test points on top of it.
    ax.plot(line, reg.predict(line))
    ax.plot(X_train, y_train, '^', c=mglearn.cm2(0), markersize=8)
    ax.plot(X_test, y_test, 'v', c=mglearn.cm2(1), markersize=8)
    ax.set_title(
        "{} 이웃의 훈련 스코어: {:.2f} 테스트 스코어: {:.2f}".format(
            n_neighbors, reg.score(X_train, y_train), reg.score(X_test, y_test)
        )
    )
    ax.set_xlabel("특성")
    ax.set_ylabel("타겟")

# The legend is only shown on the first panel, so create it once after the loop
# instead of rebuilding it on every iteration. The trailing semicolon keeps the
# notebook from echoing the Legend object as cell output.
axes[0].legend(["모델 예측", "훈련 데이터/타깃", "테스트 데이터/타깃"], loc="best");

In [None]:
# Draw mglearn's built-in linear-regression illustration.
# NOTE(review): the helper name suggests it uses the wave dataset; the data is
# prepared inside mglearn, not in this notebook — confirm in mglearn.plots.
mglearn.plots.plot_linear_regression_wave()

In [None]:
# LinearRegression is provided by the notebook's top import cell, so it is not
# imported in the middle of the notebook.

# Build a 60-sample wave dataset and split it reproducibly (random_state=42).
# NOTE: this rebinds X, y and the train/test variables used by earlier cells.
X, y = mglearn.datasets.make_wave(n_samples=60)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Ordinary least-squares fit on the training split.
lr = LinearRegression().fit(X_train, y_train)

# Learned weight(s): the coefficients of the fitted linear model.
print(lr.coef_)
# Learned bias: the intercept of the fitted linear model.
print(lr.intercept_)

In [None]:
# Report the linear model's score on both splits of the wave data.
lr_train_r2 = lr.score(X_train, y_train)
lr_test_r2 = lr.score(X_test, y_test)
print("train set score : ", lr_train_r2)
print("test set score : ", lr_test_r2)


## 보스턴 집값 (Boston housing prices)

In [None]:
# Load mglearn's extended Boston dataset and split it reproducibly (random_state=0).
# NOTE: this rebinds X, y and the train/test variables from the wave experiments.
X, y = mglearn.datasets.load_extended_boston()
boston_split = train_test_split(X, y, random_state=0)
X_train, X_test, y_train, y_test = boston_split

# Unregularized linear regression fitted on the training split.
lr = LinearRegression()
lr = lr.fit(X_train, y_train)

In [None]:
# Compare train vs. test score of the plain linear model; a large gap between
# the two is the sign of overfitting this cell is meant to show.
lr_boston_train_score = lr.score(X_train, y_train)
lr_boston_test_score = lr.score(X_test, y_test)
print(f"train score : {lr_boston_train_score:.2f}")
print(f"test score : {lr_boston_test_score:.2f}")

In [None]:
# Ridge is a more constrained (less flexible) model than plain LinearRegression,
# which is why it is expected to generalize better on this data.
# The Ridge class is provided by the notebook's top import cell.
ridge = Ridge().fit(X_train, y_train)

# Report train and test scores with two decimals for comparison with the
# unregularized model.
print(f"train score : {ridge.score(X_train, y_train):.2f}")
print(f"test score : {ridge.score(X_test, y_test):.2f}")

In [None]:
# Ridge with a larger penalty (alpha=10).
ridge10 = Ridge(alpha=10)
ridge10 = ridge10.fit(X_train, y_train)

ridge10_train_score = ridge10.score(X_train, y_train)
ridge10_test_score = ridge10.score(X_test, y_test)
print(f"train score : {ridge10_train_score:.2f}")
print(f"test score : {ridge10_test_score:.2f}")

In [None]:
# Lowering alpha loosens the constraint on the coefficients, so the model
# behaves more like plain linear regression.
ridge01 = Ridge(alpha=0.1)
ridge01 = ridge01.fit(X_train, y_train)

ridge01_train_score = ridge01.score(X_train, y_train)
ridge01_test_score = ridge01.score(X_test, y_test)
print(f"train score : {ridge01_train_score:.2f}")
print(f"test score : {ridge01_test_score:.2f}")

In [None]:
# Plot the coefficients of every fitted model against their position so the
# effect of the Ridge penalty on coefficient size can be compared visually.
coef_ax = plt.gca()
coefficient_series = (
    (ridge10.coef_, '^', "Ridge alpha=10"),
    (ridge.coef_, '^', "Ridge alpha=1"),
    (ridge01.coef_, '^', "Ridge alpha=0.1"),
    (lr.coef_, 'o', "LinearRegression"),
)
for weights, marker, series_label in coefficient_series:
    coef_ax.plot(weights, marker, label=series_label)

coef_ax.set_xlabel("Number of Weights")
coef_ax.set_ylabel("Size of Weights")

# Draw a zero baseline across the current x-range, then re-apply that range so
# the added line cannot change the axis limits.
xlims = coef_ax.get_xlim()
coef_ax.hlines(0, xlims[0], xlims[1])
coef_ax.set_xlim(xlims)
# Fixed y-window of [-25, 25]; coefficients outside it are cut off from view.
coef_ax.set_ylim(-25, 25)
coef_ax.legend()


In [None]:
# Draw mglearn's built-in Ridge illustration.
# NOTE(review): the helper name suggests it shows Ridge behavior as a function of
# the number of training samples; the details live inside mglearn — confirm there.
mglearn.plots.plot_ridge_n_samples()

In [None]:
# With Lasso, some coefficients can become exactly zero, so counting the
# non-zero coefficients shows how many features the model actually uses.
# The Lasso class is provided by the notebook's top import cell.
lasso = Lasso().fit(X_train, y_train)

print(f"train set score : {lasso.score(X_train, y_train):.2f}")
print(f"test set score : {lasso.score(X_test, y_test):.2f}")
# Boolean mask of non-zero coefficients, summed to get the count.
print(f"number of feature used : {np.sum(lasso.coef_ != 0)}")

In [None]:
# Lasso with a smaller penalty (alpha=0.01); max_iter is raised to 100000 to
# give the solver more iterations.
lasso001 = Lasso(alpha=0.01, max_iter=100000)
lasso001 = lasso001.fit(X_train, y_train)

lasso001_train_score = lasso001.score(X_train, y_train)
lasso001_test_score = lasso001.score(X_test, y_test)
# Number of coefficients that are not zero.
lasso001_n_used = np.sum(lasso001.coef_ != 0)

print(f"train set score : {lasso001_train_score:.2f}")
print(f"test set score : {lasso001_test_score:.2f}")
print(f"number of feature used : {lasso001_n_used}")

In [None]:
# If alpha is lowered too far, the regularization stops having an effect and the
# model overfits, so its results become similar to plain LinearRegression.
lasso00001 = Lasso(alpha=0.0001, max_iter=100000)
lasso00001 = lasso00001.fit(X_train, y_train)

lasso00001_train_score = lasso00001.score(X_train, y_train)
lasso00001_test_score = lasso00001.score(X_test, y_test)
# Number of coefficients that are not zero.
lasso00001_n_used = np.sum(lasso00001.coef_ != 0)

print(f"train set score : {lasso00001_train_score:.2f}")
print(f"test set score : {lasso00001_test_score:.2f}")
print(f"number of feature used : {lasso00001_n_used}")