### sklearn.preprocessing.PolynomialFeatures
* class sklearn.preprocessing.PolynomialFeatures(degree=2, *, interaction_only=False, include_bias=True, order='C')

In [2]:
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

# 2x2 sample matrix holding the integers 0..3 in row-major order.
X = np.arange(4).reshape((2, 2))
print('일차 단항식 계수 feature:\n', X)

일차 단항식 계수 feature:
 [[0 1]
 [2 3]]


In [3]:
# Degree-2 polynomial expansion of X. fit_transform learns the feature
# layout from X and expands X in a single call; `poly` stays fitted.
poly = PolynomialFeatures(degree=2)
poly_ftr = poly.fit_transform(X)
print('변환된 2차 다항식 계수 feature: \n', poly_ftr)

변환된 2차 다항식 계수 feature: 
 [[1. 0. 1. 0. 0. 1.]
 [1. 2. 3. 4. 6. 9.]]


In [4]:
def polynomial_func(X):
    """Evaluate the cubic 1 + 2*x + x**2 + x**3 at X.

    Only arithmetic operators are applied to X, so it may be a plain
    number or a NumPy array; for an array the polynomial is evaluated
    elementwise and the result has the same shape as X.
    """
    linear_term = 2*X
    square_term = X**2
    cube_term = X**3
    # Summed left to right, constant term first.
    return 1 + linear_term + square_term + cube_term

# Same 2x2 sample matrix (integers 0..3), pushed through the cubic
# polynomial elementwise.
X = np.arange(4).reshape((2, 2))
y = polynomial_func(X)

print('일차 단항식 계수 feature: \n', X)
print('삼차 다항식 결정값 : \n', y)

일차 단항식 계수 feature: 
 [[0 1]
 [2 3]]
삼차 다항식 결정값 : 
 [[ 1  5]
 [17 43]]


In [7]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
# Two-stage regression pipeline: expand the inputs into degree-2 polynomial
# terms (without the constant bias column), then fit a linear regression on
# the expanded features.
p_model = Pipeline(
    steps=[
        ('Poly', PolynomialFeatures(degree=2, interaction_only=False,
                                    include_bias=False, order='C')),
        ('linear', LinearRegression()),
    ]
)
p_model

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split

# The CSV is read with header=None, so the column labels are supplied here.
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
                'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']

# Fields are separated by runs of whitespace rather than commas.
df = pd.read_csv('../datasets/housing.csv', header=None,
                 delimiter=r"\s+", names=column_names)

# NOTE: a bare `df.head()` in the middle of a cell displays nothing (only a
# cell's last expression is rendered); run it as the last line of its own
# cell to preview the frame.

# MEDV is the regression target; every remaining column is a feature.
y_target = df['MEDV']
X_data = df.drop(columns=['MEDV'])

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_target, test_size=0.3, random_state=156)


In [9]:
# Builds a pipeline with every constructor argument spelled out and shows it
# as the cell result. The object is not assigned to a name, so it is
# discarded after display.
Pipeline(
    memory=None,
    steps=[
        ('poly', PolynomialFeatures(degree=2, include_bias=False,
                                    interaction_only=False, order='C')),
        ('linear', LinearRegression(copy_X=True, fit_intercept=True,
                                    n_jobs=None)),
    ],
    verbose=False,
)

In [11]:
from sklearn.metrics import mean_squared_error, r2_score


In [13]:
# Fit the pipeline on the training split and predict the held-out rows;
# fit() returns the pipeline itself, so the two calls can be chained.
y_preds = p_model.fit(X_train, y_train).predict(X_test)

# RMSE is the square root of the mean squared error.
mse = mean_squared_error(y_test, y_preds)
rmse = np.sqrt(mse)

print('MSE: {0: .3f}, RMSE : {1: .3F}'.format(mse, rmse))
# The second line reports the R^2 score returned by r2_score.
print('Variance socre : {0: .3f}'.format(r2_score(y_test, y_preds)))

MSE:  15.556, RMSE :  3.944
Variance socre :  0.782
