In [1]:
from sklearn.datasets import load_boston
from sklearn.preprocessing import PolynomialFeatures

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import pandas as pd

In [2]:
# Load the Boston house-prices dataset bundled with scikit-learn and print its
# description text.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the environment pins an older release, or plan a different data source.
boston_dataset = load_boston()
print(boston_dataset.DESCR)

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [3]:
# Inspect the raw feature matrix (displayed as the cell output).
boston_dataset.data

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
        4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
        9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
        4.0300e+00],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        5.6400e+00],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
        6.4800e+00],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        7.8800e+00]])

In [4]:
# Transformer that generates polynomial features up to degree 2 (quadratic).
polynomial_transformer = PolynomialFeatures(degree=2)

In [5]:
# Fit the transformer on the raw feature matrix and expand it into the
# degree-2 polynomial feature matrix in one step.
# NOTE(review): `plynomial_data` looks like a misspelling of `polynomial_data`;
# the name is kept because later cells reference it.
plynomial_data = polynomial_transformer.fit_transform(boston_dataset.data)

In [6]:
# Check the dimensions (rows, columns) of the expanded feature matrix.
plynomial_data.shape

(506, 105)

In [7]:
# Column names for the degree-2 feature matrix, built from the original
# Boston feature names.
# `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2 in
# favour of `get_feature_names_out`. Prefer the new method when the installed
# release provides it and fall back to the legacy one otherwise, so the cell
# runs without a deprecation warning on newer releases and still works on
# older ones.
if hasattr(polynomial_transformer, "get_feature_names_out"):
    # The new API returns an ndarray; convert to a list to match the legacy
    # method's return type.
    polynomial_feature_names = list(
        polynomial_transformer.get_feature_names_out(boston_dataset.feature_names)
    )
else:
    polynomial_feature_names = polynomial_transformer.get_feature_names(
        boston_dataset.feature_names
    )

In [8]:
# Wrap the polynomial feature matrix in a DataFrame, one labelled column per feature.
x = pd.DataFrame(data=plynomial_data, columns=polynomial_feature_names)

In [9]:
# Target values as a single-column DataFrame labelled MEDV.
y = pd.DataFrame(data=boston_dataset.target, columns=['MEDV'])

In [10]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=5,
)

In [11]:
# Linear regression estimator created with its default settings.
model = LinearRegression()

In [12]:
# Fit the model on the training split (polynomial features -> MEDV).
model.fit(x_train, y_train)

LinearRegression()

In [13]:
# Predict target values for the held-out test features.
y_test_prediction = model.predict(x_test)

In [14]:
# Root-mean-squared error on the test split: the square root of the mean
# squared error between the true and predicted targets.
mean_squared_error(y_true=y_test, y_pred=y_test_prediction) ** 0.5

3.1965276510293994