In [39]:
from sklearn.datasets import load_diabetes 
from sklearn.preprocessing import PolynomialFeatures  # 다항회귀를 위해 다항 속성을 만들어주는 것
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import pandas as pd

### 데이터 불러오기

In [20]:
# Load the diabetes dataset through scikit-learn's load_diabetes loader
diabetes_dataset = load_diabetes()

In [22]:
# Print the dataset's built-in description (sample count, attributes, target)
print(diabetes_dataset.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, total serum cholesterol
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, total cholesterol / HDL
      - s5      ltg, possibly log of serum triglycerides level
      - s6      glu, blood sugar level

Note: Each of these 1

### 다항 속성으로 변환하기

In [23]:
# Build a transformer that expands the input features into degree-2 polynomial terms
polynomial_transformer = PolynomialFeatures(degree=2)

In [24]:
# Fit the transformer on the raw feature matrix and expand it into polynomial terms
polynomial_data = polynomial_transformer.fit_transform(diabetes_dataset.data)

In [25]:
# Sanity check: (rows, columns) of the expanded feature matrix
polynomial_data.shape

(442, 66)

In [26]:
# Resolve a readable name for every generated polynomial column.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2, so
# prefer get_feature_names_out() and fall back only on older releases.
if hasattr(polynomial_transformer, "get_feature_names_out"):
    # get_feature_names_out() returns an ndarray; convert to a list so the
    # result has the same type the legacy method produced.
    polynomial_feature_names = list(
        polynomial_transformer.get_feature_names_out(diabetes_dataset.feature_names)
    )
else:
    polynomial_feature_names = polynomial_transformer.get_feature_names(
        diabetes_dataset.feature_names
    )



In [None]:
# Display the generated polynomial feature names
polynomial_feature_names

### 데이터프레임으로 변환하기

In [29]:
# Wrap the expanded feature matrix in a DataFrame with one labelled column per polynomial term
X = pd.DataFrame(polynomial_data, columns=polynomial_feature_names)

In [30]:
# Preview the feature DataFrame
X

Unnamed: 0,1,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,age^2,age sex,age bmi,age bp,age s1,age s2,age s3,age s4,age s5,age s6,sex^2,sex bmi,sex bp,sex s1,sex s2,sex s3,sex s4,sex s5,sex s6,bmi^2,bmi bp,bmi s1,bmi s2,bmi s3,bmi s4,bmi s5,bmi s6,bp^2,bp s1,bp s2,bp s3,bp s4,bp s5,bp s6,s1^2,s1 s2,s1 s3,s1 s4,s1 s5,s1 s6,s2^2,s2 s3,s2 s4,s2 s5,s2 s6,s3^2,s3 s4,s3 s5,s3 s6,s4^2,s4 s5,s4 s6,s5^2,s5 s6,s6^2
0,1.0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,0.001450,0.001930,0.002349,0.000833,-0.001684,-0.001326,-0.001653,-0.000099,0.000758,-0.000672,0.002568,0.003127,0.001108,-0.002241,-0.001765,-0.002200,-0.000131,0.001009,-0.000894,0.003806,0.001349,-0.002728,-0.002148,-0.002678,-0.000160,0.001228,-0.001089,0.000478,-0.000967,-0.000762,-0.000949,-0.000057,0.000435,-0.000386,0.001956,0.001540,0.001919,0.000115,-0.000880,0.000780,0.001212,0.001511,0.000090,-0.000693,0.000614,0.001884,0.000113,-0.000864,0.000766,0.000007,-0.000052,0.000046,0.000396,-0.000351,0.000311
1,1.0,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068330,-0.092204,0.000004,0.000084,0.000097,0.000050,0.000016,0.000036,-0.000140,0.000074,0.000129,0.000174,0.001993,0.002298,0.001175,0.000377,0.000855,-0.003322,0.001763,0.003050,0.004116,0.002650,0.001355,0.000435,0.000986,-0.003830,0.002033,0.003517,0.004746,0.000693,0.000222,0.000505,-0.001959,0.001040,0.001799,0.002428,0.000071,0.000162,-0.000629,0.000334,0.000577,0.000779,0.000367,-0.001426,0.000757,0.001309,0.001767,0.005537,-0.002939,-0.005085,-0.006861,0.001560,0.002699,0.003641,0.004669,0.006300,0.008502
2,1.0,0.085299,0.050680,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.025930,0.007276,0.004323,0.003792,-0.000484,-0.003890,-0.002917,-0.002760,-0.000221,0.000244,-0.002212,0.002568,0.002253,-0.000287,-0.002311,-0.001733,-0.001640,-0.000131,0.000145,-0.001314,0.001976,-0.000252,-0.002027,-0.001520,-0.001438,-0.000115,0.000127,-0.001153,0.000032,0.000259,0.000194,0.000183,0.000015,-0.000016,0.000147,0.002079,0.001559,0.001475,0.000118,-0.000131,0.001182,0.001169,0.001106,0.000089,-0.000098,0.000887,0.001047,0.000084,-0.000093,0.000839,0.000007,-0.000007,0.000067,0.000008,-0.000074,0.000672
3,1.0,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,0.007932,0.003976,0.001033,0.003265,-0.001086,-0.002226,0.003210,-0.003056,-0.002021,0.000834,0.001993,0.000518,0.001636,-0.000544,-0.001116,0.001609,-0.001532,-0.001013,0.000418,0.000134,0.000425,-0.000141,-0.000290,0.000418,-0.000398,-0.000263,0.000109,0.001344,-0.000447,-0.000916,0.001321,-0.001258,-0.000832,0.000343,0.000149,0.000305,-0.000439,0.000418,0.000277,-0.000114,0.000625,-0.000901,0.000857,0.000567,-0.000234,0.001299,-0.001236,-0.000818,0.000337,0.001177,0.000779,-0.000321,0.000515,-0.000212,0.000088
4,1.0,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,0.000029,-0.000240,-0.000196,0.000118,0.000021,0.000084,0.000044,-0.000014,-0.000172,-0.000251,0.001993,0.001624,-0.000976,-0.000176,-0.000696,-0.000363,0.000116,0.001428,0.002082,0.001324,-0.000796,-0.000143,-0.000567,-0.000296,0.000094,0.001164,0.001697,0.000478,0.000086,0.000341,0.000178,-0.000057,-0.000700,-0.001020,0.000015,0.000061,0.000032,-0.000010,-0.000126,-0.000184,0.000243,0.000127,-0.000040,-0.000499,-0.000727,0.000066,-0.000021,-0.000260,-0.000380,0.000007,0.000083,0.000121,0.001023,0.001492,0.002175
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
437,1.0,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,0.001740,0.002114,0.000820,0.002492,-0.000238,-0.000107,-0.001196,-0.000108,0.001301,0.000301,0.002568,0.000996,0.003028,-0.000289,-0.000130,-0.001453,-0.000131,0.001581,0.000365,0.000387,0.001175,-0.000112,-0.000050,-0.000564,-0.000051,0.000613,0.000142,0.003569,-0.000340,-0.000153,-0.001713,-0.000155,0.001864,0.000431,0.000032,0.000015,0.000163,0.000015,-0.000178,-0.000041,0.000007,0.000074,0.000007,-0.000080,-0.000018,0.000822,0.000074,-0.000894,-0.000207,0.000007,-0.000081,-0.000019,0.000973,0.000225,0.000052
438,1.0,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018118,0.044485,0.000030,-0.000279,0.000088,0.000373,-0.000272,-0.000437,0.000158,-0.000189,0.000100,-0.000245,0.002568,-0.000806,-0.003428,0.002501,0.004012,-0.001453,0.001739,-0.000918,0.002255,0.000253,0.001076,-0.000785,-0.001259,0.000456,-0.000546,0.000288,-0.000708,0.004575,-0.003338,-0.005355,0.001940,-0.002321,0.001226,-0.003009,0.002435,0.003906,-0.001415,0.001693,-0.000894,0.002195,0.006267,-0.002270,0.002716,-0.001434,0.003522,0.000822,-0.000984,0.000520,-0.001276,0.001177,-0.000622,0.001526,0.000328,-0.000806,0.001979
439,1.0,0.041708,0.050680,-0.015906,0.017282,-0.037344,-0.013840,-0.024993,-0.011080,-0.046879,0.015491,0.001740,0.002114,-0.000663,0.000721,-0.001558,-0.000577,-0.001042,-0.000462,-0.001955,0.000646,0.002568,-0.000806,0.000876,-0.001893,-0.000701,-0.001267,-0.000562,-0.002376,0.000785,0.000253,-0.000275,0.000594,0.000220,0.000398,0.000176,0.000746,-0.000246,0.000299,-0.000645,-0.000239,-0.000432,-0.000191,-0.000810,0.000268,0.001395,0.000517,0.000933,0.000414,0.001751,-0.000578,0.000192,0.000346,0.000153,0.000649,-0.000214,0.000625,0.000277,0.001172,-0.000387,0.000123,0.000519,-0.000172,0.002198,-0.000726,0.000240
440,1.0,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044528,-0.025930,0.002068,0.002030,-0.001776,-0.000055,-0.000742,-0.000695,0.001304,-0.001208,-0.002025,0.001179,0.001993,-0.001744,-0.000054,-0.000728,-0.000682,0.001280,-0.001186,-0.001988,0.001158,0.001526,0.000047,0.000637,0.000597,-0.001120,0.001037,0.001739,-0.001013,0.000001,0.000020,0.000019,-0.000035,0.000032,0.000054,-0.000032,0.000266,0.000249,-0.000468,0.000433,0.000727,-0.000423,0.000234,-0.000438,0.000406,0.000681,-0.000396,0.000822,-0.000762,-0.001277,0.000744,0.000705,0.001183,-0.000689,0.001983,-0.001155,0.000672


In [31]:
# Put the regression target into a single-column DataFrame named 'diabetes'
y = pd.DataFrame({'diabetes': diabetes_dataset.target})

In [32]:
# Preview the target DataFrame
y

Unnamed: 0,diabetes
0,151.0
1,75.0
2,141.0
3,206.0
4,135.0
...,...
437,178.0
438,104.0
439,132.0
440,220.0


### 데이터 분리하기

In [34]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

### 학습하기

In [36]:
# Create a linear regression model with default settings; trained on the
# degree-2 polynomial features, it acts as a polynomial regression
model = LinearRegression()

In [37]:
# Fit the model on the training split
model.fit(x_train, y_train)

LinearRegression()

### 예측 & 평가하기

In [38]:
# Predict the target for the held-out test features
y_test_predict = model.predict(x_test)

In [40]:
# Mean squared error between the true and the predicted test targets
mse = mean_squared_error(y_true=y_test, y_pred=y_test_predict)

In [41]:
# Print the square root of the MSE, i.e. the root mean squared error (RMSE)
print(mse ** 0.5)

57.877049027248574
