# 다중회귀

- 특성 공학
    - 기존의 특성을 활용하여 새로운 특성을 뽑아내는 작업

In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split


In [33]:
# Load the fish dataset from CSV into a DataFrame and preview its first rows.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine without editing it.
file_path = '/Users/chojungseok/Desktop/code/DAMF2/ML/data/Fish.csv'
df = pd.read_csv(file_path)

df.head()

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.52,4.02
1,Bream,290.0,24.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.73,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.444,5.134


In [34]:
# Keep only the rows whose Species column is 'Perch' (boolean-mask selection).
perech_df = df[df['Species'] == 'Perch']

In [35]:
# Preview the first rows of the perch-only subset.
perech_df.head()

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
72,Perch,5.9,7.5,8.4,8.8,2.112,1.408
73,Perch,32.0,12.5,13.7,14.7,3.528,1.9992
74,Perch,40.0,13.8,15.0,16.0,3.824,2.432
75,Perch,51.5,15.0,16.2,17.2,4.5924,2.6316
76,Perch,70.0,15.7,17.4,18.5,4.588,2.9415


In [36]:
# Input features: three body measurements of each perch.
perch_full = perech_df.loc[:, ['Length2', 'Height', 'Width']]
# Regression target: selected with a column list so it stays a
# single-column DataFrame (2-D) rather than a Series.
perch_weight = perech_df.loc[:, ['Weight']]

In [37]:
# Sanity check: features and target must have the same number of rows.
print(perch_full.shape, perch_weight.shape)

(56, 3) (56, 1)


In [38]:
# Split the perch data into training and test sets (default test fraction).
# A fixed random_state makes the split deterministic, so the scores computed
# from it are reproducible from run to run instead of changing on every
# execution of the notebook.
train_input, test_input, train_target, test_target = train_test_split(
    perch_full, perch_weight, random_state=42
)

### transformer(변환기)

In [39]:
# PolynomialFeatures is a transformer that helps with feature engineering:
# instead of building new features by hand, pass the features in and it
# generates the polynomial and interaction terms automatically.
from sklearn.preprocessing import PolynomialFeatures

# include_bias=False leaves out the constant column of 1s; if it were
# included, an extra all-ones feature (the constant term) would be added.
# fit() must be called before transform() can be used. It is a setup step,
# not model training, and it returns the transformer, so it is chained here.
poly = PolynomialFeatures(include_bias=False).fit([[3, 5]])
poly.transform([[3, 5]])

array([[ 3.,  5.,  9., 15., 25.]])

In [40]:
# Build the default (degree-2) polynomial features from the training inputs.
# The transformer is fitted on the training split only and kept in `poly`
# so the same fitted object can be reused on other data.
poly = PolynomialFeatures(include_bias=False)
train_poly = poly.fit_transform(train_input)

# Shape of the expanded training matrix: (n_samples, n_generated_features).
# A stray `train_poly[:5]` expression used to sit before this line; it was
# not the last expression of the cell, so it was never displayed and had no
# effect, and has been removed.
train_poly.shape

(42, 9)

In [41]:
# List the names of the generated features (original columns, squares and
# pairwise products) in output-column order.
poly.get_feature_names_out()

array(['Length2', 'Height', 'Width', 'Length2^2', 'Length2 Height',
       'Length2 Width', 'Height^2', 'Height Width', 'Width^2'],
      dtype=object)

In [42]:
from sklearn.linear_model import LinearRegression

In [43]:
# Sanity check before fitting: feature matrix and target have matching rows.
print(train_poly.shape, train_target.shape)

(42, 9) (42, 1)


In [44]:
# Instantiate the linear model and fit it on the polynomial training features
# (fit() returns the estimator, so creation and fitting are chained).
lr = LinearRegression().fit(train_poly, train_target)

# R^2 score on the data the model was trained on.
lr.score(train_poly, train_target)

0.9940079259195529

In [45]:
# Expand the test inputs with the transformer that was fitted on the
# training inputs (transform only, no refit).
test_poly = poly.transform(test_input)

In [46]:
# R^2 score of the fitted linear model on the held-out test split.
lr.score(test_poly, test_target)

0.9496512236871688

In [47]:
# Regenerate the features with polynomial terms up to degree 5.
# The transformer is fitted on the training inputs and then applied
# unchanged to the test inputs.
poly = PolynomialFeatures(degree=5, include_bias=False)
train_poly = poly.fit_transform(train_input)
test_poly = poly.transform(test_input)

# Shape of the expanded training matrix: (n_samples, n_generated_features).
train_poly.shape

(42, 55)

In [48]:
# Refit the existing `lr` estimator on the current `train_poly` features,
# replacing the previously fitted coefficients.
lr.fit(train_poly, train_target)

In [49]:
# Compare R^2 on the training split vs. the test split. In the recorded run
# the training score is almost 1 while the test score is strongly negative,
# i.e. the model is severely overfitted to the training data.
print(lr.score(train_poly, train_target))
print(lr.score(test_poly, test_target))

0.99999999999382
-673.7346918549644


### 스케일링

In [50]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only; the statistics it stores
# are reused later to scale other data.
ss = StandardScaler()
ss.fit(train_poly)

In [51]:
# Scale both splits with the scaler that was fitted on the training features.
train_scaled, test_scaled = ss.transform(train_poly), ss.transform(test_poly)

In [52]:
# train_scaled[:5]

릿지(L2) / 라쏘(L1)  
1. 릿지: 계수를 제곱한 값을 기준으로 규제 적용
    - alpha: L2 규제의 강도 — 규제를 얼마나 강하게 줄 것인가 (값이 클수록 규제가 강해진다)
2. 라쏘: 계수의 절댓값을 기준으로 규제 적용  
일반적으로 릿지 선호 / 계수의 크기를 줄이는 게 목적  
**라쏘는 계수를 아예 0으로 만들어 버릴 수 있다**

In [56]:
# Ridge regression (L2 penalty) on the scaled polynomial features.
from sklearn.linear_model import Ridge

# alpha sets the regularization strength; fit() returns the estimator.
ridge = Ridge(alpha=0.1).fit(train_scaled, train_target)

# R^2 on the training split, then on the test split.
print(ridge.score(train_scaled, train_target))
print(ridge.score(test_scaled, test_target))

0.9947719841501327
0.9512923658157604


In [57]:
# Lasso regression (L1 penalty) on the scaled polynomial features,
# using the estimator's default settings.
from sklearn.linear_model import Lasso

lasso = Lasso().fit(train_scaled, train_target)

# R^2 on the training split, then on the test split.
print(lasso.score(train_scaled, train_target))
print(lasso.score(test_scaled, test_target))

0.9913694196990276
0.9674489051582504


In [58]:
# Inspect the fitted Lasso coefficients; in the recorded output most of them
# are exactly 0, so only a handful of features are actually used.
lasso.coef_

array([  0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,  33.81156083,  76.8139028 ,
        50.73182312, 127.55800301,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,  30.25540431,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,  -0.        ,  -0.        ,  -0.        ,
        -0.        ,  -0.        ,  -0.        ,  -0.        ,
        -0.        ,  -0.        ,  -0.        ,  -0.        ,
        -0.        ,  -0.        ,  -0.        ])