# Iris 다중회귀분석

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.datasets import load_iris
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['Class'] = iris.target
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),Class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


## petal length 구하기

In [3]:
y_pl = df['petal length (cm)']
X_pl = df.drop('petal length (cm)', axis=1)

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
pl_X_train, pl_X_test, pl_y_train, pl_y_test = train_test_split(X_pl, y_pl, test_size=0.2, random_state=11)

pl_lr = LinearRegression()
pl_lr.fit(pl_X_train, pl_y_train)
pl_pred = pl_lr.predict(pl_X_test)
pl_mse = mean_squared_error(pl_y_test, pl_pred)
pl_r2 = r2_score(pl_y_test, pl_pred)
print(f'MSE: {pl_mse:.2f}, RMSE: {np.sqrt(pl_mse):.2f}')
print(f'R_squared: {pl_r2:.4f}')

MSE: 0.12, RMSE: 0.34
R_squared: 0.9560


In [5]:
pl_weight = pl_lr.coef_
pl_weight

array([ 0.73546445, -0.58084231,  0.92282323,  0.51932569])

In [6]:
pl_bias = pl_lr.intercept_
pl_bias

-0.38563891796042693

### 회귀식

In [7]:
petal_length = []
for i in df.index:
    petal_length.append(round(pl_weight[0] * df['sepal length (cm)'][i] + pl_weight[1] * df['sepal width (cm)'][i] + pl_weight[2] * df['petal width (cm)'][i] + pl_weight[3] * df['Class'][i] + pl_bias, 2))

### 교차 검증

In [8]:
from sklearn.model_selection import cross_val_score

pl_neg_mean_scores = cross_val_score(pl_lr, X_pl, y_pl, scoring='neg_mean_squared_error', cv=5)
pl_rmse_scores = np.sqrt(-pl_neg_mean_scores)
pl_avg_rmse = np.average(pl_rmse_scores)

print('개별 MSE:', np.round(pl_neg_mean_scores, 2))
print('개별 RMSE:', np.round(pl_rmse_scores, 2))
print(f'평균 RMSE: {pl_avg_rmse:.4f}')

개별 MSE: [-0.06 -0.08 -0.14 -0.1  -0.21]
개별 RMSE: [0.25 0.28 0.38 0.32 0.46]
평균 RMSE: 0.3387


## sepal length 구하기

In [9]:
y_sl = df['sepal length (cm)']
X_sl = df.drop('sepal length (cm)', axis=1)

sl_X_train, sl_X_test, sl_y_train, sl_y_test = train_test_split(X_sl, y_sl, test_size=0.2, random_state=11)

sl_lr = LinearRegression()
sl_lr.fit(sl_X_train, sl_y_train)
sl_pred = sl_lr.predict(sl_X_test)
sl_mse = mean_squared_error(sl_y_test, sl_pred)
sl_r2 = r2_score(sl_y_test, sl_pred)
print(f'MSE: {sl_mse:.2f}, RMSE: {np.sqrt(sl_mse):.2f}')
print(f'R_squared: {sl_r2:.4f}')

MSE: 0.12, RMSE: 0.34
R_squared: 0.7717


In [10]:
sl_weight = sl_lr.coef_
sl_weight

array([ 0.65273839,  0.79008613, -0.42637535, -0.2980568 ])

In [11]:
sl_bias = sl_lr.intercept_
sl_bias

1.6912963007194692

### 회귀식

In [12]:
sepal_length = []
for i in df.index:
    sepal_length.append(round(sl_weight[0] * df['sepal width (cm)'][i] + sl_weight[1] * df['petal length (cm)'][i] + sl_weight[2] * df['petal width (cm)'][i] + sl_weight[3] * df['Class'][i] + sl_bias, 2))

### 교차 검증

In [13]:
sl_neg_mean_scores = cross_val_score(sl_lr, X_sl, y_sl, scoring='neg_mean_squared_error', cv=5)
sl_rmse_scores = np.sqrt(-sl_neg_mean_scores)
sl_avg_rmse = np.average(sl_rmse_scores)

print('개별 MSE:', np.round(sl_neg_mean_scores, 2))
print('개별 RMSE:', np.round(sl_rmse_scores, 2))
print(f'평균 RMSE: {sl_avg_rmse:.4f}')

개별 MSE: [-0.06 -0.08 -0.14 -0.1  -0.14]
개별 RMSE: [0.25 0.29 0.37 0.31 0.38]
평균 RMSE: 0.3199


### sepal width 구하기

In [14]:
y_sw = df['sepal width (cm)']
X_sw = df.drop('sepal width (cm)', axis=1)

sw_X_train, sw_X_test, sw_y_train, sw_y_test = train_test_split(X_sw, y_sw, test_size=0.2, random_state=11)

sw_lr = LinearRegression()
sw_lr.fit(sw_X_train, sw_y_train)
sw_pred = sw_lr.predict(sw_X_test)
sw_mse = mean_squared_error(sw_y_test, sw_pred)
sw_r2 = r2_score(sw_y_test, sw_pred)
print(f'MSE: {sw_mse:.2f}, RMSE: {np.sqrt(sw_mse):.2f}')
print(f'R_squared: {sw_r2:.4f}')

MSE: 0.11, RMSE: 0.33
R_squared: 0.4645


In [15]:
sw_weight = sw_lr.coef_
sw_weight

array([ 0.62354856, -0.59607666,  0.51886865,  0.04129831])

In [16]:
sw_bias = sw_lr.intercept_
sw_bias

0.9918166142805211

### 회귀식

In [17]:
sepal_width = []
for i in df.index:
    sepal_width.append(round(sw_weight[0] * df['sepal length (cm)'][i] + sw_weight[1] * df['petal length (cm)'][i] + sw_weight[2] * df['petal width (cm)'][i] + sw_weight[3] * df['Class'][i] + sw_bias, 2))

### 교차 검증

In [18]:
sw_neg_mean_scores = cross_val_score(sw_lr, X_sw, y_sw, scoring='neg_mean_squared_error', cv=5)
sw_rmse_scores = np.sqrt(-sw_neg_mean_scores)
sw_avg_rmse = np.average(sw_rmse_scores)

print('개별 MSE:', np.round(sw_neg_mean_scores, 2))
print('개별 RMSE:', np.round(sw_rmse_scores, 2))
print(f'평균 RMSE: {sw_avg_rmse:.4f}')

개별 MSE: [-0.08 -0.11 -0.16 -0.08 -0.11]
개별 RMSE: [0.28 0.33 0.41 0.29 0.33]
평균 RMSE: 0.3289


### petal width 구하기

In [19]:
y_pw = df['petal width (cm)']
X_pw = df.drop('petal width (cm)', axis=1)

pw_X_train, pw_X_test, pw_y_train, pw_y_test = train_test_split(X_pw, y_pw, test_size=0.2, random_state=11)

pw_lr = LinearRegression()
pw_lr.fit(pw_X_train, pw_y_train)
pw_pred = pw_lr.predict(pw_X_test)
pw_mse = mean_squared_error(pw_y_test, pw_pred)
pw_r2 = r2_score(pw_y_test, pw_pred)
print(f'MSE: {pw_mse:.2f}, RMSE: {np.sqrt(pw_mse):.2f}')
print(f'R_squared: {pw_r2:.4f}')

MSE: 0.03, RMSE: 0.16
R_squared: 0.9480


In [20]:
pw_weight = pw_lr.coef_
pw_weight

array([-0.13637801,  0.1737315 ,  0.31709073,  0.39551047])

In [21]:
pw_bias = pw_lr.intercept_
pw_bias

-0.12086232598757674

### 회귀식

In [22]:
petal_width = []
for i in df.index:
    petal_width.append(round(pw_weight[0] * df['sepal length (cm)'][i] + pw_weight[1] * df['sepal width (cm)'][i] + pw_weight[2] * df['petal length (cm)'][i] + pw_weight[3] * df['Class'][i] + pw_bias, 2))

### 교차 검증

In [23]:
pw_neg_mean_scores = cross_val_score(pw_lr, X_pw, y_pw, scoring='neg_mean_squared_error', cv=5)
pw_rmse_scores = np.sqrt(-pw_neg_mean_scores)
pw_avg_rmse = np.average(pw_rmse_scores)

print('개별 MSE:', np.round(pw_neg_mean_scores, 2))
print('개별 RMSE:', np.round(pw_rmse_scores, 2))
print(f'평균 RMSE: {pw_avg_rmse:.4f}')

개별 MSE: [-0.01 -0.02 -0.02 -0.04 -0.07]
개별 RMSE: [0.09 0.14 0.15 0.19 0.26]
평균 RMSE: 0.1667


In [24]:
df['sepal length (regression)'] = sepal_length
df['sepal width (regression)'] = sepal_width
df['petal length (regression)'] = petal_length
df['petal width (regression)'] = petal_width
df.head(10)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),Class,sepal length (regression),sepal width (regression),petal length (regression),petal width (regression)
0,5.1,3.5,1.4,0.2,0,5.0,3.44,1.52,0.24
1,4.9,3.0,1.4,0.2,0,4.67,3.32,1.66,0.18
2,4.7,3.2,1.3,0.2,0,4.72,3.25,1.4,0.21
3,4.6,3.1,1.5,0.2,0,4.81,3.07,1.38,0.27
4,5.0,3.6,1.4,0.2,0,5.06,3.38,1.39,0.27
5,5.4,3.9,1.7,0.4,0,5.41,3.55,1.69,0.36
6,4.6,3.4,1.4,0.3,0,4.89,3.18,1.3,0.29
7,5.0,3.4,1.5,0.2,0,5.01,3.32,1.5,0.26
8,4.4,2.9,1.4,0.2,0,4.61,3.0,1.35,0.23
9,4.9,3.1,1.5,0.1,0,4.86,3.2,1.51,0.23
