In [2]:
import pandas as pd

# Sample data: 20 paired observations, one (height, weight) record per row.
# '키' is the height column and '몸무게' is the weight column.
# NOTE(review): units are not stated anywhere in the notebook -- confirm.
df = pd.DataFrame(
    data=[
        (150, 74), (160, 50), (170, 70), (175, 64), (165, 56),
        (155, 48), (172, 68), (168, 60), (174, 65), (158, 52),
        (162, 54), (173, 67), (156, 49), (159, 51), (167, 58),
        (163, 55), (171, 69), (169, 61), (176, 66), (161, 53),
    ],
    columns=['키', '몸무게'],
)

In [7]:
# Fit an ordinary least squares regression of height ('키') on weight
# ('몸무게') through the statsmodels formula API, then print the text summary.
from statsmodels.formula.api import ols

model = ols(
    formula='키 ~ 몸무게',  # response ~ predictor
    data=df,
).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                      키   R-squared:                       0.280
Model:                            OLS   Adj. R-squared:                  0.240
Method:                 Least Squares   F-statistic:                     6.984
Date:                Wed, 06 Nov 2024   Prob (F-statistic):             0.0165
Time:                        02:36:04   Log-Likelihood:                -64.701
No. Observations:                  20   AIC:                             133.4
Df Residuals:                      18   BIC:                             135.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    135.8209     11.211     12.115      0.0

In [9]:
# Coefficient of determination (R-squared) of the fitted model.
# The summary table shows it rounded to three decimals: 0.280
model.rsquared

0.27954323113299584

In [10]:
# Slope: the fitted regression coefficient on weight ('몸무게').
# Rounded to four decimals: 0.4938
model.params.loc['몸무게']

0.49376558603491283

In [12]:
# Intercept: the constant term of the fitted regression line.
# Rounded to four decimals: 135.8209
model.params.loc['Intercept']

135.8209476309227

In [14]:
# p-value of the weight ('몸무게') coefficient, used to judge whether the
# coefficient is statistically significant.
# Rounded to three decimals: 0.017 (below 0.05, so significant at the 5% level)
model.pvalues.loc['몸무게']

0.01654013445316978

In [16]:
# Predicted height for one new observation whose weight ('몸무게') is 50.
# The one-row frame carries the predictor column named in the model formula.
newdata = pd.DataFrame([[50]], columns=['몸무게'])
model.predict(exog=newdata)

Unnamed: 0,0
0,160.509227


In [22]:
# Residual sum of squares (SSE).
# residual = observed (actual) value - predicted value
# The predictor is passed as a one-column DataFrame rather than a bare Series,
# so the formula interface finds it by column name instead of having to guess
# whether a Series is meant as a row or a column. Selecting only '몸무게' also
# keeps this cell re-runnable after the '잔차' (residual) column exists.
df['잔차'] = df['키'] - model.predict(df[['몸무게']])
# Vectorised pandas reduction instead of the Python builtin sum().
# (statsmodels exposes the same quantities as model.resid and model.ssr.)
(df['잔차'] ** 2).sum()

755.9032418952607

In [26]:
# Mean squared error: the plain average of the squared residuals (SSE / n).
# Reads the '잔차' (residual) column that an earlier cell added to df.
df['잔차'].pow(2).mean()

37.79516209476303

In [28]:
# Cross-check the MSE with scikit-learn: compare the observed heights ('키')
# against the model's predictions for the same rows.
from sklearn.metrics import mean_squared_error

pred = model.predict(exog=df)
mean_squared_error(y_true=df['키'], y_pred=pred)

37.79516209476303

In [29]:
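# 95% confidence interval for the weight ('몸무게') coefficient,
# i.e. the [0.025, 0.975] columns of the summary table: 0.101 to 0.886
# (model.conf_int() returns the same bounds programmatically)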

In [31]:
# Confidence interval and prediction interval for the predicted height when
# weight ('몸무게') is 50.
# NOTE(review): this comment previously said 40 while the code used 50; the
# comment was aligned with the code. If 40 was intended, change the value below.
# alpha=0.05 requests 95% intervals. In the resulting frame,
# mean_ci_lower/mean_ci_upper bound the mean response (confidence interval) and
# obs_ci_lower/obs_ci_upper bound a single new observation (prediction interval).
newdata = pd.DataFrame({'몸무게': [50]})
pred = model.get_prediction(newdata)
pred.summary_frame(alpha=0.05)

Unnamed: 0,mean,mean_se,mean_ci_lower,mean_ci_upper,obs_ci_lower,obs_ci_upper
0,160.509227,2.291332,155.695318,165.323136,146.068566,174.949888


In [None]:
# Confidence interval (mean_ci_lower, mean_ci_upper): 155.695318 to 165.323136
# Prediction interval (obs_ci_lower, obs_ci_upper): 146.068566 to 174.949888