In [1]:
import numpy as np
import pandas as pd

from sklearn import linear_model

import statsmodels.api as sm

In [2]:
import os
os.getcwd()
os.chdir("/home/aumaron/Desktop/other_projects/")

In [3]:
# Data
ring_failure = pd.read_excel("datasets/o_ring_failure.xlsx", engine="openpyxl")

print(ring_failure.shape)
target = np.array(ring_failure.loc[:, "number_experiencing_thermal_distress"])
ring_failure.drop(columns=["number_experiencing_thermal_distress"], inplace=True)
train_array = np.array(ring_failure)


(23, 4)


### Model with Intercept (without penalty)

In [4]:
#Linear Regression sklearn

lm_sk = linear_model.LinearRegression()
lm_sk.fit(train_array, target)
print(lm_sk.coef_)
print(lm_sk.intercept_)

[-0.05138594  0.00175701  0.01429284]
3.527093383307082


In [5]:
# Linear regression statsmodel

train_array_new = sm.add_constant(train_array, has_constant="add")
lm_st = sm.OLS(target, train_array_new).fit()
print(lm_st.params)
lm_st.summary()

[ 3.52709338e+00 -5.13859399e-02  1.75700897e-03  1.42928426e-02]


0,1,2,3
Dep. Variable:,y,R-squared:,0.36
Model:,OLS,Adj. R-squared:,0.259
Method:,Least Squares,F-statistic:,3.563
Date:,"Fri, 30 Apr 2021",Prob (F-statistic):,0.0337
Time:,15:45:50,Log-Likelihood:,-17.308
No. Observations:,23,AIC:,42.62
Df Residuals:,19,BIC:,47.16
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.5271,1.307,2.699,0.014,0.791,6.263
x1,-0.0514,0.018,-2.802,0.011,-0.090,-0.013
x2,0.0018,0.003,0.517,0.611,-0.005,0.009
x3,0.0143,0.035,0.407,0.689,-0.059,0.088

0,1,2,3
Omnibus:,17.3,Durbin-Watson:,2.392
Prob(Omnibus):,0.0,Jarque-Bera (JB):,18.847
Skew:,1.686,Prob(JB):,8.08e-05
Kurtosis:,5.881,Cond. No.,1980.0


### Model with intercept (lasso penalty)

In [6]:
#Linear Regression sklearn
lm_lasso_sk = linear_model.Lasso(alpha=0.1, fit_intercept=True)
lm_lasso_sk.fit(train_array, target)
print(lm_lasso_sk.coef_)
print(lm_lasso_sk.intercept_)

[-0.04740112  0.00254189  0.00443211]
3.2487785767510435


In [7]:
# Linear regression statsmodel
train_array_new = sm.add_constant(train_array, has_constant="add")
lm_st = sm.OLS(target, train_array_new)
result = lm_st.fit()

# Lasso
results_fr = lm_st.fit_regularized(method="elastic_net", L1_wt=1, alpha=0.1, start_params=None)
final = sm.regression.linear_model.OLSResults(lm_st,
                                              results_fr.params, 
                                              lm_st.normalized_cov_params)
print(final.params)
final.summary()

[ 0.         -0.00295913  0.00470805 -0.01229357]


0,1,2,3
Dep. Variable:,y,R-squared:,0.112
Model:,OLS,Adj. R-squared:,-0.028
Method:,Least Squares,F-statistic:,0.8011
Date:,"Fri, 30 Apr 2021",Prob (F-statistic):,0.509
Time:,15:45:51,Log-Likelihood:,-21.071
No. Observations:,23,AIC:,50.14
Df Residuals:,19,BIC:,54.68
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0,1.539,0,1.000,-3.222,3.222
x1,-0.0030,0.022,-0.137,0.892,-0.048,0.042
x2,0.0047,0.004,1.175,0.254,-0.004,0.013
x3,-0.0123,0.041,-0.297,0.770,-0.099,0.074

0,1,2,3
Omnibus:,6.553,Durbin-Watson:,2.353
Prob(Omnibus):,0.038,Jarque-Bera (JB):,4.848
Skew:,1.107,Prob(JB):,0.0886
Kurtosis:,3.395,Cond. No.,1980.0
