In [20]:
import pandas as pd
import numpy as np

df=pd.read_csv("Hamburger.csv")

In [21]:
from sklearn.utils.discovery import all_estimators
# 다음 4개에 대해서 진행! 
estimators=all_estimators(type_filter='regressor')
for i in estimators:
    if "Lasso" in i[0] :
        print(i)

('Lasso', <class 'sklearn.linear_model._coordinate_descent.Lasso'>)
('LassoCV', <class 'sklearn.linear_model._coordinate_descent.LassoCV'>)
('LassoLars', <class 'sklearn.linear_model._least_angle.LassoLars'>)
('LassoLarsCV', <class 'sklearn.linear_model._least_angle.LassoLarsCV'>)
('LassoLarsIC', <class 'sklearn.linear_model._least_angle.LassoLarsIC'>)
('MultiTaskLasso', <class 'sklearn.linear_model._coordinate_descent.MultiTaskLasso'>)
('MultiTaskLassoCV', <class 'sklearn.linear_model._coordinate_descent.MultiTaskLassoCV'>)


In [22]:
# 위의 Lasso들을 모두 import
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.linear_model import LassoLars
from sklearn.linear_model import LassoLarsCV
from sklearn.linear_model import LassoLarsIC
from sklearn.linear_model import MultiTaskLasso
from sklearn.linear_model import MultiTaskLassoCV

from sklearn.model_selection import GridSearchCV # 최적의 파라미터를 찾기 위한 그리드서치 및 기타 기능 import
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [23]:
def save_model(model,filename):
    import joblib
    import os
    model_dir='./model/'
    model_filename=model_dir+f'{filename}.pkl'
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    joblib.dump(model.best_estimator_, model_filename)

In [24]:
feature=df[["sodium","sugar","total_fat","protein"]]
target=df["calories"]

In [25]:
# Lasso : https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html#sklearn.linear_model.Lasso
test_max=0
train_max=0
for i in range(1000):
    x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=i)
    model=Lasso()
    model.fit(x_train,y_train)
    res1 = model.score(x_train,y_train)
    res2 = model.score(x_test,y_test)
    if np.abs(res1-res2) < 0.01 and res1 > train_max:
        train_max=res1
        test_max=res2
        print(i,train_max, test_max)

2 0.8645625728056143 0.8590083537852139
28 0.8662447125704024 0.8568647850792414


KeyboardInterrupt: 

In [None]:
x_train, x_test, y_train, y_test = train_test_split(feature, target, test_size=0.3, random_state=872)
param_grid = {
    "alpha": [0.1, 0.5, 1.0, 1.5, 2.0],
    "max_iter": [1000, 5000, 10000],
    "tol": [0.0001, 0.001, 0.01],
    "selection": ["cyclic", "random"],
    "positive": [True, False]
}
model = GridSearchCV(Lasso(), param_grid=param_grid, cv=5)
model.fit(x_train, y_train)
# 최적의 하이퍼 파라미터 출력
print("Best hyperparameters:", model.best_params_)
# 테스트 데이터로 모델 평가
score = model.score(x_test, y_test)
print("Model score on test data:", score)

Best hyperparameters: {'alpha': 1.5, 'max_iter': 10000, 'positive': True, 'selection': 'random', 'tol': 0.01}
Model score on test data: 0.8535480908562916


In [None]:
save_model(model, "Lasso")

In [None]:
# LassoCV : https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoCV.html#sklearn.linear_model.LassoCV
test_max=0
train_max=0
for i in range(1000):
    x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=i)
    model=LassoCV()
    model.fit(x_train,y_train)
    res1 = model.score(x_train,y_train)
    res2 = model.score(x_test,y_test)
    if np.abs(res1-res2) < 0.01 and res1 > train_max:
        train_max=res1
        test_max=res2
        print(i,train_max, test_max)

2 0.8607554777191857 0.8516495274996406
5 0.8619842448865522 0.8525993809568189
32 0.8626519057761934 0.8556169521026503
110 0.8642731785407494 0.8594226072918228
797 0.8643966009580126 0.8546396927085064
974 0.8649196790973666 0.8559656903908645


In [None]:
x_train, x_test, y_train, y_test = train_test_split(feature, target, test_size=0.3, random_state=974)
param_grid = {
    'eps': [1e-2, 1e-3, 1e-4],  # eps 값의 후보
    'alphas': [None, [0.1, 0.01, 0.001]],  # 알파 값의 리스트
    'fit_intercept': [True, False],  # 절편을 계산할지 여부
    'precompute': ['auto', True, False],  # 미리 계산된 Gram 행렬을 사용할지 여부
    'tol': [1e-3, 1e-4, 1e-5],  # 수렴 기준
    'positive': [True, False],  # 회귀 계수를 양수로 제한할지 여부
    'selection': ['cyclic', 'random']  # feature 업데이트 방식
}
model = GridSearchCV(LassoCV(), param_grid=param_grid, cv=5)
model.fit(x_train, y_train)
# 최적의 하이퍼 파라미터 출력
print("Best hyperparameters:", model.best_params_)
# 테스트 데이터로 모델 평가
score = model.score(x_test, y_test)
print("Model score on test data:", score)

Best hyperparameters: {'alphas': [0.1, 0.01, 0.001], 'eps': 0.0001, 'fit_intercept': False, 'positive': True, 'precompute': 'auto', 'selection': 'random', 'tol': 0.001}
Model score on test data: 0.8535573177675566


In [None]:
save_model(model, "LassoCV")

In [26]:
# LassoLars : https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLars.html#sklearn.linear_model.LassoLars
test_max=0
train_max=0
for i in range(1000):
    x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=i)
    model=LassoLars()
    model.fit(x_train,y_train)
    res1 = model.score(x_train,y_train)
    res2 = model.score(x_test,y_test)
    if res2 > test_max:
        train_max=res1
        test_max=res2
        print(i,train_max, test_max)

0 0.8561660250176204 0.882506728306845
3 0.8499150000486391 0.8899332401829739
12 0.8464870167676537 0.8980977872644373
31 0.8293136934894153 0.8985628901278372
39 0.8412577086401332 0.899674286266148
93 0.840141484308524 0.9023012709802588
358 0.8465029972645741 0.9065296863740122


In [30]:
x_train, x_test, y_train, y_test = train_test_split(feature, target, test_size=0.3, random_state=358)
param_grid = {
    'eps': [1e-2, 1e-3, 1e-4],  # eps 값의 후보
    'fit_intercept': [True, False],  # 절편을 계산할지 여부
    'precompute': ['auto', True, False],  # 미리 계산된 Gram 행렬을 사용할지 여부
    'positive': [True, False],  # 회귀 계수를 양수로 제한할지 여부
}
model = GridSearchCV(LassoLars(), param_grid=param_grid, cv=5)
model.fit(x_train, y_train)
# 최적의 하이퍼 파라미터 출력
print("Best hyperparameters:", model.best_params_)
# 테스트 데이터로 모델 평가
score = model.score(x_test, y_test)
print("Model score on test data:", score)

Best hyperparameters: {'eps': 0.01, 'fit_intercept': False, 'positive': True, 'precompute': False}
Model score on test data: 0.9056713898137765


In [31]:
save_model(model, "LassoLars")

In [None]:
# =============================================================================
# 일시 중지
# =============================================================================

In [None]:
# LassoLarsCV : https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLarsCV.html#sklearn.linear_model.LassoLarsCV
test_max=0
train_max=0
for i in range(1000):
    x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=i)
    model=LassoLarsCV()
    model.fit(x_train,y_train)
    res1 = model.score(x_train,y_train)
    res2 = model.score(x_test,y_test)
    if np.abs(res1-res2) < 0.01 and res1 > train_max:
        train_max=res1
        test_max=res2
        print(train_max, test_max)

0.8645626461392588 0.8590189139309868
0.8662447925771448 0.8568797559066781


In [None]:
# LassoLarsIC : https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLarsIC.html#sklearn.linear_model.LassoLarsIC
test_max=0
train_max=0
for i in range(1000):
    x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=i)
    model=LassoLarsIC()
    model.fit(x_train,y_train)
    res1 = model.score(x_train,y_train)
    res2 = model.score(x_test,y_test)
    if np.abs(res1-res2) < 0.01 and res1 > train_max:
        train_max=res1
        test_max=res2
        print(train_max, test_max)

0.8645626461392588 0.8590189139309868
0.8662447925771448 0.8568797559066781


In [None]:
# ElasticNet : https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html#sklearn.linear_model.ElasticNet
from sklearn.linear_model import ElasticNet
test_max=0
train_max=0
for i in range(1000):
    x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=i)
    model=ElasticNet(alpha=0.1)
    model.fit(x_train,y_train)
    res1 = model.score(x_train,y_train)
    res2 = model.score(x_test,y_test)
    if np.abs(res1-res2) < 0.01 and res1 > train_max:
        train_max=res1
        test_max=res2
        print(train_max, test_max)

0.8645626391446695 0.8590161721896995
0.8662447847685604 0.8568746260566804
