In [1]:
import pandas as pd
import numpy as np

df=pd.read_csv("Hamburger.csv")

In [2]:
df.isna().sum()

restaurant    0
item          0
calories      0
sodium        0
sugar         0
total_fat     0
protein       0
dtype: int64

In [3]:
df[["calories","sugar","protein","total_fat","sodium"]].corr()

Unnamed: 0,calories,sugar,protein,total_fat,sodium
calories,1.0,0.305176,0.172225,0.722992,0.811997
sugar,0.305176,1.0,0.065812,-0.055417,-0.096583
protein,0.172225,0.065812,1.0,0.093125,0.005111
total_fat,0.722992,-0.055417,0.093125,1.0,0.706748
sodium,0.811997,-0.096583,0.005111,0.706748,1.0


In [4]:
from sklearn.utils.discovery import all_estimators
# 다음 4개에 대해서 진행! 
estimators=all_estimators(type_filter='regressor')
for i in estimators:
    if "Ridge" in i[0] :
        print(i)

('BayesianRidge', <class 'sklearn.linear_model._bayes.BayesianRidge'>)
('KernelRidge', <class 'sklearn.kernel_ridge.KernelRidge'>)
('Ridge', <class 'sklearn.linear_model._ridge.Ridge'>)
('RidgeCV', <class 'sklearn.linear_model._ridge.RidgeCV'>)


In [5]:
from sklearn.linear_model import Ridge          # Ridge 회귀분석
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import BayesianRidge

from sklearn.model_selection import GridSearchCV # 최적의 파라미터를 찾기 위한 그리드서치 및 기타 기능 import
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [6]:
feature=df[["sodium","sugar","total_fat","protein"]]
target=df["calories"]

In [10]:
def save_model(model,filename):
    import joblib
    import os
    model_dir='./model/'
    model_filename=model_dir+f'{filename}.pkl'
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    joblib.dump(model, model_filename)

In [12]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
# 승민님 추가작업 - 모델 호환성을 위해 모델 다시 작성 
x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.15,random_state=287)
model=LinearRegression(fit_intercept=True, copy_X=False, n_jobs=100)
model.fit(x_train, y_train)
save_model(model,"LinearRegression")

model=KNeighborsRegressor(n_neighbors=2, weights='distance', p=1)
model.fit(x_train, y_train)
save_model(model,"KNeighborsRegressor")

In [7]:
# Ridge : https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()

test_max=0
train_max=0
for i in range(1000):
    x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=i)
    scaler.fit(x_train, y_train)
    x_train=scaler.transform(x_train)
    x_test
    model=Ridge(random_state=i)
    model.fit(x_train,y_train)
    res1 = model.score(x_train,y_train)
    res2 = model.score(x_test,y_test)
    if np.abs(res1-res2) < 0.01 and res1 > train_max:
        train_max=res1
        test_max=res2
        print(i,train_max, test_max)

0 0.8617750336409337 0.8633297212637631
15 0.8635312176832376 0.8620643930695525
18 0.8638960013051091 0.8606231007334412
28 0.8649605985952739 0.8578362995186541
56 0.8656971536776141 0.8567534424389159
76 0.8660631842710033 0.8568916399065887


In [8]:
x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=28)
model=GridSearchCV(Ridge(random_state=28, max_iter=1000),param_grid={"alpha":np.arange(0.1,1,0.1),
                                                      "solver": [ 'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'],
                                                      "fit_intercept":[True,False]},cv=5).fit(x_train, y_train)



In [9]:
res=pd.DataFrame(model.cv_results_)
res[res["rank_test_score"]==1][["params","mean_test_score"]]


save_model(model,"Ridge")

In [10]:
# KernelRidge : https://scikit-learn.org/stable/modules/generated/sklearn.kernel_ridge.KernelRidge.html#sklearn.kernel_ridge.KernelRidge
test_max=0
train_max=0
for i in range(1000):
    x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=i)
    model=KernelRidge()
    model.fit(x_train,y_train)
    res1 = model.score(x_train,y_train)
    res2 = model.score(x_test,y_test)
    if np.abs(res1-res2) < 0.01 and res1 > train_max:
        train_max=res1
        test_max=res2
        print(i,train_max, test_max)

0 0.8610393624299972 0.8623246499459608
15 0.8627157389833862 0.8612535789399854
18 0.8628497781353464 0.8606495085285095
25 0.8630520667831582 0.8608343781365919
28 0.8646726856734558 0.8562084058952573
70 0.8647614237991741 0.8558297528612774
76 0.8653271828661557 0.8559812477832894


In [11]:
x_train, x_test, y_train, y_test = train_test_split(feature, target, test_size=0.3, random_state=872)
param_grid = {
    'alpha': np.arange(0.1, 1, 0.1),
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid', 'cosine'],  # 선택한 커널에 따라 추가 파라미터도 설정 가능
    'gamma': [0.1, 0.01, 0.001],  # rbf, poly, sigmoid 커널에 사용
    'degree': [2, 3, 4],  # poly 커널에 사용
}
model = GridSearchCV(KernelRidge(), param_grid=param_grid, cv=5)
model.fit(x_train, y_train)
# 최적의 하이퍼 파라미터 출력
print("Best hyperparameters:", model.best_params_)
# 테스트 데이터로 모델 평가
score = model.score(x_test, y_test)
print("Model score on test data:", score)


  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos",

Best hyperparameters: {'alpha': 0.9, 'degree': 3, 'gamma': 0.001, 'kernel': 'poly'}
Model score on test data: 0.8993212165935588




In [12]:
save_model(model,"KernelRidge")

In [13]:
# RidgeCV : https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RidgeCV.html#sklearn.linear_model.RidgeCV
test_max=0
train_max=0
for i in range(1000):
    x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=i)
    model=RidgeCV()
    model.fit(x_train,y_train)
    res1 = model.score(x_train,y_train)
    res2 = model.score(x_test,y_test)
    if np.abs(res1-res2) < 0.01 and res1 > train_max:
        train_max=res1
        test_max=res2
        print(i,train_max, test_max)

0 0.8617750332088201 0.86333177499449
15 0.8635312173418297 0.8620639180391412
18 0.8638960010008736 0.8606211484753825
28 0.8649605982508893 0.8578357774572329
56 0.8656971532108821 0.8567536864053705
76 0.8660631838991328 0.8568921245583018


In [14]:
x_train, x_test, y_train, y_test = train_test_split(feature, target, test_size=0.3, random_state=28)
param_grid = {
    'alphas': [0.1,0.01,0.001,0.0001,0.00001], 
    'fit_intercept': [True,False], 
    'gcv_mode': ['auto','svd','eigen'], 
}
model = GridSearchCV(RidgeCV(), param_grid=param_grid, cv=5)
model.fit(x_train, y_train)
# 최적의 하이퍼 파라미터 출력
print("Best hyperparameters:", model.best_params_)
# 테스트 데이터로 모델 평가
score = model.score(x_test, y_test)
print("Model score on test data:", score)
save_model(model,"RidgeCV")

Best hyperparameters: {'alphas': 0.1, 'fit_intercept': False, 'gcv_mode': 'auto'}
Model score on test data: 0.8562084468396831


In [15]:
# BayesianRidge : https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.BayesianRidge.html#sklearn.linear_model.BayesianRidge
test_max=0
train_max=0
for i in range(1000):
    x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.3,random_state=i)
    model=BayesianRidge()
    model.fit(x_train,y_train)
    res1 = model.score(x_train,y_train)
    res2 = model.score(x_test,y_test)
    if np.abs(res1-res2) < 0.01 and res1 > train_max:
        train_max=res1
        test_max=res2
        print(i,train_max, test_max)

0 0.8617705464889469 0.8635559389398763
15 0.8635267109954069 0.8619998774889484
18 0.8638912436800853 0.8603484015129443
28 0.8649562528444944 0.8577675823581767
56 0.865692698590971 0.8567747334266163
76 0.8660591722310587 0.8569438954140525


In [16]:
x_train, x_test, y_train, y_test = train_test_split(feature, target, test_size=0.3, random_state=28)
param_grid = {
    'tol': [1e-4, 1e-5, 1e-6],  # 수렴 기준
    'alpha_1': [1e-7, 1e-6, 1e-5],  # alpha_1 값의 후보
    'alpha_2': [1e-7, 1e-6, 1e-5],  # alpha_2 값의 후보
    'lambda_1': [1e-7, 1e-6, 1e-5],  # lambda_1 값의 후보
    'lambda_2': [1e-7, 1e-6, 1e-5],  # lambda_2 값의 후보
    'alpha_init': [None, 1e-8, 1e-7],  # alpha_init 값의 후보
    'lambda_init': [None, 1e-8, 1e-7],  # lambda_init 값의 후보
    'compute_score': [True, False],  # 로그 마진 우도를 계산할지 여부
    'fit_intercept': [True, False],  # 절편을 계산할지 여부
}

model = GridSearchCV(BayesianRidge(max_iter=10000, verbose=True), param_grid=param_grid, cv=4)
model.fit(x_train, y_train)
# 최적의 하이퍼 파라미터 출력
print("Best hyperparameters:", model.best_params_)
# 테스트 데이터로 모델 평가
score = model.score(x_test, y_test)
print("Model score on test data:", score)

Convergence after  4  iterations
Convergence after  4  iterations
Convergence after  4  iterations
Convergence after  4  iterations
Convergence after  5  iterations
Convergence after  5  iterations
Convergence after  5  iterations
Convergence after  5  iterations
Convergence after  5  iterations
Convergence after  5  iterations
Convergence after  5  iterations
Convergence after  5  iterations
Convergence after  3  iterations
Convergence after  3  iterations
Convergence after  3  iterations
Convergence after  3  iterations
Convergence after  4  iterations
Convergence after  4  iterations
Convergence after  4  iterations
Convergence after  4  iterations
Convergence after  4  iterations
Convergence after  4  iterations
Convergence after  4  iterations
Convergence after  4  iterations
Convergence after  3  iterations
Convergence after  3  iterations
Convergence after  3  iterations
Convergence after  3  iterations
Convergence after  4  iterations
Convergence after  4  iterations
Convergenc

In [17]:
save_model(model,"BayesianRidge")