In [1]:
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the Boston housing dataset through scikit-learn's bundled loader.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on an older scikit-learn — confirm the pinned version.
house_price = load_boston()
# Bare last expression: the notebook displays the feature array.
house_price.data

In [None]:
# Regression target taken from the loaded dataset; later cells read `y_target`.
y_target = house_price.target
print(y_target.shape)
# Histogram of the target values using 10 bins.
plt.hist(y_target, bins=10)

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures

def get_scaled_data(method='None', p_degree=None, input_data=None):
    """Return ``input_data`` after an optional scaling step and polynomial expansion.

    Parameters
    ----------
    method : str or None
        ``'Standard'`` applies ``StandardScaler``, ``'MinMax'`` applies
        ``MinMaxScaler`` and ``'Log'`` applies ``np.log1p``. Any other value
        (including ``None`` and the default string ``'None'``) leaves the data
        untouched.
    p_degree : int or None
        When not ``None``, the (possibly scaled) data is expanded with
        ``PolynomialFeatures(degree=p_degree, include_bias=False)``.
    input_data : array-like
        Feature matrix handed to the selected transformer.

    Returns
    -------
    The transformed data. When no scaling and no polynomial expansion is
    requested, the very same ``input_data`` object is returned (no copy).

    Notes
    -----
    The scalers are fitted on everything in ``input_data``. If the result is
    later split or cross-validated, the held-out rows have already influenced
    the scaling statistics.
    """
    if method == 'Standard':
        scaled_data = StandardScaler().fit_transform(input_data)
    elif method == 'MinMax':
        scaled_data = MinMaxScaler().fit_transform(input_data)
    elif method == 'Log':
        # log1p keeps zeros finite (log(1 + 0) == 0).
        scaled_data = np.log1p(input_data)
    else:
        # Unknown / empty method: pass the data through unchanged.
        scaled_data = input_data

    # Identity check against None (PEP 8) instead of `!= None`.
    if p_degree is not None:
        scaled_data = PolynomialFeatures(degree=p_degree,
                                         include_bias=False).fit_transform(scaled_data)

    return scaled_data

In [None]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
import pandas as pd



# Candidate Ridge regularisation strengths (defined here, not consumed in this cell).
alphas = [0.1, 1, 10, 100]

# Each entry is (scaling method, polynomial degree); None means "skip that step".
scale_methods = [
    (None, None),
    ('Standard', None),
    ('Standard', 2),
    ('MinMax', None),
    ('MinMax', 2),
    ('Log', None),
]

# Transform the features with every combination and announce which one was applied.
for scale_method in scale_methods:
    transform_name, poly_degree = scale_method
    X_data_scaled = get_scaled_data(
        method=transform_name,
        p_degree=poly_degree,
        input_data=house_price.data,
    )
    print(f'\n## 변환 유형 : {scale_method[0]}, Polynomial Degree: {scale_method[1]}')
    
def get_linear_reg_eval(method="Ridge", params=[], X_data_n=None, y_target_n=None, verbose=False):
    """Print the 5-fold cross-validated mean RMSE of a Ridge model for each alpha.

    NOTE: a function with this same name is defined again in a later cell of
    this notebook; whichever cell was executed last is the one in effect.

    Parameters
    ----------
    method : str
        Accepted for call compatibility but not used: a ``Ridge`` model is
        always fitted regardless of the value.
    params : iterable
        Alpha values to evaluate. The default list is only iterated, never
        mutated.
    X_data_n, y_target_n
        Features and target forwarded to ``cross_val_score``.
    verbose
        Forwarded unchanged to ``cross_val_score``.

    Returns
    -------
    None. One line per alpha is printed.
    """
    for param in params:
        ridge = Ridge(alpha=param)
        # scikit-learn reports *negative* MSE so that larger is better.
        neg_mse_scores = cross_val_score(ridge, X_data_n, y_target_n,
                                         scoring="neg_mean_squared_error", cv=5, verbose=verbose)
        # Flip the sign back, take per-fold RMSE, then average across folds.
        rmse_scores = np.sqrt(-1 * neg_mse_scores)
        avg_rmse = np.mean(rmse_scores)
        print(f"alpha {param}일 때 5 folds의 개별 평균 RMSE : {avg_rmse:.4f}")
               

In [None]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
import pandas as pd

def get_linear_reg_eval(method="Ridge", params=[], X_data_n=None, y_target_n=None, verbose=True):
    """Report the mean RMSE over 5 cross-validation folds for each Ridge alpha.

    NOTE: an earlier cell of this notebook defines a function with the same
    name (there ``verbose`` defaults to False); running this cell replaces it.

    Parameters
    ----------
    method : str
        Currently ignored; the estimator is always ``Ridge``.
    params : iterable
        Alpha values to try, evaluated in order. The default list is never
        mutated.
    X_data_n, y_target_n
        Feature matrix and target passed straight to ``cross_val_score``.
    verbose
        Passed through to ``cross_val_score`` as its ``verbose`` argument.

    Returns
    -------
    None. Results are printed, one line per alpha.
    """
    for param in params:
        ridge = Ridge(alpha=param)
        # The scorer yields negated MSE values (higher is better convention).
        neg_mse_scores = cross_val_score(ridge, X_data_n, y_target_n,
                                         scoring="neg_mean_squared_error", cv=5, verbose=verbose)
        # Undo the negation before the square root; average the per-fold RMSE.
        rmse_scores = np.sqrt(-1 * neg_mse_scores)
        avg_rmse = np.mean(rmse_scores)
        print(f"alpha {param}일 때 5 folds의 개별 평균 RMSE : {avg_rmse:.4f}")

In [None]:
# Report the RMSE for several Ridge alpha values under different data transformations.
alphas = [0.1, 1, 10, 100]

# Six transformations in total: raw data, standardisation, standardisation + polynomial
# features, min-max scaling, min-max scaling + polynomial features, and log transform.
scale_methods = [
    (None, None),
    ("Standard", None),
    ("Standard", 2),
    ("MinMax", None),
    ("MinMax", 2),
    ("Log", None),
]

for scale_method in scale_methods:
    transform_name, poly_degree = scale_method
    X_data_scaled = get_scaled_data(
        method=transform_name,
        p_degree=poly_degree,
        input_data=house_price.data,
    )
    print(f"\n## 변환 유형:{scale_method[0]}, Polynomial Degree:{scale_method[1]}")

    # Prints the fold-averaged RMSE for every alpha. The definitions of
    # get_linear_reg_eval visible in this notebook print only and return nothing.
    get_linear_reg_eval(
        method="Ridge",
        params=alphas,
        X_data_n=X_data_scaled,
        y_target_n=y_target,
        verbose=False,
    )

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

cancer = load_breast_cancer()

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Standardise every feature to zero mean / unit variance before fitting.
# NOTE(review): the scaler is fitted on the full dataset before the split, so
# test rows contribute to the scaling statistics — confirm this is acceptable.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(cancer.data)

# Hold out 30% of the rows for evaluation; fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
        data_scaled, cancer.target, test_size=0.3, random_state=0
)

from sklearn.metrics import accuracy_score, roc_auc_score
lr_clf = LogisticRegression()
lr_clf.fit(X_train, y_train)
lr_preds = lr_clf.predict(X_test)
# ROC AUC measures how well a continuous score ranks the samples, so it needs
# the positive-class probability rather than the thresholded 0/1 predictions.
lr_pred_proba = lr_clf.predict_proba(X_test)[:, 1]

print(f'accuracy : {accuracy_score(y_test, lr_preds)}')
print(f'roc_auc : {roc_auc_score(y_test, lr_pred_proba)}')

In [None]:
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor 

# Load the Boston housing data into a DataFrame so features are addressable by name.
boston = load_boston()
bostonDF = pd.DataFrame(boston.data, columns=boston.feature_names)

# Separate the target column (PRICE) from the feature columns. The target is
# defined here so the cell no longer relies on `y_target` left over from an
# earlier cell.
bostonDF['PRICE'] = boston.target
y_target = bostonDF['PRICE']
X_target = y_target  # legacy alias: this cell originally bound the target to `X_target`
X_data = bostonDF.drop(['PRICE'], axis=1, inplace=False)

# 5-fold cross-validation of a random forest; the scorer returns negative MSE.
rf = RandomForestRegressor(random_state=0, n_estimators=1000)
neg_mse_scores = cross_val_score(rf, X_data, y_target, scoring='neg_mean_squared_error', cv=5)
rmse_scores = np.sqrt(-1 * neg_mse_scores)
avg_rmse = np.mean(rmse_scores)

# Regressors to compare with the shared helper.
dt_reg = DecisionTreeRegressor(random_state=0, max_depth=4)
rf_reg = RandomForestRegressor(random_state=0, n_estimators=1000)
xg_reg = XGBRegressor(random_state=0, n_estimators=1000)

models = [dt_reg, rf_reg, xg_reg]
for model in models:
    # Pass the actual estimator and data (previously every argument was None).
    # NOTE: get_model_cv_prediction is defined in a later cell of this notebook,
    # so that cell must have been executed before this loop runs.
    get_model_cv_prediction(model=model, X_data=X_data, y_target=y_target)


print(f'5교차 검증의 개별 Negative MSE score : {np.round(neg_mse_scores, 2)}')
print(f'5교차 검증의 개별 RMSE scores : {np.round(rmse_scores, 2) }')
print(f'5교차 검증의 평균 RMSE : {avg_rmse : .3f}')

In [None]:
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor 

In [None]:
# Regressors to compare. The module-level names dt_reg / rf_reg / xg_reg are
# kept as-is because other cells may refer to them.
dt_reg = DecisionTreeRegressor(max_depth=4, random_state=0)
rf_reg = RandomForestRegressor(n_estimators=1000, random_state=0)
xg_reg = XGBRegressor(n_estimators=1000, random_state=0)

# Evaluate each regressor with the shared cross-validation helper.
models = [dt_reg, rf_reg, xg_reg]
for model in models:
    get_model_cv_prediction(model=model, X_data=X_data, y_target=y_target)

In [None]:
def get_model_cv_prediction(model=None, X_data=None, y_target=None):
    """Print the mean RMSE of ``model`` over 5 cross-validation folds.

    The heading line uses the estimator's class name, so the function does not
    depend on any module-level estimator variables and labels every model
    correctly (not only the three used in this notebook).

    Parameters
    ----------
    model
        Estimator forwarded to ``cross_val_score``.
    X_data, y_target
        Features and target forwarded to ``cross_val_score``.

    Returns
    -------
    None. A two-line report is printed.
    """
    # The scorer returns negative MSE values; negate before taking the root.
    neg_mse_scores = cross_val_score(model, X_data, y_target,
                                     scoring="neg_mean_squared_error", cv=5)
    rmse_scores = np.sqrt(-1 * neg_mse_scores)
    avg_rmse = np.mean(rmse_scores)
    # Class name yields e.g. "DecisionTreeRegressor", "RandomForestRegressor", "XGBRegressor".
    model_label = type(model).__name__
    print(f"### {model_label} ### \n5 교차 검증의 평균 RMSE : {np.round(avg_rmse,3)}")