In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston
from sklearn.linear_model import ElasticNet, Lasso, Ridge
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures


In [194]:
def _make_regularized_model(model_name, alpha):
    """Instantiate the linear model called ``model_name`` with strength ``alpha``.

    Raises:
        ValueError: if ``model_name`` is not "Ridge", "Lasso" or "ElasticNet".
    """
    if model_name == "Ridge":
        return Ridge(alpha=alpha)
    if model_name == "Lasso":
        return Lasso(alpha=alpha)
    if model_name == "ElasticNet":
        # The L1/L2 mix is fixed at 0.7 for every alpha.
        return ElasticNet(alpha=alpha, l1_ratio=0.7)
    raise ValueError(
        "Unsupported model_name {!r}; expected 'Ridge', 'Lasso' or 'ElasticNet'".format(model_name)
    )


def get_linear_reg_eval(model_name, params=None, X_data = None, y_target=None, visualize=False, columns=None):
    """Cross-validate a regularized linear model over several alpha values.

    For each alpha in ``params`` the model is scored with
    ``cross_val_score(..., scoring="neg_mean_squared_error", cv=5)`` and the
    per-fold scores, per-fold RMSEs and mean RMSE are printed.

    Args:
        model_name: "Ridge", "Lasso" or "ElasticNet".
        params: iterable of alpha values; ``None`` is treated as empty.
        X_data: feature matrix passed to scikit-learn.
        y_target: target values passed to scikit-learn.
        visualize: when truthy, each model is also fitted on the full data and
            its coefficients are drawn as a bar plot, one subplot per alpha.
            When falsy no figure is created at all.
        columns: labels used as the index of the plotted coefficients.

    Returns:
        list of mean RMSE values, one per alpha, in the order of ``params``.

    Raises:
        ValueError: if ``model_name`` is not supported (and ``params`` is
            non-empty).
    """
    alphas = list(params) if params is not None else []
    print("#### {} ####".format(model_name))

    # Build every model up front so a bad model_name fails before any
    # figure is created or any cross-validation is run.
    models = [_make_regularized_model(model_name, alpha) for alpha in alphas]
    if not models:
        return []

    axs = None
    if visualize:
        # One subplot per alpha; squeeze=False keeps a 2-D array of axes even
        # when there is a single alpha, so indexing below is uniform.
        _, grid = plt.subplots(figsize=(18, 6), nrows=1, ncols=len(alphas), squeeze=False)
        axs = grid[0]

    rmses = []
    for pos, (param, model) in enumerate(zip(alphas, models)):
        # Score against the y_target argument, not a notebook-level global.
        neg_mse_scores = cross_val_score(model, X_data, y_target, scoring="neg_mean_squared_error", cv=5)
        rmse_scores = np.sqrt(-1 * neg_mse_scores)
        avg_rmse = np.mean(rmse_scores)
        rmses.append(avg_rmse)
        print("# alpha = {} #".format(param))
        print("neg_mse_socres : {}".format(neg_mse_scores))
        print("rmse_scores : {}".format(rmse_scores))
        print("avg_rmse : {}\n".format(avg_rmse))

        if visualize:
            # The full-data fit is only needed to obtain coefficients to plot.
            model.fit(X_data, y_target)
            coeff = pd.Series(model.coef_, index=columns).sort_values(ascending=False)
            axs[pos].set_title("alpha=" + str(param))
            axs[pos].set_xlim(-3, 6)
            sns.barplot(x=coeff.values, y=coeff.index, ax=axs[pos])

    return rmses

def get_boston_Xy():
    """Load the Boston housing dataset and split it into features and target.

    Returns:
        (X, y): ``X`` is a DataFrame holding every feature column and ``y`` is
        the "PRICE" Series taken from the dataset's target.
    """
    # NOTE(review): load_boston is no longer shipped by recent scikit-learn
    # releases — confirm the pinned version before relying on this loader.
    raw = load_boston()
    housing = pd.DataFrame(data=raw.data, columns=raw.feature_names)
    housing["PRICE"] = raw.target

    # "PRICE" was appended last, so it is the final column.
    features = housing.iloc[:, :-1]
    target = housing.iloc[:, -1]
    return features, target

def get_scaled_data(method=None, input_data=None):
    """Return ``input_data`` transformed by the requested scaling method.

    Args:
        method: "Standard" (StandardScaler), "MinMax" (MinMaxScaler) or
            "Log" (``np.log1p``). Any other value leaves the data untouched.
        input_data: the data to transform.

    Returns:
        The transformed data, or ``input_data`` itself when ``method`` is not
        one of the recognised names.
    """
    if method == "Log":
        return np.log1p(input_data)

    if method == "Standard":
        scaler = StandardScaler()
    elif method == "MinMax":
        scaler = MinMaxScaler()
    else:
        # Unrecognised (or None) method: pass the input straight through.
        return input_data

    return scaler.fit_transform(input_data)

In [195]:
# Compare Ridge RMSE across feature-scaling methods and alpha values.
X, y = get_boston_Xy()
alphas = [0.1, 1, 10, 100]
model_name = "Ridge"

# None means "no scaling": get_scaled_data returns the input unchanged.
scale_methods = ["Standard", "MinMax", "Log", None]

df_columns = ["scaled_method"] + alphas
print("df_columns ", df_columns)

# Collect one record per scaling method and build the frame once at the end;
# growing a DataFrame row-by-row is quadratic and DataFrame.append no longer
# exists in pandas 2.x.
rmse_rows = []
for scale_method in scale_methods:
    scaled_data = get_scaled_data(method=scale_method, input_data=X)
    print("# scale method = {}#".format(scale_method))
    print("----------------------")
    rmses = get_linear_reg_eval(model_name, params=alphas, X_data=scaled_data, y_target=y, columns=X.columns)
    # Build a new row instead of mutating the list returned by the helper.
    rmse_rows.append([scale_method] + list(rmses))

df = pd.DataFrame(rmse_rows, columns=df_columns)

df_columns  ['scaled_method', 0.1, 1, 10, 100]
# scale method = Standard#
----------------------
#### Ridge ####
# alpha = 0.1 #
neg_mse_socres : [-12.44568311 -26.02064789 -33.06467855 -80.73668697 -33.24130379]
rmse_scores : [3.52784398 5.1010438  5.75018944 8.98535959 5.76552719]
avg_rmse : 5.825992799389448

# alpha = 1 #
neg_mse_socres : [-12.31765541 -25.77825091 -32.98446079 -80.50697961 -32.62453389]
rmse_scores : [3.50965175 5.07722866 5.74320997 8.97256817 5.71178903]
avg_rmse : 5.802889517257745

# alpha = 10 #
neg_mse_socres : [-11.31538047 -24.11681267 -32.54144074 -78.38652815 -28.63649231]
rmse_scores : [3.36383419 4.91088716 5.70451056 8.85361667 5.35130753]
avg_rmse : 5.636831222559074

# alpha = 100 #
neg_mse_socres : [ -9.72983015 -24.5013748  -36.5958468  -68.91738332 -21.96440791]
rmse_scores : [3.11926757 4.94988634 6.04945012 8.30164943 4.68662009]
avg_rmse : 5.421374711794968

[5.825992799389448, 5.802889517257745, 5.636831222559074, 5.421374711794968]
# scale m

In [196]:
# Display the RMSE summary: one row per scaling method, one column per alpha.
df

Unnamed: 0,scaled_method,0.1,1,10,100
0,Standard,5.82599,5.80289,5.63683,5.42137
0,MinMax,5.76357,5.46505,5.75416,7.63492
0,Log,4.77043,4.67623,4.83644,6.24092
0,,5.78849,5.65257,5.51817,5.32959
