### Linear Models
- Ridge Regression
- Lasso Regression
- Elastic Net
- SGD Regression
- Polynomial Linear Regression

Reference: http://scikit-learn.org/stable/modules/linear_model.html

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import minmax_scale, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge, Lasso, ElasticNet, SGDRegressor, LinearRegression

### DATA PREPARATION & PREPROCESSING PIPELINE

In [None]:
def load_data():
    """Read the autos CSV from the relative data directory into a DataFrame."""
    return pd.read_csv("../data/autos_removed_target_mv.csv")

# drop every row that has a missing value
def handling_mv(df):
    """Return a copy of *df* without rows that contain missing values.

    Cells equal to the string "?" are treated as missing: they are turned
    into NaN first, then every row holding at least one NaN is dropped.
    The input frame is not modified.
    """
    return df.replace("?", np.nan).dropna()

# preprocessing
def preprocessing(df):
    """Split *df* into a model-ready feature frame and the target series.

    The target is the 'normalized-losses' column. Features are taken from
    every column after the first one; numeric features are min-max scaled
    and the remaining (categorical) features are one-hot encoded.

    Returns
    -------
    X_pre : pandas.DataFrame
        Scaled numeric columns followed by the dummy columns, on a fresh
        0..n-1 index.
    y : pandas.Series
        The target column, still carrying the index of *df*.
    """
    target_col = 'normalized-losses'
    y = df[target_col]

    # `.ix` was removed in pandas 1.0; `.iloc` is the positional equivalent
    # of the original `df.ix[:, 1:]`.
    X = df.iloc[:, 1:]
    # Guard against target leakage: if the target is not the skipped first
    # column it would otherwise end up among the features.
    X = X.loc[:, X.columns != target_col]

    # divide real and cat vals
    cols_real = list(X.select_dtypes(include=[np.number]).columns)
    cols_cat = [col for col in X.columns if col not in cols_real]

    # scaling real vals (minmax scale; 0~1)
    if cols_real:
        dfX_scale = pd.DataFrame(minmax_scale(X[cols_real]), columns=cols_real)
    else:
        # nothing to scale: keep an empty frame with a matching row index
        dfX_scale = pd.DataFrame(index=pd.RangeIndex(len(X)))

    # dummies cat vals; the index is reset so the rows line up positionally
    # with the freshly indexed scaled frame when concatenating
    X_dummies = pd.get_dummies(X[cols_cat]).reset_index(drop=True)

    # merge real and cat DF
    X_pre = pd.concat([dfX_scale, X_dummies], axis=1)

    return X_pre, y

In [None]:
# Run the preparation steps in order: load the CSV, drop rows with missing
# values, then build the feature frame and the target.
df = handling_mv(load_data())
X_pre, y = preprocessing(df)

In [None]:
# grid search cv
def modeling(algo, param_name, param_grid, cv=5, scoring="neg_mean_squared_error"):
    """Grid-search one hyper-parameter of *algo* and return the fitted search.

    Parameters
    ----------
    algo : estimator instance to tune.
    param_name : name of the single parameter to search over.
    param_grid : candidate values for that parameter.
    cv : cross-validation setting forwarded to GridSearchCV.
    scoring : scoring name forwarded to GridSearchCV.

    The search is fitted on the module-level ``X_pre`` and ``y``.
    """
    candidates = {param_name: param_grid}
    search = GridSearchCV(algo, candidates, cv=cv, scoring=scoring)
    # fit() hands back the search object itself, so returning it directly
    # is the same as returning the result of fit()
    search.fit(X_pre, y)
    return search

# parameter search graph
def param_graph(param_name):
    """Scatter-plot the cross-validated RMSE for each searched value.

    Reads the fitted grid search from the module-level name ``model``,
    plots RMSE against the candidate values of *param_name*, and prints
    the best parameter setting together with the smallest RMSE.

    Assumes ``model`` was fitted with a negated-MSE scoring (the default
    of ``modeling``), so the sign is flipped before taking the root.
    """
    # pyplot is not part of the notebook's import cell, so bring it in here
    import matplotlib.pyplot as plt

    rmse = np.sqrt(-model.cv_results_['mean_test_score'])

    # "seaborn-white" was renamed to "seaborn-v0_8-white" in matplotlib 3.6;
    # use whichever name this installation provides, else keep the default
    for style_name in ("seaborn-v0_8-white", "seaborn-white"):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break
    plt.scatter(x=model.param_grid[param_name], y=rmse)

    print("best param: ", model.best_params_, "\nmin rmse: ", np.min(rmse))

In [None]:
# Ridge: search 100 log-spaced alpha values between 1e-2 and 1e1 with 10-fold CV
model = modeling(
    algo=Ridge(),
    param_name="alpha",
    param_grid=np.logspace(start=-2, stop=1, num=100),
    cv=10,
)

In [None]:
# plot RMSE against alpha for the Ridge search fitted above
param_graph(param_name="alpha")