### Import methods

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import cross_validate
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV

### Method that returns the scores as a DataFrame

In [None]:
def train_val(y_train, y_train_pred, y_test, y_pred, name):
    """Build a two-column table of regression metrics for one model.

    Parameters
    ----------
    y_train, y_train_pred
        True and predicted targets for the training split.
    y_test, y_pred
        True and predicted targets for the test split.
    name : str
        Prefix of the two column labels (``<name>_train`` and ``<name>_test``).

    Returns
    -------
    pandas.DataFrame
        Rows ``R2``, ``mae``, ``mse`` and ``rmse``; one column per split.
    """

    def _metrics(actual, predicted):
        # RMSE is the square root of the MSE, so the MSE is computed once.
        mse = mean_squared_error(actual, predicted)
        return {
            "R2": r2_score(actual, predicted),
            "mae": mean_absolute_error(actual, predicted),
            "mse": mse,
            "rmse": np.sqrt(mse),
        }

    return pd.DataFrame(
        {
            name + "_train": _metrics(y_train, y_train_pred),
            name + "_test": _metrics(y_test, y_pred),
        }
    )

### Method that returns the degree of the best polynomial, up to 5

In [None]:
def poly(d):
    """Return the polynomial degree in ``1..d`` with the lowest test RMSE.

    For every candidate degree the module-level ``X`` is expanded with
    ``PolynomialFeatures``, split 70/30 with ``random_state=58``,
    standardized with statistics from the training part only, and fitted
    with ``LinearRegression``. The RMSE on the held-out part decides.

    Parameters
    ----------
    d : int
        Highest degree to try (inclusive). Must be at least 1.

    Returns
    -------
    int
        The degree (not a list position) with the smallest test RMSE.
        On ties the lowest degree wins.

    Raises
    ------
    ValueError
        If ``d`` is smaller than 1, because there is no degree to try.

    Notes
    -----
    ``X`` and ``y`` are read from the enclosing module/notebook namespace;
    they are not parameters of this function.
    """
    if d < 1:
        raise ValueError("d must be at least 1")

    rmse_by_degree = {}

    # range() excludes its stop value, hence d + 1 to include degree d.
    for degree in range(1, d + 1):
        polynomial_converter = PolynomialFeatures(degree=degree, include_bias=False)
        poly_features = polynomial_converter.fit_transform(X)

        X_train, X_test, y_train, y_test = train_test_split(
            poly_features, y, test_size=0.3, random_state=58
        )

        # Fit the scaler on the training part only, then apply it to both.
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        model = LinearRegression(fit_intercept=True)
        model.fit(X_train_scaled, y_train)

        test_pred = model.predict(X_test_scaled)
        rmse_by_degree[degree] = np.sqrt(mean_squared_error(y_test, test_pred))

    # Keyed by the degree itself, so the result is directly usable as
    # PolynomialFeatures(degree=...); min() keeps the first (lowest) degree
    # when several share the minimum.
    return min(rmse_by_degree, key=rmse_by_degree.get)

### Methods: linear regression, RidgeCV, LassoCV, Elastic Net (grid search), polynomial regression

In [None]:
def methods_with_poly(X, y, test_s):
    """Fit five regression models and print their train/test scores.

    The models are plain linear regression, ``RidgeCV``, ``LassoCV``,
    ``ElasticNet`` tuned with ``GridSearchCV``, and a linear regression on
    polynomial features. All are trained on standardized features and
    scored with ``train_val``.

    Parameters
    ----------
    X
        Feature matrix.
    y
        Target values.
    test_s
        ``test_size`` passed to ``train_test_split`` for every model.

    Returns
    -------
    None
        The combined score table is printed, not returned. Columns are
        ordered linear, ridge_cv, lasso_cv, poly, ENet.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_s, random_state=58
    )

    # Scaling: statistics come from the training split only.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Linear regression
    lr = LinearRegression()
    lr.fit(X_train_scaled, y_train)
    linear_scores = train_val(
        y_train, lr.predict(X_train_scaled), y_test, lr.predict(X_test_scaled), "linear"
    )

    # Ridge CV
    alpha_space = np.linspace(0.01, 1, 100)
    ridge_cv_model = RidgeCV(
        alphas=alpha_space, cv=5, scoring="neg_root_mean_squared_error"
    )
    ridge_cv_model.fit(X_train_scaled, y_train)
    ridge_scores = train_val(
        y_train,
        ridge_cv_model.predict(X_train_scaled),
        y_test,
        ridge_cv_model.predict(X_test_scaled),
        "ridge_cv",
    )

    # Lasso CV
    lasso_cv_model = LassoCV(
        alphas=alpha_space, cv=5, max_iter=100000, random_state=58
    )
    lasso_cv_model.fit(X_train_scaled, y_train)
    lasso_scores = train_val(
        y_train,
        lasso_cv_model.predict(X_train_scaled),
        y_test,
        lasso_cv_model.predict(X_test_scaled),
        "lasso_cv",
    )

    # Elastic Net with grid search
    # NOTE(review): max_iter values this small will probably trigger
    # convergence warnings; kept as in the original grid - confirm intent.
    parameters_grid = {
        "max_iter": [1, 5, 10],
        "alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
        "l1_ratio": np.arange(0.0, 1.0, 0.1),
    }
    grid = GridSearchCV(ElasticNet(), parameters_grid, scoring="r2", cv=5)
    grid.fit(X_train_scaled, y_train)
    enet_scores = train_val(
        y_train,
        grid.predict(X_train_scaled),
        y_test,
        grid.predict(X_test_scaled),
        "ENet",
    )

    # Polynomial regression
    # No local variable may be called `poly`: assigning to that name anywhere
    # in this function makes it local everywhere, and the call below would
    # then raise UnboundLocalError instead of reaching the module-level poly().
    # NOTE: poly() reads the module-level X and y, not this function's
    # arguments, when choosing the degree.
    degree = poly(5)
    polynomial_converter = PolynomialFeatures(degree=degree, include_bias=False)
    poly_features = polynomial_converter.fit_transform(X)

    # Same test_size and random_state as the first split, so the polynomial
    # model is scored on the same hold-out set as the other models.
    X_train_poly, X_test_poly, y_train_poly, y_test_poly = train_test_split(
        poly_features, y, test_size=test_s, random_state=58
    )

    poly_scaler = StandardScaler()
    poly_scaler.fit(X_train_poly)
    X_train_poly_scaled = poly_scaler.transform(X_train_poly)
    X_test_poly_scaled = poly_scaler.transform(X_test_poly)

    poly_model = LinearRegression(fit_intercept=True)
    poly_model.fit(X_train_poly_scaled, y_train_poly)
    poly_scores = train_val(
        y_train_poly,
        poly_model.predict(X_train_poly_scaled),
        y_test_poly,
        poly_model.predict(X_test_poly_scaled),
        "poly",
    )

    print(
        pd.concat(
            [linear_scores, ridge_scores, lasso_scores, poly_scores, enet_scores],
            axis=1,
        )
    )
    

### Methods: linear regression, RidgeCV, LassoCV, Elastic Net (grid search)

In [None]:
def methods(X, y, test_s):
    """Fit four regression models and print their train/test scores.

    The models are plain linear regression, ``RidgeCV``, ``LassoCV`` and
    ``ElasticNet`` tuned with ``GridSearchCV``. All are trained on
    standardized features and scored with ``train_val``.

    Parameters
    ----------
    X
        Feature matrix.
    y
        Target values.
    test_s
        ``test_size`` passed to ``train_test_split``.

    Returns
    -------
    None
        The combined score table is printed, not returned. Columns are
        ordered linear, ridge_cv, lasso_cv, ENet.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_s, random_state=58
    )

    # Scaling: statistics come from the training split only.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Linear regression
    lr = LinearRegression()
    lr.fit(X_train_scaled, y_train)
    linear_scores = train_val(
        y_train, lr.predict(X_train_scaled), y_test, lr.predict(X_test_scaled), "linear"
    )

    # Ridge CV
    alpha_space = np.linspace(0.01, 1, 100)
    ridge_cv_model = RidgeCV(
        alphas=alpha_space, cv=5, scoring="neg_root_mean_squared_error"
    )
    ridge_cv_model.fit(X_train_scaled, y_train)
    ridge_scores = train_val(
        y_train,
        ridge_cv_model.predict(X_train_scaled),
        y_test,
        ridge_cv_model.predict(X_test_scaled),
        "ridge_cv",
    )

    # Lasso CV
    lasso_cv_model = LassoCV(
        alphas=alpha_space, cv=5, max_iter=100000, random_state=58
    )
    lasso_cv_model.fit(X_train_scaled, y_train)
    lasso_scores = train_val(
        y_train,
        lasso_cv_model.predict(X_train_scaled),
        y_test,
        lasso_cv_model.predict(X_test_scaled),
        "lasso_cv",
    )

    # Elastic Net with grid search
    # NOTE(review): max_iter values this small will probably trigger
    # convergence warnings; kept as in the original grid - confirm intent.
    parameters_grid = {
        "max_iter": [1, 5, 10],
        "alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
        "l1_ratio": np.arange(0.0, 1.0, 0.1),
    }
    grid = GridSearchCV(ElasticNet(), parameters_grid, scoring="r2", cv=5)
    grid.fit(X_train_scaled, y_train)
    enet_scores = train_val(
        y_train,
        grid.predict(X_train_scaled),
        y_test,
        grid.predict(X_test_scaled),
        "ENet",
    )

    # Only the four tables built above are combined. No polynomial model is
    # fitted here, and the name `poly` in this scope is the module-level
    # function, which pd.concat cannot concatenate.
    print(pd.concat([linear_scores, ridge_scores, lasso_scores, enet_scores], axis=1))
    