# Regularized Regression

## IMPORTS

In [1]:
import statsmodels.api as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



## Regularized Regression on AutoMPG

In [3]:
def RegRegAutoMPG():
    #Importing and cleaning
    auto_mpg = pd.read_csv("auto_mpg_cleaned.csv")
    auto_mpg = auto_mpg.dropna()
    auto_mpg = auto_mpg.drop('origin', axis=1)

    # Defining the predictor and result variables
    auto_mpg_x = auto_mpg[["displacement", "cylinders", "horsepower" , "weight", "acceleration", "model_year"]]
    auto_mpg_y = auto_mpg["mpg"]

    #Ridge Regression
    ridge_reg = sm.OLS(auto_mpg_y, auto_mpg_x)
    ridge_result = ridge_reg.fit_regularized(L1_wt = 0)
    print("Ridge Regression Results for AutoMPG")
    print(ridge_result.params)

    #Lasso Regression
    lasso_reg = sm.OLS(auto_mpg_y, auto_mpg_x)
    lasso_result = lasso_reg.fit_regularized(L1_wt = 1)
    print("Lasso Regression Results for AutoMPG")
    print(lasso_result.params)

In [4]:
RegRegAutoMPG()

Ridge Regression Results for AutoMPG
[ 0.01022108 -0.5226089  -0.020873   -0.00639456 -0.05202195  0.61025869]
Lasso Regression Results for AutoMPG
displacement   -0.060331
cylinders       1.209181
horsepower      0.053164
weight         -0.003125
acceleration    0.886662
model_year      0.244719
dtype: float64


## Regularized Regression on House Prices

In [5]:
def RegRegHousePrice():
    #Importing and cleaning
    House_Price = pd.read_csv('house_price_regression_dataset.csv')

    # Defining the predictor and result variables
    house_price_x = House_Price[['Square_Footage', 'Num_Bedrooms', 'Num_Bathrooms', 'Year_Built', 'Lot_Size', 'Garage_Size', 'Neighborhood_Quality']]
    house_price_y = House_Price['House_Price']

    #Ridge Regression
    ridge_reg = sm.OLS(house_price_y, house_price_x).fit_regularized(L1_wt = 0)
    print("Ridge Regression Results for House Prices")
    print(ridge_reg.params)

    #Lasso Regression
    lasso_reg = sm.OLS(house_price_y, house_price_x).fit_regularized(L1_wt = 1)
    print("Lasso Regression Results for House Prices")
    print(lasso_reg.params)

In [6]:
RegRegHousePrice()

Ridge Regression Results for House Prices
[ 1.99083165e+02  9.63843780e+03  7.17574297e+03 -1.31751186e+01
  1.36868676e+04  4.34249077e+03 -1.23555280e+02]
Lasso Regression Results for House Prices
Square_Footage            199.044230
Num_Bedrooms             9554.852139
Num_Bathrooms            6668.601627
Year_Built                -12.607497
Lot_Size                13716.040126
Garage_Size              4401.464565
Neighborhood_Quality     -110.684929
dtype: float64


## Regularized Regression on Insurance

In [7]:
def RegRegInsurance():
    #Importing and cleaning
    Insurance_Charges = pd.read_csv('insurance_cat2num.csv')

    # Defining the predictor and result variables
    insurance_x = Insurance_Charges[['intercept', 'age', 'bmi', 'children', 'sex_male', 'smoker_yes', 'region_northwest', 'region_southeast', 'region_southwest']]
    insurance_y = Insurance_Charges['charges']

    #Ridge Regression
    ridge_reg = sm.OLS(insurance_y, insurance_x).fit_regularized(L1_wt = 0)
    print("Ridge Regression Results for Insurance Charges")
    print(ridge_reg.params)

    #Lasso Regression
    lasso_reg = sm.OLS(insurance_y, insurance_x).fit_regularized(L1_wt = 1)
    print("Lasso Regression Results for Insurance Charges")
    print(lasso_reg.params)

In [8]:
RegRegInsurance()

Ridge Regression Results for Insurance Charges
[-11938.53857617    256.85635254    339.19345361    475.50054515
   -131.3143594   23848.53454191   -352.96389942  -1035.02204939
   -960.0509913 ]
Lasso Regression Results for Insurance Charges
intercept           -7985.120622
age                   264.323841
bmi                   223.941909
children              385.340193
sex_male             -340.798890
smoker_yes          23668.031647
region_northwest     -994.299554
region_southeast    -1186.376086
region_southwest    -1441.595498
dtype: float64
