# Ridge Regression

In [2]:
# import libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly as pl
from sklearn.linear_model import Ridge


In [3]:
# example data set
X = np.array([[1,1],[1,2],[2,2],[2,3]])
# target value
y = np.dot(X, np.array([1,2]))+3

In [4]:
# Ridge Regression Model
ridge_reg = Ridge(alpha = 1.0)
ridge_reg.fit(X,y)

In [5]:
# Coefficients
print("Coefficients", ridge_reg.coef_)
# Intercept
print("Intercept", ridge_reg.intercept_)

Coefficients [0.8 1.4]
Intercept 4.5


In [14]:
# Comparing Simple Linear regression Vs Ridge Regression
# import libraries
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score, mean_absolute_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [4]:
# import dataset
df = sns.load_dataset('titanic')

### Preprocess the data

In [5]:
# select the subsets of the data
col_to_use = ['survived','pclass','sex','age','fare']
df = df[col_to_use]
# handling missing values
# df['age'].fillna(df['age'].mean(), inplace=True)
df['age'] = df['age'].fillna(df['age'].mean())


In [6]:
# define feature and target Values
X = df.drop('survived', axis =1)
y = df['survived']
# Split the data into train test validate
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [7]:
    # Create pipeline for onehot encoding
    cat_feature = ['sex']
    num_feature = ['pclass', 'fare', 'age']
    
    # preprocess
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', 'passthrough', num_feature),
            ('cat', OneHotEncoder(), cat_feature)
        ]
    )
    # linear regression pipeline
    lr_pipeline = Pipeline(steps=[
                           ("preprocesor", preprocessor),
                           ("regressor", LinearRegression())                          
    ])
    # Ridge pipeline
    ridge_pipeline = Pipeline(steps = [
         ("preprocessor", preprocessor),
        ("regressor", Ridge(alpha = 1.0))
    ])

### Traina and evaluate

In [16]:
# train and evaluate the liner model
lr_pipeline.fit(X_train, y_train)
lr_pred = lr_pipeline.predict(X_test)
lr_mse = mean_squared_error(y_test, lr_pre)
lr_mape = mean_absolute_percentage_error(y_test, lr_pre)
lr_r2 = r2_score(y_test, lr_pred)
lr_mae = mean_absolute_error(y_test, lr_pred)
lr_mse = np.sqrt(lr_mse)
# train and evaluate the Ridge
ridge_pipeline.fit(X_train, y_train)
ridge_pred = ridge_pipeline.predict(X_test)
ridge_mse = mean_squared_error(y_test, lr_pre)
ridge_mape = mean_absolute_percentage_error(y_test, lr_pre)
ridge_r2 = r2_score(y_test, lr_pred)
ridge_mae = mean_absolute_error(y_test, lr_pred)
ridge_mse = np.sqrt(lr_mse)
# print the results
print("Linear_regression_MSE", lr_mse)
print("Ridge_regression_MSE", ridge_mse)

print("Linear_regression_r2", lr_mse)
print("Ridge_regression_r2", ridge_mse)

print("Linear_regression_MAE", lr_mse)
print("Ridge_regression_MAE", ridge_mse)

print("Linear_regression_MAPE", lr_mse)
print("Ridge_regression_MAPE", ridge_mse)

Linear_regression_MSE 0.37041911650449827
Ridge_regression_MSE 0.6086206671683917
Linear_regression_r2 0.37041911650449827
Ridge_regression_r2 0.6086206671683917
Linear_regression_MAE 0.37041911650449827
Ridge_regression_MAE 0.6086206671683917
Linear_regression_MAPE 0.37041911650449827
Ridge_regression_MAPE 0.6086206671683917
