In [None]:
# Prepare a L1(Lasso)and L2(Ridge) Regression models for prediction of House prices.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression,Lasso,Ridge #Import Lasso,Ridge
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler


df = pd.read_csv("/content/housing (3).csv")

#df.head()

#target variable encoding

df["ocean_proximity"] = df.groupby("ocean_proximity")["median_house_value"].transform("mean")

#df.head()

#Feature Scaling: scaling all values 0 to 1, so it doesn't perceive higher numerical values as having more importance.
scaler = MinMaxScaler()
df1 = pd.DataFrame(scaler.fit_transform(df),columns = df.columns)

#df.isnull() to check for null values
df1.fillna(999,inplace=True) #inplace = True makes the changes permanent and replaces the null values with 999

#Splitting into Training and testing data
y = df1["median_house_value"]
x = df1.drop("median_house_value",axis=1)

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.3,random_state=1)
#Ques:random_state kya hota h

#Preparing the 3 Regression Models:
linear_model = LinearRegression()
lasso_model = Lasso(alpha=0.1)
ridge_model = Ridge(alpha=0.1)
#alpha is the Regularisation parameter which we are adding as an error term to our model
#We are not taking the errors added in the feature. We are only taking the Regularisation parameter. Not taking the error in each feature weight etc that takes time.
# So we providing error only as alpha value or Lambda value in here, thats the only penalty term we're providing here.

linear_model.fit(x_train,y_train)
lasso_model.fit(x_train,y_train)
ridge_model.fit(x_train,y_train)


print(linear_model.coef_)
print(lasso_model.coef_)
print(ridge_model.coef_)
#ques: coef_ and intercept_ ????
print(linear_model.intercept_)
print(lasso_model.intercept_)
print(ridge_model.intercept_)









[-5.21369632e-01 -4.80885402e-01  1.05448058e-01 -9.95155295e-02
  9.05811317e-07 -2.96900236e+00  1.71684723e+00  1.14140894e+00
  1.78608947e-01]
[-0. -0.  0.  0.  0. -0.  0.  0.  0.]
[-5.19071574e-01 -4.77794156e-01  1.05589015e-01 -8.48795182e-02
  8.27450951e-07 -2.83592891e+00  1.64191665e+00  1.13973254e+00
  1.80420220e-01]
0.40752917375286374
0.3972234099154518
0.4050122758456162


In [None]:
linear_model.score(x_test,y_test)

0.6363169885864803

In [None]:
lasso_model.score(x_test,y_test)

-0.0005372966032284321

In [None]:
ridge_model.score(x_test,y_test)

0.636010311671446

In [None]:
# Mean squared error for LinearRegression
linear_train_mse = mean_squared_error(y_train,linear_model.predict(x_train))
linear_test_mse = mean_squared_error(y_test,linear_model.predict(x_test))
print("Train:",linear_train_mse)
print("Test:",linear_test_mse)



Train: 0.02023851649428413
Test: 0.0203262390236933


In [None]:
#MSE for lasso
lasso_train_mse = mean_squared_error(y_train,lasso_model.predict(x_train))
lasso_test_mse = mean_squared_error(y_test,lasso_model.predict(x_test))
print("Train:",lasso_train_mse)
print("Test:",lasso_test_mse)

Train: 0.056905352645749696
Test: 0.05592001717053842


In [None]:
# MSE for Ridge
ridge_train_mse = mean_squared_error(y_train,ridge_model.predict(x_train))
ridge_test_mse = mean_squared_error(y_test,ridge_model.predict(x_test))
print("Train:",ridge_train_mse)
print("Test:",ridge_test_mse)


Train: 0.020242018001132043
Test: 0.020343379192693252
