In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the housing CSV into a DataFrame.
# NOTE(review): absolute, machine-specific path — the notebook only runs where this file exists.
df = pd.read_csv(
    filepath_or_buffer="/home/inventor/Datasets/California House Price/Cal_house/housing.csv"
)

In [3]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [4]:
# (rows, columns) of the loaded frame.
df.shape

(20640, 10)

In [5]:
# Replace missing bedroom counts with the mean of the observed values.
bedrooms_mean = df["total_bedrooms"].mean()
df["total_bedrooms"] = df["total_bedrooms"].fillna(bedrooms_mean)

In [6]:
# Count the missing values remaining in each column.
df.isna().sum()

longitude             0
latitude              0
housing_median_age    0
total_rooms           0
total_bedrooms        0
population            0
households            0
median_income         0
median_house_value    0
ocean_proximity       0
dtype: int64

In [7]:
# Preview the first rows again after the missing-value fill.
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [8]:
# Cast the count-like columns to 64-bit integers.
# Any fractional part in a column is discarded by the cast.
for column in (
    "housing_median_age",
    "total_rooms",
    "total_bedrooms",
    "population",
    "households",
    "median_house_value",
):
    df[column] = df[column].astype("int64")

In [9]:
# Give the house-value column a shorter name.
df = df.rename(columns={"median_house_value": "house_price"})

In [10]:
# Partition the columns by dtype: float64/int64 columns are numeric, everything else
# is treated as categorical. Column order is preserved within each list.
numeric_dtypes = ["float64", "int64"]
num_cols, cat_cols = [], []
for feature in df.columns:
    if df[feature].dtype in numeric_dtypes:
        num_cols.append(feature)
    else:
        cat_cols.append(feature)

In [11]:
from sklearn.preprocessing import StandardScaler

# Standardise every numeric column using statistics computed over the whole frame.
# NOTE(review): the scaler is fitted on all rows, so any later train/test split sees
# statistics that include the test rows — confirm this is acceptable.
# NOTE(review): num_cols is built from every float64/int64 column, so it presumably
# contains the target as well — confirm the target is meant to be scaled.
scaler = StandardScaler()
scaler.fit(df[num_cols])
df[num_cols] = scaler.transform(df[num_cols])

df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,house_price,ocean_proximity
0,-1.327835,1.052548,0.982143,-0.804819,-0.975207,-0.974429,-0.977033,2.344766,2.129631,NEAR BAY
1,-1.322844,1.043185,-0.607019,2.04589,1.355109,0.861439,1.669961,2.332238,1.314156,NEAR BAY
2,-1.332827,1.038503,1.856182,-0.535746,-0.829711,-0.820777,-0.843637,1.782699,1.258693,NEAR BAY
3,-1.337818,1.038503,1.856182,-0.624215,-0.722378,-0.766028,-0.733781,0.932968,1.1651,NEAR BAY
4,-1.337818,1.038503,1.856182,-0.462404,-0.615046,-0.759847,-0.629157,-0.012881,1.1729,NEAR BAY


In [12]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the categorical columns as a dense array; the first level of each
# feature is dropped.
encoder = OneHotEncoder(drop="first", handle_unknown="ignore", sparse_output=False)
encoded = encoder.fit_transform(df[cat_cols])

# Append the indicator columns to the original frame, side by side.
encoded_df = pd.concat(
    [df, pd.DataFrame(encoded, columns=encoder.get_feature_names_out(cat_cols))],
    axis=1,
)

In [13]:
# The raw categorical column is redundant once its indicator columns exist.
encoded_df = encoded_df.drop(columns="ocean_proximity")

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    encoded_df.drop(columns="house_price"),
    df["house_price"],
    test_size=0.2,
    random_state=42,
)

In [16]:
# Shapes of the train and test feature matrices.
X_train.shape, X_test.shape

((16512, 12), (4128, 12))

In [17]:
from sklearn.linear_model import ElasticNet

# Reference model: scikit-learn's ElasticNet, fitted on the training split.
elasic = ElasticNet(alpha=0.01, l1_ratio=0.5).fit(X_train, y_train)

# Predictions on the held-out rows.
y_pred_sk = elasic.predict(X_test)

In [18]:
from sklearn.metrics import r2_score

# R^2 of the scikit-learn model on the held-out rows.
r2_score_sk = r2_score(y_true=y_test, y_pred=y_pred_sk)

r2_score_sk

0.6253017455767639

In [19]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the scikit-learn model on the held-out rows.
mse_sk = mean_squared_error(y_true=y_test, y_pred=y_pred_sk)

mse_sk

np.float64(0.3687490864575188)

In [20]:
class Elastic_Net_Custom():
    """Elastic-net linear regression trained with full-batch (sub)gradient descent.

    The objective being descended is

        0.5 * ||y - b - X w||^2
            + alpha * (l1_ratio * ||w||_1 + 0.5 * (1 - l1_ratio) * ||w||^2)

    where ``b`` is the intercept and ``w`` the feature weights. Only ``w`` is
    penalised; the intercept is left free.

    The data term is a *sum* of squared errors, not a mean. Consequently the
    gradient grows with the number of rows: ``learning_rate`` must be chosen
    accordingly, and ``alpha`` is not on the same scale as in implementations
    that divide the data term by the sample count.

    After ``fit``, ``coef_`` holds ``[b, w_1, ..., w_p]`` (intercept first).
    """

    def __init__(self, learning_rate, epochs, alpha, l1_ratio, random_state=None):
        """Store the hyper-parameters; no computation happens here.

        Parameters
        ----------
        learning_rate : float
            Step size applied to the (unnormalised) gradient.
        epochs : int
            Number of full-batch gradient steps.
        alpha : float
            Overall penalty strength.
        l1_ratio : float
            Share of the penalty given to the L1 term; the rest goes to L2.
        random_state : int or None, default None
            Seed for the initial weights. ``None`` draws from NumPy's global
            random state, exactly as before this parameter existed.
        """
        self.coef_ = None
        self.lr = learning_rate
        self.epochs = epochs
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.random_state = random_state

    def fit(self, X_train, y_train):
        """Run ``epochs`` gradient steps and store the result in ``coef_``.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,) or (n_samples, 1)

        Prints the learned coefficient vector when training finishes.
        """
        # Prepend a column of ones so the intercept is learned as coef_[0].
        X = np.insert(np.asarray(X_train, dtype=float), 0, 1, axis=1)
        # Flatten so a column-vector target cannot turn coef_ into a 2-D array.
        y = np.asarray(y_train, dtype=float).ravel()

        # Both products are independent of the weights, so compute them once
        # instead of on every epoch.
        gram = np.dot(X.T, X)
        xty = np.dot(X.T, y)

        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)
        self.coef_ = rng.randn(X.shape[1]) * 0.01

        l1_strength = self.alpha * self.l1_ratio
        l2_strength = self.alpha * (1 - self.l1_ratio)

        for _ in range(self.epochs):
            penalty_grad = l1_strength * np.sign(self.coef_) + l2_strength * self.coef_
            # The intercept must not be shrunk towards zero.
            penalty_grad[0] = 0.0

            coef_slope = np.dot(gram, self.coef_) - xty + penalty_grad
            self.coef_ = self.coef_ - (self.lr * coef_slope)

        print(f"Coef_: {self.coef_}")

    def predict(self, X_test):
        """Return predictions for ``X_test`` using the fitted ``coef_``.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
        """
        # Same bias column as in fit, so coef_[0] acts as the intercept.
        X = np.insert(np.asarray(X_test, dtype=float), 0, 1, axis=1)
        return np.dot(X, self.coef_)

In [21]:
# Hand-written model, using the same alpha / l1_ratio as the scikit-learn fit.
elastic_custom = Elastic_Net_Custom(
    learning_rate=0.00001,
    epochs=1000,
    alpha=0.01,
    l1_ratio=0.5,
)

In [22]:
# Train the hand-written model on the training split; fit prints the learned coefficients.
elastic_custom.fit(X_train, y_train)

Coef_: [ 0.11121516 -0.45939275 -0.46482041  0.12040148 -0.11115335  0.36188652
 -0.3769895   0.17108842  0.64905021 -0.34974201  0.05376084 -0.04465786
  0.02996339]


In [23]:
# Predictions of the hand-written model on the held-out rows.
y_pred = elastic_custom.predict(X_test)

In [26]:
# Score the hand-written model on the held-out rows.
r2_score_c = r2_score(y_test, y_pred)

mse_c = mean_squared_error(y_test, y_pred)

# Print both models' metrics together for comparison.
print(f"R2 Score Scikit learn: {r2_score_sk}")
print(f"R2 Score Custom: {r2_score_c}")
print(f"Mean Square Error Scikit learn: {mse_sk}")
print(f"Mean Square Error Custom: {mse_c}")

R2 Score Sickit learn: 0.6253017455767639
R2 Score Custom: 0.6254835470900971
Mean Square Error Scikit learn: 0.3687490864575188
Mean Square Error Custom: 0.36857017144746246
