In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score
import pickle

In [2]:
# Load the training table from the CSV that sits next to this notebook.
data = pd.read_csv(filepath_or_buffer='train.csv')

In [3]:
# Quick look at the raw frame: first rows, dimensions, then per-column dtypes
# and non-null counts.
print(data.head())
print(data.shape)
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
data.info()

   beds  baths    size size_units  lot_size lot_size_units  zip_code  \
0     3    2.5  2590.0       sqft   6000.00           sqft     98144   
1     4    2.0  2240.0       sqft      0.31           acre     98106   
2     4    3.0  2040.0       sqft   3783.00           sqft     98107   
3     4    3.0  3800.0       sqft   5175.00           sqft     98199   
4     2    2.0  1042.0       sqft       NaN            NaN     98102   

       price  
0   795000.0  
1   915000.0  
2   950000.0  
3  1950000.0  
4   950000.0  
(2016, 8)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2016 entries, 0 to 2015
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   beds            2016 non-null   int64  
 1   baths           2016 non-null   float64
 2   size            2016 non-null   float64
 3   size_units      2016 non-null   object 
 4   lot_size        1669 non-null   float64
 5   lot_size_units  1669 non-null   object 
 

In [4]:
# Report the number of missing values per column.
print(data.isna().sum())
# In the output recorded for this cell, lot_size and lot_size_units are the
# only columns with missing values, so both are removed rather than imputed.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps the
# cell re-runnable: a second execution no longer raises KeyError because the
# columns are already gone.
data = data.drop(columns=['lot_size', 'lot_size_units'], errors='ignore')

beds                0
baths               0
size                0
size_units          0
lot_size          347
lot_size_units    347
zip_code            0
price               0
dtype: int64


In [5]:
# Drop the unit label for `size`; it is not used as a feature.
# NOTE(review): this assumes every row reports `size` in the same unit --
# confirm size_units holds a single value before discarding it.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps the
# cell re-runnable: a second execution no longer raises KeyError.
data = data.drop(columns=['size_units'], errors='ignore')


In [6]:
# The sale price is the regression target; every remaining column is a predictor.
y = data.loc[:, 'price']
X = data.drop('price', axis='columns')

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)


In [8]:
# One-hot encode the bedroom count; all other feature columns go through
# unchanged via remainder='passthrough'.
# handle_unknown='ignore' makes a bedroom count that was not present in the
# training split encode as all zeros instead of raising at predict time, which
# matters once the fitted pipeline is pickled and used on new inputs.
# NOTE(review): zip_code is among the passthrough columns and is therefore
# treated as a plain number -- confirm that is intended.
column_trans = make_column_transformer(
    (OneHotEncoder(sparse_output=False, handle_unknown='ignore'), ['beds']),
    remainder='passthrough',
)
# Standardises the encoded feature matrix before it reaches the regressor.
# NOTE(review): these two instances are passed to every pipeline below; it
# looks like make_pipeline does not copy its steps, so each fit() refits the
# same objects -- confirm this sharing is acceptable.
scaler = StandardScaler()

In [9]:
# Baseline model: ordinary least squares with no penalty term.
pipe_lr = make_pipeline(column_trans, scaler, LinearRegression())
# fit() returns the pipeline itself, so training and held-out prediction chain.
y_pred_lr = pipe_lr.fit(X_train, y_train).predict(X_test)
print(
    "No Regularization R² Score: ",
    r2_score(y_test, y_pred_lr),
)

No Regularization R² Score:  0.5728152391843129


In [10]:
# Same preprocessing, with a Lasso regressor at its default settings.
pipe_lasso = make_pipeline(column_trans, scaler, Lasso())
# fit() returns the pipeline itself, so training and held-out prediction chain.
y_pred_lasso = pipe_lasso.fit(X_train, y_train).predict(X_test)
print(
    "Lasso R² Score: ",
    r2_score(y_test, y_pred_lasso),
)

Lasso R² Score:  0.5746817917322382


In [11]:
# Same preprocessing, with a Ridge regressor at its default settings.
pipe_ridge = make_pipeline(column_trans, scaler, Ridge())
# fit() returns the pipeline itself, so training and held-out prediction chain.
y_pred_ridge = pipe_ridge.fit(X_train, y_train).predict(X_test)
print(
    "Ridge R² Score: ",
    r2_score(y_test, y_pred_ridge),
)

Ridge R² Score:  0.5746884627878555


In [12]:
# Persist the fitted Ridge pipeline (preprocessing + regressor) to disk.
# The context manager guarantees the file handle is flushed and closed even if
# pickling raises; the bare open() call previously left it to the garbage
# collector.
# NOTE(review): load this file with the same scikit-learn version that was used
# to fit the pipeline -- confirm the deployment environment matches.
with open('RidgeModel.pkl', 'wb') as model_file:
    pickle.dump(pipe_ridge, model_file)


In [2]:
# Pin scikit-learn to 1.3.0 in the kernel's environment.
# NOTE(review): pip's recorded output says the kernel may need a restart before
# the newly installed version is picked up.
%pip install scikit-learn==1.3.0


Collecting scikit-learn==1.3.0
  Using cached scikit_learn-1.3.0-cp311-cp311-win_amd64.whl.metadata (11 kB)
Using cached scikit_learn-1.3.0-cp311-cp311-win_amd64.whl (9.2 MB)
Installing collected packages: scikit-learn
Successfully installed scikit-learn-1.3.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
# Pin numpy to 1.23.5; the recorded output shows this replaced numpy 2.1.2.
# NOTE(review): pip's recorded output says the kernel may need a restart before
# the newly installed version is picked up.
%pip install numpy==1.23.5


Collecting numpy==1.23.5
  Using cached numpy-1.23.5-cp311-cp311-win_amd64.whl.metadata (2.3 kB)
Using cached numpy-1.23.5-cp311-cp311-win_amd64.whl (14.6 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.1.2
    Uninstalling numpy-2.1.2:
      Successfully uninstalled numpy-2.1.2
Successfully installed numpy-1.23.5
Note: you may need to restart the kernel to use updated packages.


  You can safely remove it manually.
  You can safely remove it manually.

[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip
