# Linear Regression Baseline Model for Resale Price Prediction

In [1]:
import json
import joblib
import numpy as np
import pandas as pd
from pathlib import Path

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.metrics import make_scorer, mean_squared_error

In [2]:
# Input locations, all resolved relative to the dataset folder.
DATA_DIR = Path("../Dataset")
TRAIN_CSV = DATA_DIR.joinpath("train_data_for_modeling(no_standardization).csv")
TEST_CSV = DATA_DIR.joinpath("test_data_for_modeling.csv")
NUM_JSON = DATA_DIR.joinpath("numerical_features.json")
ALL_JSON = DATA_DIR.joinpath("all_final_features.json")

# Tabular data for training and testing.
train_df = pd.read_csv(TRAIN_CSV)
test_df = pd.read_csv(TEST_CSV)

# Feature-name collections stored as JSON.
# NOTE(review): both files presumably hold lists of column names — confirm.
with NUM_JSON.open() as f:
    numerical_features = json.load(f)
with ALL_JSON.open() as f:
    all_features = json.load(f)

# Progress marker: all inputs were read without raising.
print("check")

check


In [16]:
# The regression is fit on the log-price column; predictions and targets are
# mapped back with exp() before the error is reported.
# NOTE(review): assumes the column holds the natural log of the price — confirm.
TARGET = "LOG_RESALE_PRICE"

# ElasticNet hyperparameters.
ELASTIC_ALPHA = 0.1
ELASTIC_L1R = 0.5
MAX_ITER = 5000

# Hold-out split settings (seed is shared with the estimator).
RANDOM_STATE = 42
VAL_SIZE = 0.2

# Predictors are every column except the target.
X_train = train_df.drop(columns=[TARGET])
y_train = train_df[TARGET]

# Keep only the numerical feature names that exist among the predictors.
numerical_features = [name for name in numerical_features if name in X_train.columns]

print("check")

# Standardise the numerical columns; all remaining columns pass through as-is.
preprocess = ColumnTransformer(
    transformers=[("num", StandardScaler(), numerical_features)],
    remainder="passthrough",
)

# Preprocessing and the ElasticNet regressor chained into a single estimator.
model = Pipeline(
    steps=[
        ("preprocess", preprocess),
        (
            "regressor",
            ElasticNet(
                alpha=ELASTIC_ALPHA,
                l1_ratio=ELASTIC_L1R,
                max_iter=MAX_ITER,
                random_state=RANDOM_STATE,
            ),
        ),
    ]
)

# Carve a validation subset out of the training data, then fit on the rest.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train,
    y_train,
    test_size=VAL_SIZE,
    random_state=RANDOM_STATE,
)
model.fit(X_tr, y_tr)

# Undo the log transform on both predictions and ground truth so the RMSE is
# expressed on the original price scale.
y_pred = model.predict(X_val)
y_pred_sgd = np.exp(y_pred)
y_val_sgd = np.exp(y_val)

rmse = np.sqrt(mean_squared_error(y_val_sgd, y_pred_sgd))
print(f"[ElasticNet] Validation RMSE (SGD): {rmse:,.2f}")
print("Done.")


check
[ElasticNet] Validation RMSE (SGD): 65,961.14
Done.
