# 1. Linear Regression

## 1.1 Ridge Regression

In [8]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from sklearn.compose import ColumnTransformer
from sklearn.metrics import root_mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


# Fit a standardized Ridge regression for several regularization strengths and
# report validation / test RMSE for each one.
data = pd.read_csv("data/processed.csv")

# Features are every column except the target ("price") and "indicative_price".
# NOTE(review): "indicative_price" is presumably removed to avoid target
# leakage -- confirm against the preprocessing step.
X = data.drop(columns=["price", "indicative_price"])
y = data["price"]

# Every remaining column except the identifier is standardized. "listing_id" is
# not handed to any transformer, so ColumnTransformer's default
# remainder="drop" keeps it out of the model input.
numerical_columns = [col for col in X.columns if col != "listing_id"]

preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_columns),
    ]
)

# First hold out 20% as the test set, then 12.5% of the remaining 80% as the
# validation set (i.e. 70% train / 10% validation / 20% test overall).
X_train_valid, X_test, y_train_valid, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_valid, y_train_valid, test_size=0.125, random_state=0
)

alphas = [0.1, 0.5, 1.0, 1.5, 2.0, 5.0]

for position, alpha in enumerate(alphas):
    # Blank line between consecutive reports; driven by the loop position so it
    # stays correct when the list of alphas is edited.
    if position > 0:
        print()

    ridge_pipeline = Pipeline(steps=[
        ("preprocessor", preprocessor),
        ("model", Ridge(alpha=alpha)),
    ])
    ridge_pipeline.fit(X_train, y_train)

    # NOTE(review): the test RMSE is printed for reference only; alpha should
    # be selected on the validation RMSE so the test set stays unbiased.
    valid_rmse = root_mean_squared_error(y_valid, ridge_pipeline.predict(X_valid))
    test_rmse = root_mean_squared_error(y_test, ridge_pipeline.predict(X_test))

    print(f"Ridge Regression with alpha={alpha}")
    print(f" - Validation RMSE: {valid_rmse}")
    print(f" - Test RMSE: {test_rmse}")

Ridge Regression with alpha=0.1
 - Validation RMSE: 40728.987116248856
 - Test RMSE: 45373.149174522216

Ridge Regression with alpha=0.5
 - Validation RMSE: 40726.45094978945
 - Test RMSE: 45372.3655707262

Ridge Regression with alpha=1.0
 - Validation RMSE: 40723.29181818994
 - Test RMSE: 45371.391363391376

Ridge Regression with alpha=1.5
 - Validation RMSE: 40720.14492832289
 - Test RMSE: 45370.42300670021

Ridge Regression with alpha=2.0
 - Validation RMSE: 40717.010210546425
 - Test RMSE: 45369.46046305184

Ridge Regression with alpha=5.0
 - Validation RMSE: 40698.453682369276
 - Test RMSE: 45363.80520752782



## 1.2 Lasso Regression

In [11]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Lasso
from sklearn.compose import ColumnTransformer
from sklearn.metrics import root_mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


# Fit a standardized Lasso regression for several regularization strengths and
# report validation / test RMSE for each one.
data = pd.read_csv("data/processed.csv")

# Features are every column except the target ("price") and "indicative_price".
# NOTE(review): "indicative_price" is presumably removed to avoid target
# leakage -- confirm against the preprocessing step.
X = data.drop(columns=["price", "indicative_price"])
y = data["price"]

# Every remaining column except the identifier is standardized. "listing_id" is
# not handed to any transformer, so ColumnTransformer's default
# remainder="drop" keeps it out of the model input.
numerical_columns = [col for col in X.columns if col != "listing_id"]

preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_columns),
    ]
)

# First hold out 20% as the test set, then 12.5% of the remaining 80% as the
# validation set (i.e. 70% train / 10% validation / 20% test overall).
X_train_valid, X_test, y_train_valid, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_valid, y_train_valid, test_size=0.125, random_state=0
)

alphas = [0.1, 0.5, 1.0, 1.5, 2.0, 5.0]

for position, alpha in enumerate(alphas):
    # Blank line between consecutive reports; driven by the loop position so it
    # stays correct when the list of alphas is edited.
    if position > 0:
        print()

    lasso_pipeline = Pipeline(steps=[
        ("preprocessor", preprocessor),
        ("model", Lasso(alpha=alpha)),
    ])
    lasso_pipeline.fit(X_train, y_train)

    # NOTE(review): the test RMSE is printed for reference only; alpha should
    # be selected on the validation RMSE so the test set stays unbiased.
    valid_rmse = root_mean_squared_error(y_valid, lasso_pipeline.predict(X_valid))
    test_rmse = root_mean_squared_error(y_test, lasso_pipeline.predict(X_test))

    print(f"Lasso Regression with alpha={alpha}")
    print(f" - Validation RMSE: {valid_rmse}")
    print(f" - Test RMSE: {test_rmse}")

Lasso Regression with alpha=0.1
 - Validation RMSE: 40729.37517324335
 - Test RMSE: 45373.25014701904

Lasso Regression with alpha=0.5
 - Validation RMSE: 40728.38756387951
 - Test RMSE: 45372.86875265672

Lasso Regression with alpha=1.0
 - Validation RMSE: 40727.15335036069
 - Test RMSE: 45372.392652519215

Lasso Regression with alpha=1.5
 - Validation RMSE: 40725.92185230695
 - Test RMSE: 45371.918100296425

Lasso Regression with alpha=2.0
 - Validation RMSE: 40724.688597107874
 - Test RMSE: 45371.4435337523

Lasso Regression with alpha=5.0
 - Validation RMSE: 40717.335681935554
 - Test RMSE: 45368.624994196856



## 1.3 Elastic Net

In [33]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.linear_model import ElasticNet
from sklearn.compose import ColumnTransformer
from sklearn.metrics import root_mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


# Fit a single standardized Elastic Net model and report validation / test RMSE.
data = pd.read_csv("data/processed.csv")

# Features are every column except the target ("price") and "indicative_price".
# NOTE(review): "indicative_price" is presumably removed to avoid target
# leakage -- confirm against the preprocessing step.
X = data.drop(columns=["price", "indicative_price"])
y = data["price"]

# Every remaining column except the identifier is standardized. "listing_id" is
# not handed to any transformer, so ColumnTransformer's default
# remainder="drop" keeps it out of the model input.
numerical_columns = [col for col in X.columns if col != "listing_id"]

preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_columns),
    ]
)

# First hold out 20% as the test set, then 12.5% of the remaining 80% as the
# validation set (i.e. 70% train / 10% validation / 20% test overall).
X_train_valid, X_test, y_train_valid, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_valid, y_train_valid, test_size=0.125, random_state=0
)

# Hyperparameters are bound once so the model and the printed label cannot
# drift apart when one of them is edited.
alpha = 0.1
l1_ratio = 0.9

elastic_net = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("model", ElasticNet(alpha=alpha, l1_ratio=l1_ratio)),
])
elastic_net.fit(X_train, y_train)

y_pred_valid = elastic_net.predict(X_valid)
valid_rmse = root_mean_squared_error(y_valid, y_pred_valid)
y_pred_test = elastic_net.predict(X_test)
test_rmse = root_mean_squared_error(y_test, y_pred_test)

print(f"Elastic Net with alpha={alpha} and l1_ratio={l1_ratio}")
print(f" - Validation RMSE: {valid_rmse}")
print(f" - Test RMSE: {test_rmse}")
print()

Elastic Net with alpha=0.1 and l1_ratio=0.9
 - Validation RMSE: 40070.655814503596
 - Test RMSE: 45234.003265580985



# 2. Gradient Boosting

# 3. Deep Learning Method