# 1. Linear Regression

## 1.1 Ridge Regression

In [5]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from sklearn.compose import ColumnTransformer
from sklearn.metrics import root_mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


data = pd.read_csv("data/processed.csv")

X = data.drop("price", axis=1)
X = X.drop("indicative_price", axis=1)
y = data["price"]

numerical_columns = [col for col in X.columns if col != "listing_id"]

preprocessor = ColumnTransformer(
transformers=[
    ("num", StandardScaler(), numerical_columns),
])

X_train_valid, X_test, y_train_valid, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_valid, y_train_valid, test_size=0.125, random_state=0)

for alpha in [0.1, 0.5, 1.0, 1.5, 2.0, 5.0]:
    ridge_model = Pipeline(steps=[
        ("preprocessor", preprocessor),
        ("model", Ridge(alpha=1.0))
    ])

    ridge_model.fit(X_train, y_train)

    y_pred_valid = ridge_model.predict(X_valid)
    valid_rmse = root_mean_squared_error(y_valid, y_pred_valid)
    y_pred_test = ridge_model.predict(X_test)
    test_rmse = root_mean_squared_error(y_test, y_pred_test)

    print(f"Ridge Regression with alpha={alpha}")
    print(f" - Validation RMSE: {valid_rmse}")
    print(f" - tTest RMSE: {test_rmse}")
    print()

Ridge Regression with alpha=0.1
 - Validation RMSE: 40723.29181818994
 - tTest RMSE: 45371.391363391376

Ridge Regression with alpha=0.5
 - Validation RMSE: 40723.29181818994
 - tTest RMSE: 45371.391363391376

Ridge Regression with alpha=1.0
 - Validation RMSE: 40723.29181818994
 - tTest RMSE: 45371.391363391376

Ridge Regression with alpha=1.5
 - Validation RMSE: 40723.29181818994
 - tTest RMSE: 45371.391363391376

Ridge Regression with alpha=2.0
 - Validation RMSE: 40723.29181818994
 - tTest RMSE: 45371.391363391376

Ridge Regression with alpha=5.0
 - Validation RMSE: 40723.29181818994
 - tTest RMSE: 45371.391363391376



# 2. Gradient Boosting

# 3. Deep Learning Method