In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PowerTransformer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LassoCV
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.feature_selection import SelectFromModel
from google.colab import files


Please upload your train.csv file:


Saving train_preprocessed.csv to train_preprocessed.csv
Please upload your test.csv file:


Saving test.csv to test.csv

📊 Lasso Regression Performance:
R² Score: 0.8138
RMSE: 28790.84

✅ Lasso predictions saved to 'hotel_predictions_lasso.csv'


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Upload files
# Each call opens Colab's upload widget and returns a mapping; its first key
# is used below as the path handed to the CSV reader.
print("Please upload your train.csv file:")
uploaded_train = files.upload()

print("Please upload your test.csv file:")
uploaded_test = files.upload()

# Iterating a dict yields its keys, so list(d)[0] is the first uploaded name.
train_filename, test_filename = (
    list(upload)[0] for upload in (uploaded_train, uploaded_test)
)


In [None]:
# Load data
# Read both CSVs, set the "Id" column aside, and separate the features from
# the target column.
target_col = "HotelValue"

train_raw = pd.read_csv(train_filename)
test_raw = pd.read_csv(test_filename)

# Test-set ids are kept so they can be written next to the predictions.
test_ids = test_raw["Id"]

# "Id" is removed from both frames before they are used as model input.
train_df = train_raw.drop("Id", axis="columns")
test_df = test_raw.drop("Id", axis="columns")

y = train_df[target_col]
X = train_df.drop(target_col, axis="columns")
X_test = test_df.copy()

In [None]:
# Identify column types
# Split the feature columns into two complementary groups:
#   * num_cols - every numeric dtype (any int/uint/float width) plus booleans
#   * cat_cols - everything else (object, string, category, ...)
# Selecting with include/exclude on the SAME dtype set guarantees each column
# ends up in exactly one group. Matching only 'int64'/'float64'/'object'
# leaves columns of any other dtype in neither list, and a ColumnTransformer
# built from these lists discards unlisted columns (default remainder='drop').
numeric_kinds = ['number', 'bool']
num_cols = X.select_dtypes(include=numeric_kinds).columns
cat_cols = X.select_dtypes(exclude=numeric_kinds).columns

In [None]:
# Preprocessing pipelines
# Numeric branch: median imputation, then StandardScaler, then a Yeo-Johnson
# PowerTransformer.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('power', PowerTransformer(method='yeo-johnson')),
]
num_pipeline = Pipeline(steps=numeric_steps)

# Categorical branch: most-frequent imputation, then one-hot encoding with
# handle_unknown='ignore'.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
cat_pipeline = Pipeline(steps=categorical_steps)

# Route each column group to its own branch.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num_pipeline, num_cols),
        ('cat', cat_pipeline, cat_cols),
    ]
)

In [None]:
# Split train/validation
# Hold out 20% of the rows for validation; random_state pins the shuffle so
# the split is repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = split_parts


In [None]:
# Lasso regression model
def _lasso_cv(folds):
    """Return a LassoCV over 30 log-spaced alphas in [1e-3, 1e2] using `folds`-fold CV."""
    return LassoCV(alphas=np.logspace(-3, 2, 30), cv=folds, max_iter=10000)


# Three stages: preprocessing, feature selection driven by a 5-fold LassoCV,
# then a 10-fold LassoCV as the final regressor on the selected features.
lasso = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('feature_select', SelectFromModel(_lasso_cv(5))),
    ('regressor', _lasso_cv(10)),
])

In [None]:
# Train and evaluate
# Fit on the training split only, then score on the held-out validation split.
lasso.fit(X_train, y_train)
y_pred = lasso.predict(X_val)

r2 = r2_score(y_val, y_pred)
val_mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(val_mse)  # RMSE is reported in the same units as the target

print("\n Lasso Regression Performance:")
print("R² Score: {:.4f}".format(r2))
print("RMSE: {:.2f}".format(rmse))

In [None]:
# Retrain on full data and predict
# Refit the same pipeline on all labelled rows (train + validation), then
# predict the test set. Pipeline.fit returns the pipeline itself, so the two
# calls can be chained.
final_predictions = lasso.fit(X, y).predict(X_test)

In [None]:
# Save predictions
# Pair each test id with its prediction rounded to two decimals, write the
# table to CSV without the index, and hand the file to Colab for download.
submission_path = "hotel_predictions_lasso.csv"

submission_columns = {
    "Id": test_ids,
    "HotelValue": np.round(final_predictions, decimals=2),
}
output = pd.DataFrame(submission_columns)

output.to_csv(submission_path, index=False)
print("\n Lasso predictions saved to 'hotel_predictions_lasso.csv'")

files.download(submission_path)