In [4]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
import joblib
from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_val_score

# Step 2: Read the pre-cleaned train/test features and the training target
X = pd.read_csv("X_train_cleaned.csv")
# The target CSV is loaded as a DataFrame; collapse it into a flat 1-D array
y = np.ravel(pd.read_csv("y_train.csv").to_numpy())
X_test = pd.read_csv("X_test_cleaned.csv")

# Step 3: Define RMSE cross-validation function
def rmse_cv(model, cv=5):
    """Return the mean cross-validated RMSE of ``model`` on the training data.

    The model is scored with ``cross_val_score`` against the module-level
    ``X`` and ``y`` using the ``neg_root_mean_squared_error`` scorer. That
    scorer reports negated errors, so the sign is flipped to give a positive
    RMSE (lower is better).

    Args:
        model: Estimator to evaluate; passed straight to ``cross_val_score``.
        cv: Cross-validation setting forwarded to ``cross_val_score``.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        The mean of the per-fold RMSE values, in the same units as ``y``.
    """
    fold_rmse = -cross_val_score(
        model, X, y, scoring="neg_root_mean_squared_error", cv=cv
    )
    return fold_rmse.mean()

# Step 4: Cross-validate a Lasso regressor
LASSO_ALPHA = 0.001      # value passed as Lasso's ``alpha``
LASSO_MAX_ITER = 10000   # value passed as Lasso's ``max_iter``
lasso = Lasso(max_iter=LASSO_MAX_ITER, alpha=LASSO_ALPHA)
score = rmse_cv(lasso)
print(f"Lasso CV RMSE: {score:.4f}")

# Step 5: Fit on the full training data, then predict on the test features
lasso.fit(X, y)
lasso_preds = lasso.predict(X_test)

# Step 6: Map predictions back to the original scale
# (expm1 inverts log1p; this is only correct if y was log1p-transformed
# upstream -- TODO confirm against the preprocessing step)
final_preds = np.expm1(lasso_preds)

# Persist the fitted estimator to disk
joblib.dump(lasso, "lasso_model.pkl")

# Step 7: Build and write the submission file
# NOTE(review): 1461 is presumably the first Id of the test set; Ids are
# generated sequentially rather than read from a file -- confirm.
FIRST_TEST_ID = 1461
test_ids = FIRST_TEST_ID + np.arange(len(final_preds))
submission = pd.DataFrame({"Id": test_ids, "SalePrice": final_preds})
submission.to_csv("lasso_submission.csv", index=False)
print("Submission file saved as lasso_submission.csv")


Lasso CV RMSE: 0.1237
Submission file saved as lasso_submission.csv
