In [4]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
import joblib
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

# Step 2: Load the cleaned train/test feature tables and the training target
X = pd.read_csv("X_train_cleaned.csv")
# The target file is flattened to a 1-D array before being handed to scikit-learn.
y = np.ravel(pd.read_csv("y_train.csv").to_numpy())
X_test = pd.read_csv("X_test_cleaned.csv")

# Step 3: Define RMSE cross-validation function
def rmse_cv(model):
    """Return the mean cross-validated RMSE of ``model``.

    The estimator is scored with ``cross_val_score`` (``cv=5``) on the
    module-level ``X`` and ``y``. The scorer used is
    ``"neg_root_mean_squared_error"``, i.e. the RMSE with its sign flipped,
    so the sign is flipped back before the value is returned.
    """
    neg_fold_scores = cross_val_score(
        model,
        X,
        y,
        scoring="neg_root_mean_squared_error",
        cv=5,
    )
    # Averaging first and negating afterwards gives the same number as
    # negating every fold score and then averaging.
    return -neg_fold_scores.mean()

# Step 4: Build the Ridge regressor and report its cross-validated RMSE
ridge_model = Ridge(alpha=10)
score = rmse_cv(ridge_model)
print(f"Ridge CV RMSE: {score:.4f}")

# Step 5: Refit on the full training data, then predict the test set
ridge_preds_log = ridge_model.fit(X, y).predict(X_test)
# NOTE(review): expm1 is only the right back-transform if y holds
# log1p(SalePrice) -- confirm against the preprocessing step.
final_preds = np.expm1(ridge_preds_log)

# Step 6: Persist the fitted model to disk
joblib.dump(ridge_model, "ridge_model.pkl")

# Step 7: Write the predictions out as a submission file
# NOTE(review): Ids are generated as consecutive integers starting at 1461
# instead of being read from the test data -- verify this matches the
# test set's Id column and row order.
n_predictions = len(final_preds)
submission = pd.DataFrame(
    {"Id": np.arange(1461, 1461 + n_predictions), "SalePrice": final_preds}
)
submission.to_csv("ridge_submission.csv", index=False)
print("Submission file saved as ridge_submission.csv")


Ridge CV RMSE: 0.1273
Submission file saved as ridge_submission.csv
