In [7]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib
import os

# --- Configuration -----------------------------------------------------------
# Predictor columns and the regression target used throughout this cell.
features = ['GrLivArea', 'GarageCars', 'TotalBsmtSF', 'OverallQual']
target = 'SalePrice'

# --- Data preparation --------------------------------------------------------
# Read the raw CSV (change the filename here if the data lives elsewhere),
# keep only the columns the model needs, and discard any row that has a
# missing value in one of them.
df = pd.read_csv('train.csv')
df = df[[*features, target]].dropna()

X, y = df[features], df[target]

# Hold out 20% of the rows; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model -------------------------------------------------------------------
# Standardise the features, then fit a Ridge regressor on the scaled values.
# Wrapping both steps in a Pipeline means the scaler fitted on the training
# data is stored and re-applied together with the regressor.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('ridge', Ridge(alpha=1.0)),
    ]
)
pipeline.fit(X_train, y_train)

# --- Persistence -------------------------------------------------------------
# Create the output directory if it is missing, then serialise the whole
# fitted pipeline (scaler + regressor) to disk.
os.makedirs('model', exist_ok=True)
joblib.dump(pipeline, 'model/ridge_model.joblib')
print("✅ Model saved to model/ridge_model.joblib")


✅ Model saved to model/ridge_model.joblib
