# 03 — Modeling

## Goal
Build a clean preprocessing pipeline, compare three baselines, and write `results/submission.csv`.

In [None]:

import pandas as pd
from src.preprocessing import split_features_target, build_preprocessor, fit_transform, train_valid_split, save_submission
from src.modeling import fit_and_select

# Input locations and the knobs shared by the split and the model search.
TRAIN_CSV = "results/train_clean.csv"
TEST_CSV = "results/test_clean.csv"
SEED = 7  # the same seed is passed to the split and to fit_and_select
VALID_FRACTION = 0.2  # passed as test_size to train_valid_split

train = pd.read_csv(TRAIN_CSV)
test = pd.read_csv(TEST_CSV)

# Separate the SalePrice target from the features; X_test is a copy, so
# `test` itself is still available unchanged further down the notebook.
X, y = split_features_target(train, target="SalePrice")
X_test = test.copy()

# The preprocessor is built from the training features, then applied to both
# the training and the test features.
pre, num_cols, cat_cols = build_preprocessor(X)
# NOTE(review): fit_transform receives ALL of X here, i.e. before the
# train/validation split below. If it fits the preprocessor on X, validation
# rows influence the preprocessing and the validation scores may be
# optimistic — confirm against src.preprocessing.
X_proc, X_test_proc, feature_names = fit_transform(pre, X, X_test)

# Hold out part of the processed training data for model comparison.
X_tr, X_va, y_tr, y_va = train_valid_split(
    X_proc,
    y,
    test_size=VALID_FRACTION,
    random_state=SEED,
)

# Fit the candidate models and pick one; the last expression is the cell's
# displayed output (per-model RMSE and the name of the selected model).
models, report = fit_and_select(X_tr, y_tr, X_va, y_va, random_state=SEED)
(report.rmse_by_model, report.best_name)


In [None]:

# Predict on the processed test features with the model chosen by the report.
best_model = models[report.best_name]
test_preds = best_model.predict(X_test_proc)

# Use the test file's own Id column when it has one; otherwise fall back to
# sequential ids 1..len(test).
if "Id" in test.columns:
    test_ids = test["Id"]
else:
    test_ids = pd.Series(range(1, len(test) + 1))

# Write the submission file; the last expression displays what
# save_submission returned.
out_path = save_submission(test_ids, test_preds, "results/submission.csv")
out_path
