# House Prices — End-to-End (Readable)

## End-to-End (Readable)

In [None]:

import pandas as pd
from src.preprocessing import (
    load_data, basic_clean, split_features_target,
    build_preprocessor, fit_transform, train_valid_split, save_submission
)
from src.visualization import plot_log_saleprice_hist, plot_scatter
from src.modeling import fit_and_select

# Load the bundle from "data" and pass its train/test tables through basic_clean.
bundle = load_data("data")
raw_train, raw_test = bundle.train, bundle.test
train, test = basic_clean(raw_train, raw_test)

# EDA
# Each plotting helper is handed `train` plus a string that is presumably the
# output path for its figure (confirm in src.visualization).
p1 = plot_log_saleprice_hist(train, "results/hist_log_saleprice.png")
scatter_specs = (
    ("GrLivArea", "results/scatter_GrLivArea_SalePrice.png"),
    ("OverallQual", "results/scatter_OverallQual_SalePrice.png"),
)
p2, p3 = [plot_scatter(train, column, out_path) for column, out_path in scatter_specs]

# Last expression of the cell: the notebook displays the three return values.
[p1, p2, p3]


In [None]:

# Split the training table into features and the "SalePrice" target; work on a
# copy of `test` so `test` itself stays available for the Id lookup below.
X, y = split_features_target(train, "SalePrice")
X_test = test.copy()

# Build the preprocessor from the training features, then hand it to
# fit_transform together with both feature sets.
pre, num_cols, cat_cols = build_preprocessor(X)
X_proc, X_test_proc, feature_names = fit_transform(pre, X, X_test)

# The same seed is passed to the train/validation split and to model selection.
seed = 7
X_tr, X_va, y_tr, y_va = train_valid_split(
    X_proc, y, test_size=0.2, random_state=seed
)
models, report = fit_and_select(X_tr, y_tr, X_va, y_va, random_state=seed)
print("RMSE by model:", report.rmse_by_model)
print("Best model:", report.best_name)

# Predict on the processed test features with the model the report names as best.
best_model = models[report.best_name]
preds = best_model.predict(X_test_proc)

# Use the test table's own "Id" column when it exists; otherwise number rows from 1.
if "Id" in test.columns:
    ids = test["Id"]
else:
    ids = pd.Series(range(1, len(test) + 1))

submission_path = "results/submission.csv"
save_submission(ids, preds, submission_path)

# Last expression of the cell: the notebook displays the submission path.
submission_path
