## Naive Linear Regression Model

In [17]:
import os, sys
import numpy as np
import pandas as pd

from config_local import local_config

In [18]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

In [19]:
# Load the train/test feature tables from the paths configured in local_config.
# NOTE(review): the "PROCESS6" names suggest these are outputs of an earlier
# preprocessing step — confirm which notebook/script produces them.
train = pd.read_csv(local_config.TRAIN_PROCESS6_CSV)
test  = pd.read_csv(local_config.TEST_PROCESS6_CSV)
# Second test file, indexed by its "Id" column; that index is used later as the
# "Id" column of the submission file.
testRaw = pd.read_csv(local_config.TEST_CSV, index_col="Id")

In [20]:
# Separate the regression target from the predictors:
#   X -> every column of `train` except 'logSP'
#   y -> the 'logSP' column itself
# NOTE(review): 'logSP' is presumably the log-transformed sale price — confirm.
X = train.drop(columns=['logSP'])
y = train['logSP']

In [21]:
# ============ 2. K-Fold CV ============
# Shuffled K-fold cross-validation of a plain LinearRegression. The model is
# fitted on the log-scale target; each fold is scored with RMSE computed after
# exponentiating both predictions and ground truth.
n_splits = 5  # you can change to 10 later if you want
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

rmse_scores = []

for fold, (fit_rows, holdout_rows) in enumerate(kf.split(X), start=1):
    # Fit on this fold's training rows only.
    model = LinearRegression().fit(X.iloc[fit_rows], y.iloc[fit_rows])

    # Predictions come out in log-space; back-transform them (and the true
    # values) so the error is expressed on the original target scale.
    # NOTE(review): np.exp assumes the target was built with np.log rather
    # than np.log1p — confirm against the preprocessing step.
    holdout_pred = np.exp(model.predict(X.iloc[holdout_rows]))
    holdout_true = np.exp(y.iloc[holdout_rows])

    rmse = np.sqrt(mean_squared_error(holdout_true, holdout_pred))
    rmse_scores.append(rmse)

    print(f"Fold {fold}: RMSE = {rmse:.4f}")

# Aggregate the per-fold scores.
print("\n==== K-Fold CV with LinearRegression ====")
print(f"Mean RMSE: {np.mean(rmse_scores):.4f}")
print(f"Std  RMSE: {np.std(rmse_scores):.4f}")

Fold 1: RMSE = 21837.0597
Fold 2: RMSE = 25367.6085
Fold 3: RMSE = 20280.2299
Fold 4: RMSE = 26932.8301
Fold 5: RMSE = 21780.1981

==== K-Fold CV with LinearRegression ====
Mean RMSE: 23239.5852
Std  RMSE: 2490.9120


In [22]:
# Final model on all data
# Refit LinearRegression on every training row (no hold-out), predict the test
# set in log-space, back-transform with np.exp and write the submission CSV.
final_model = LinearRegression()
final_model.fit(X, y)

# Feed the model exactly the features it was trained on, in the same order.
# A missing feature is reported explicitly instead of surfacing as an opaque
# error (or, on old scikit-learn versions, a silently misaligned prediction).
missing_cols = X.columns.difference(test.columns)
if len(missing_cols) > 0:
    raise ValueError(
        f"Test data is missing training features: {list(missing_cols)}"
    )
test_features = test[X.columns]

test_pred_log = final_model.predict(test_features)
test_pred = np.exp(test_pred_log)

# NOTE(review): this pairs predictions with testRaw's "Id" index by position,
# so it assumes `test` and `testRaw` have the same rows in the same order — confirm.
submission = pd.DataFrame({
    "Id": testRaw.index,
    "SalePrice": test_pred
})

# to_csv does not create parent directories; make sure the target exists so a
# fresh checkout can run the notebook end to end.
os.makedirs(local_config.SUBMISSIONS_DIR, exist_ok=True)
out_path = os.path.join(local_config.SUBMISSIONS_DIR,
                        "linearModel_KFold.csv")
submission.to_csv(out_path, index=False)

print("Saved:", out_path)

Saved: D:\Project\Kaggle\house-prices-starter\data\submissions\linearModel_KFold.csv
