## MODEL TRAINING

# Align Tabular Data with Available Images

In [1]:
import os
import pandas as pd

# Paths
BASE_DIR = r"C:\Users\Sathwika\OneDrive\Desktop\Satellite_property_valuation"
TRAIN_CSV = rf"{BASE_DIR}\Data\processed\train_processed.csv"
IMAGE_DIR = rf"{BASE_DIR}\images\train"

# Load data
train_df = pd.read_csv(TRAIN_CSV)

# IDs that actually have images
available_ids = [
    int(fname.replace(".png", ""))
    for fname in os.listdir(IMAGE_DIR)
]

# Filter train data
train_df_img = train_df[train_df["id"].isin(available_ids)]

print("Train rows with images:", train_df_img.shape)


Train rows with images: (1752, 25)


# Tabular Model Training, Evaluation, and Prediction

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

# PATHS

BASE_DIR = r"C:\Users\Sathwika\OneDrive\Desktop\Satellite_property_valuation"

TRAIN_CSV = rf"{BASE_DIR}\Data\processed\train_processed.csv"
TEST_CSV  = rf"{BASE_DIR}\Data\processed\test_processed.csv"
SUBMISSION_PATH = rf"{BASE_DIR}\submission.csv"

TARGET_COL = "price"

# LOAD DATA

train_df = pd.read_csv(TRAIN_CSV)
test_df  = pd.read_csv(TEST_CSV)

# Features: drop target & non-features
# Drop target-related columns
X = train_df.drop(columns=[TARGET_COL, "log_price"], errors="ignore")
y = train_df[TARGET_COL]

# Keep ONLY numeric features
X = X.select_dtypes(include=[np.number])

# Align test features to train
X_test = test_df[X.columns]


# TRAIN–VALIDATION SPLIT

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# MODEL

model = RandomForestRegressor(
    n_estimators=300,
    max_depth=20,
    random_state=42,
    n_jobs=-1
)

model.fit(X_train, y_train)

# EVALUATION (SANITY CHECK)

val_preds = model.predict(X_val)

rmse = np.sqrt(mean_squared_error(y_val, val_preds))
r2   = r2_score(y_val, val_preds)

print(f"Validation RMSE: {rmse:.2f}")
print(f"Validation R2  : {r2:.4f}")

# FINAL TEST PREDICTIONS

test_preds = model.predict(X_test)

submission = pd.DataFrame({
    "id": test_df["id"],
    "predicted_price": test_preds
})

submission.to_csv(SUBMISSION_PATH, index=False)

print("✅ Submission file saved:", SUBMISSION_PATH)

Validation RMSE: 130805.91
Validation R2  : 0.8637
✅ Submission file saved: C:\Users\Sathwika\OneDrive\Desktop\Satellite_property_valuation\submission.csv
