## Model training on 260x260 images rendered at zoom=15

In [1]:
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score

import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm


In [2]:
train = pd.read_csv("../data/raw/train.csv")

# Collect the ids that have an image on disk. Only "<integer>.png" names are
# accepted, so stray directory entries (e.g. .DS_Store, .ipynb_checkpoints)
# are skipped instead of crashing int().
available_images = set()
for fname in os.listdir("../data/images_2/train_2"):
    stem, ext = os.path.splitext(fname)
    if ext != ".png":
        continue
    try:
        available_images.add(int(stem))
    except ValueError:
        # A .png whose name is not an integer id cannot match train["id"].
        continue

# Keep only the rows that have an image, renumbering the index from 0.
train = train[train["id"].isin(available_images)].reset_index(drop=True)

print("Final training rows:", len(train))


Final training rows: 16209


In [3]:
# Regression target: log1p of the sale price (inverted later with expm1).
train = train.assign(log_price=np.log1p(train["price"]))


In [4]:
def feature_engineering(df, center=None):
    """Return a copy of ``df`` with derived tabular features appended.

    Added columns:

    * ``basement_ratio`` / ``above_ratio`` -- basement and above-ground area
      divided by ``sqft_living + 1``.
    * ``relative_living_size`` / ``relative_lot_size`` -- ``sqft_living`` and
      ``sqft_lot`` divided by their ``*15`` counterparts plus 1.
    * ``luxury_grade`` -- 1 where ``grade >= 9``, else 0.
    * ``good_condition`` -- 1 where ``condition >= 4``, else 0.
    * ``dist_to_center`` -- Euclidean distance, in raw lat/long units, from
      the reference point described under ``center``.
    * ``lat_long_interaction`` -- ``lat * long``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``sqft_basement``, ``sqft_living``, ``sqft_above``,
        ``sqft_living15``, ``sqft_lot``, ``sqft_lot15``, ``grade``,
        ``condition``, ``lat`` and ``long``. It is not modified.
    center : tuple of (lat, long), optional
        Reference point for ``dist_to_center``. When omitted, the mean
        ``lat`` / ``long`` of ``df`` itself is used (the original behaviour).
        Pass the training set's centre when transforming validation or test
        data so the feature is measured from the same point everywhere.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus the ones listed above.
    """
    df = df.copy()

    # The +1 keeps each denominator non-zero when the area itself is 0.
    df["basement_ratio"] = df["sqft_basement"] / (df["sqft_living"] + 1)
    df["above_ratio"]    = df["sqft_above"] / (df["sqft_living"] + 1)

    df["relative_living_size"] = df["sqft_living"] / (df["sqft_living15"] + 1)
    df["relative_lot_size"]    = df["sqft_lot"] / (df["sqft_lot15"] + 1)

    df["luxury_grade"]   = (df["grade"] >= 9).astype(int)
    df["good_condition"] = (df["condition"] >= 4).astype(int)

    if center is None:
        lat_center = df["lat"].mean()
        lon_center = df["long"].mean()
    else:
        lat_center, lon_center = center

    df["dist_to_center"] = np.sqrt(
        (df["lat"] - lat_center) ** 2 +
        (df["long"] - lon_center) ** 2
    )

    df["lat_long_interaction"] = df["lat"] * df["long"]

    return df


In [5]:
# Derived-feature copy of the training frame; `train` itself is left untouched.
train_fe = train.pipe(feature_engineering)


In [6]:
# Columns that must not be fed to the model: the identifier, the date column
# and both forms of the target.
non_feature_cols = ["id", "date", "price", "log_price"]

X_tab = train_fe.drop(columns=non_feature_cols)
y     = train_fe["log_price"].values
ids   = train_fe["id"].values

# One call splits features, targets and ids together so the three stay row-aligned.
(
    X_train_tab, X_val_tab,
    y_train, y_val,
    ids_train, ids_val,
) = train_test_split(X_tab, y, ids, test_size=0.2, random_state=42)


In [7]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train_tab)

X_train_tab = scaler.transform(X_train_tab)
X_val_tab   = scaler.transform(X_val_tab)


In [8]:
# Pick the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# ResNet-18 with pretrained weights. Replacing the final fully connected layer
# with Identity makes the forward pass return the features that used to feed it.
cnn = models.resnet18(pretrained=True)
cnn.fc = torch.nn.Identity()
cnn = cnn.to(device)
cnn.eval()  # evaluation mode; as the cell's last expression this also echoes the model




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [9]:
# Per-channel statistics passed to Normalize; these are the values torchvision
# documents for its ImageNet-pretrained models.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Preprocessing applied to every image before it is fed to the CNN.
preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]
transform = transforms.Compose(preprocess_steps)


In [10]:
def extract_embeddings(ids, img_dir, batch_size=32):
    """Embed one image per id with the notebook-level ``cnn`` feature extractor.

    For every ``pid`` in ``ids`` the file ``{img_dir}/{pid}.png`` is opened,
    converted to RGB and passed through the notebook-level ``transform``. The
    resulting tensors are stacked into batches and run through ``cnn`` on
    ``device`` with gradient tracking disabled.

    Parameters
    ----------
    ids : iterable
        Image identifiers; each one is formatted into the file name.
    img_dir : str
        Directory containing the ``<id>.png`` files.
    batch_size : int, default 32
        Images per forward pass. ``1`` reproduces the original
        one-image-at-a-time behaviour exactly; larger values only reduce
        per-call overhead (results may differ in the last floating-point
        digits). Batching relies on ``transform`` giving every image the
        same tensor shape.

    Returns
    -------
    numpy.ndarray
        2-D array with one flattened embedding per id, in input order.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1, or (raised by ``np.vstack``)
        if ``ids`` is empty.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    ids = list(ids)  # allow any iterable and make slicing/len well defined
    embeddings = []

    # total= keeps the progress bar counting images rather than batches.
    with tqdm(total=len(ids)) as progress:
        for start in range(0, len(ids), batch_size):
            tensors = []
            for pid in ids[start:start + batch_size]:
                img_path = f"{img_dir}/{pid}.png"
                # The context manager closes the file as soon as it is decoded.
                with Image.open(img_path) as img:
                    tensors.append(transform(img.convert("RGB")))

            batch = torch.stack(tensors).to(device)

            with torch.no_grad():
                emb = cnn(batch).cpu().numpy()

            # One flattened row per image, matching the original .flatten().
            embeddings.append(emb.reshape(len(tensors), -1))
            progress.update(len(tensors))

    return np.vstack(embeddings)


In [11]:
# Both splits read from the same folder; ids_train / ids_val select the files.
train_img_dir = "../data/images_2/train_2"

X_train_img = extract_embeddings(ids_train, train_img_dir)
X_val_img   = extract_embeddings(ids_val,   train_img_dir)

# Sanity check: tabular and image blocks must have matching row counts per split.
for tab_block, img_block in ((X_train_tab, X_train_img), (X_val_tab, X_val_img)):
    print(tab_block.shape, img_block.shape)


100%|████████████████████████████████████████████████████████████████████████████| 12967/12967 [34:14<00:00,  6.31it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 3242/3242 [06:15<00:00,  8.64it/s]

(12967, 26) (12967, 512)
(3242, 26) (3242, 512)





In [12]:
# Early fusion: put the scaled tabular columns and the image embedding
# side by side for each row.
X_train_fused = np.concatenate([X_train_tab, X_train_img], axis=1)
X_val_fused   = np.concatenate([X_val_tab,   X_val_img],   axis=1)


In [13]:
# Ridge regression on the fused features.
ridge_alpha = 1.0
model = Ridge(alpha=ridge_alpha)
model.fit(X_train_fused, y_train)


In [14]:
y_val_pred = model.predict(X_val_fused)

# Metrics on the log1p(price) scale the model was fitted on.
mse_log = mean_squared_error(y_val, y_val_pred)
rmse_log = np.sqrt(mse_log)
r2 = r2_score(y_val, y_val_pred)

# Invert log1p with expm1 so the error is also reported in price units.
price_true = np.expm1(y_val)
price_pred = np.expm1(y_val_pred)
rmse_price = np.sqrt(mean_squared_error(price_true, price_pred))

print(f"Multimodal RMSE (log): {rmse_log:.4f}")
print(f"Multimodal R²       : {r2:.4f}")
print(f"Multimodal RMSE (₹/$): {rmse_price:.2f}")


Multimodal RMSE (log): 0.2305
Multimodal R²       : 0.8074
Multimodal RMSE (₹/$): 160196.58


## Alternative image backbone: ResNet-34


In [39]:
# Swap the image backbone: ResNet-34 with pretrained weights and the classifier
# head replaced by Identity, so the forward pass returns the features that
# used to feed it.
device = "cuda" if torch.cuda.is_available() else "cpu"
cnn = models.resnet34(pretrained=True)
cnn.fc = torch.nn.Identity()   # remove classifier head
cnn = cnn.to(device)
cnn.eval()

# The network is only used for feature extraction in this notebook, so freeze
# its weights. The original loop set requires_grad=True, which is already the
# default and would let any call outside torch.no_grad() build an autograd graph.
for p in cnn.parameters():
    p.requires_grad = False




In [41]:
# Re-run the extraction with the `cnn` currently in scope. This rebinds
# X_train_img / X_val_img, replacing whatever embeddings they held before.
train_img_dir = "../data/images_2/train_2"
X_train_img, X_val_img = (
    extract_embeddings(split_ids, train_img_dir)
    for split_ids in (ids_train, ids_val)
)



100%|██████████████████████████████████████████████████████████████████████████████| 3242/3242 [13:38<00:00,  3.96it/s]


In [42]:
# Rebuild the fused matrices from the scaled tabular block and the image
# embeddings currently held in X_train_img / X_val_img.
X_train_fused = np.concatenate([X_train_tab, X_train_img], axis=1)
X_val_fused   = np.concatenate([X_val_tab,   X_val_img],   axis=1)


In [43]:
# Refit Ridge on the current fused features; this rebinds `model`.
ridge_alpha = 1.0
model = Ridge(alpha=ridge_alpha)
model.fit(X_train_fused, y_train)


In [44]:
# Validation-set predictions from the model fitted above; like y_val they are
# on the log1p(price) scale.
y_val_pred = model.predict(X_val_fused)


In [45]:
# mean_squared_error, r2_score and np are already imported in the notebook's
# first cell, so they are not re-imported here.

# Predict on the log1p(price) scale the model was fitted on.
y_val_pred = model.predict(X_val_fused)

# RMSE and R² on the log scale.
rmse_log = np.sqrt(mean_squared_error(y_val, y_val_pred))
r2 = r2_score(y_val, y_val_pred)

# RMSE in the original price units: invert log1p with expm1 first.
y_val_true_price = np.expm1(y_val)
y_val_pred_price = np.expm1(y_val_pred)

rmse_price = np.sqrt(
    mean_squared_error(y_val_true_price, y_val_pred_price)
)

print(f"RMSE (log)      : {rmse_log:.4f}")
print(f"R² Score        : {r2:.4f}")
print(f"RMSE (₹/$)      : {rmse_price:,.2f}")


RMSE (log)      : 0.2298
R² Score        : 0.8087
RMSE (₹/$)      : 161,178.83
