In [None]:
import numpy as np
import pandas as pd
from pathlib import Path

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset

from torchvision import models, transforms
from PIL import Image


In [None]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device


device(type='cpu')

Image Preprocessing Pipeline

In [None]:
# Per-channel mean / std handed to Normalize.
_CHANNEL_MEAN = (0.485, 0.456, 0.406)
_CHANNEL_STD = (0.229, 0.224, 0.225)

# Steps run in order: resize to 224x224, convert to a tensor, then normalize
# each channel with the statistics above.
_transform_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
]
img_transform = transforms.Compose(_transform_steps)


Custom Dataset (Images Only)

In [None]:
class PropertyImageDataset(Dataset):
    """Dataset that yields one image per row of a table.

    Args:
        table: DataFrame with an ``image_path`` column. Its index is reset to
            0..n-1 (on the returned copy) so positional indices can be used
            as row labels.
        transform: Optional callable applied to the RGB PIL image before it
            is returned. With ``None`` the PIL image is returned unchanged.
    """

    def __init__(self, table, transform=None):
        self.data = table.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the table."""
        return len(self.data)

    def __getitem__(self, index):
        """Load the image of row ``index`` as RGB and apply the transform."""
        img_file = self.data.loc[index, "image_path"]

        # Open inside a context manager so the underlying file is released
        # deterministically, even when decoding raises. convert() returns a
        # new image, so the result stays usable after the file is closed.
        with Image.open(img_file) as raw:
            img = raw.convert("RGB")

        # Compare against None explicitly: a transform object that happens to
        # be falsy (e.g. an empty container of steps) must not be skipped
        # silently.
        if self.transform is not None:
            img = self.transform(img)

        return img


Load Image-Linked Metadata

In [None]:
# Project root: the parent of the working directory when the working
# directory's path contains "notebooks", otherwise the working directory.
_cwd = Path.cwd()
BASE_DIR = _cwd.parent if "notebooks" in str(_cwd) else _cwd

img_meta = pd.read_csv("/content/drive/MyDrive/satellite-property-valuation/data/processed/train_with_images.csv")

# Ids are cast to float before the file name is built, so each image is
# looked up as "<float id>.png".
img_meta["id"] = img_meta["id"].astype(float)

# NOTE: the right-hand operand is an absolute path, so pathlib discards
# BASE_DIR in this join and IMG_ROOT is exactly the directory written below.
IMG_ROOT = BASE_DIR / "/content/drive/MyDrive/satellite-property-valuation/data/images"

img_meta["image_path"] = img_meta["id"].apply(lambda v: IMG_ROOT / f"{v}.png")

# Flag rows whose image file is present on disk, then keep only those rows.
img_meta["exists"] = img_meta["image_path"].apply(Path.exists)
img_meta = img_meta.loc[img_meta["exists"]].reset_index(drop=True)

img_meta.shape


(5500, 23)

In [None]:
# Preview the id, price and resolved image path of the first few rows.
preview_cols = ["id", "price", "image_path"]
img_meta[preview_cols].head()

Unnamed: 0,id,price,image_path
0,1105001000.0,240000,/content/drive/MyDrive/satellite-property-valu...
1,399000200.0,200000,/content/drive/MyDrive/satellite-property-valu...
2,522059300.0,157500,/content/drive/MyDrive/satellite-property-valu...
3,1061400000.0,240000,/content/drive/MyDrive/satellite-property-valu...
4,1099600000.0,210000,/content/drive/MyDrive/satellite-property-valu...


CNN Backbone (Feature Extractor)

In [None]:
# ResNet-18 with the default pretrained weights, used as a fixed feature extractor.
resnet_model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Replace the final fully connected layer with a pass-through so the forward
# pass returns the features that used to feed the classifier.
resnet_model.fc = nn.Identity()

# Turn off gradient tracking on every parameter; the backbone is not trained here.
for param in resnet_model.parameters():
    param.requires_grad = False

# Move to the selected device, then switch to evaluation mode. eval() returns
# the module, so leaving it as the last expression also displays the model.
resnet_model = resnet_model.to(device)
resnet_model.eval()


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

Extract Image Embeddings

In [None]:
# Wrap the filtered metadata table; each item is one preprocessed image.
img_ds = PropertyImageDataset(img_meta, transform=img_transform)

# shuffle=False keeps batches in table order, so the extracted embedding rows
# stay aligned with the rows of img_meta.
img_dl = DataLoader(img_ds, batch_size=32, shuffle=False, num_workers=0)


In [None]:
# One array of embeddings per batch, in loader order.
features = []

# No gradients are needed: the backbone is only used for a forward pass.
with torch.no_grad():
    for batch in img_dl:
        vecs = resnet_model(batch.to(device))
        # Bring the result back to host memory before converting to NumPy.
        features.append(vecs.cpu().numpy())

# Stack the per-batch arrays row-wise into a single 2-D array.
image_features = np.vstack(features)
image_features.shape


(5500, 512)

Save Embeddings & Targets

In [None]:
# Output locations for the embedding matrix and the matching id/price table.
embeddings_path = "/content/drive/MyDrive/satellite-property-valuation/data/processed/image_embeddings.npy"
targets_path = "/content/drive/MyDrive/satellite-property-valuation/data/processed/image_targets.csv"

np.save(embeddings_path, image_features)

# Written without the index so the file holds only the "id" and "price" columns.
img_meta[["id", "price"]].to_csv(targets_path, index=False)


Load Training Arrays

In [None]:
# Locations of the saved embedding matrix and the id/price table.
embeddings_path = "/content/drive/MyDrive/satellite-property-valuation/data/processed/image_embeddings.npy"
targets_path = "/content/drive/MyDrive/satellite-property-valuation/data/processed/image_targets.csv"

X = np.load(embeddings_path)

targets = pd.read_csv(targets_path)
# The regression target is log(1 + price).
y = np.log1p(targets["price"].values)

X.shape, y.shape


((5500, 512), (5500,))

Train–Validation Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; random_state fixes the partition.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_tr, X_va, y_tr, y_va = split_parts


Torch Datasets

In [None]:
def _to_float_tensors(inputs, targets):
    """Return (inputs, targets) as float32 tensors; targets gain a trailing axis of size 1."""
    inputs_t = torch.tensor(inputs, dtype=torch.float32)
    targets_t = torch.tensor(targets, dtype=torch.float32).unsqueeze(1)
    return inputs_t, targets_t


X_tr_t, y_tr_t = _to_float_tensors(X_tr, y_tr)
X_va_t, y_va_t = _to_float_tensors(X_va, y_va)

train_set = TensorDataset(X_tr_t, y_tr_t)
val_set = TensorDataset(X_va_t, y_va_t)

# Only the training loader shuffles; validation batches keep a fixed order.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64, shuffle=False)


Image-Only Regression Head

In [None]:
class ImageRegressor(nn.Module):
    """Small MLP head that maps an embedding vector to a single scalar.

    Args:
        in_features: Size of each input embedding. Defaults to 512, the width
            that was previously hard-coded.
        hidden_features: Width of the hidden layer. Defaults to 128, the
            value that was previously hard-coded.
    """

    def __init__(self, in_features=512, hidden_features=128):
        super().__init__()
        # Layer order and the attribute name "layers" are unchanged, so
        # state_dict keys stay the same as before.
        self.layers = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, 1)
        )

    def forward(self, x):
        """Run ``x`` through the head; the last dimension of the output has size 1."""
        return self.layers(x)


In [None]:
# Seed PyTorch's global RNG before the model is built so that the weight
# initialisation (and any shuffling that draws from the global RNG) is
# repeatable from one run of the notebook to the next.
torch.manual_seed(42)

img_model = ImageRegressor().to(device)

# Start the output bias at the mean training target. The targets are
# log1p(price) values that sit far from zero, while a freshly initialised head
# predicts values close to zero; without this offset the first epochs are
# spent learning a constant (the unmodified run logged a first-epoch train
# MSE of about 14). Only training targets are used, so nothing leaks from the
# validation split.
with torch.no_grad():
    img_model.layers[-1].bias.fill_(y_tr_t.mean().item())

loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(img_model.parameters(), lr=1e-3)


Training Loop

In [None]:
def _train_pass():
    """Run one optimisation pass over train_loader and return the per-sample mean loss."""
    img_model.train()
    running = 0.0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)

        optimizer.zero_grad()
        batch_loss = loss_fn(img_model(xb), yb)
        batch_loss.backward()
        optimizer.step()

        # The loss is a batch mean; weight it by batch size so the final
        # division by the dataset size gives a per-sample average.
        running += batch_loss.item() * xb.size(0)
    return running / len(train_loader.dataset)


def _val_pass():
    """Evaluate on val_loader without gradients and return the per-sample mean loss."""
    img_model.eval()
    running = 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            running += loss_fn(img_model(xb), yb).item() * xb.size(0)
    return running / len(val_loader.dataset)


epochs = 10

for ep in range(epochs):
    total_loss = _train_pass()
    val_loss = _val_pass()

    print(
        f"Epoch {ep+1}/{epochs} | "
        f"Train MSE: {total_loss:.4f} | "
        f"Val MSE: {val_loss:.4f}"
    )


Epoch 1/10 | Train MSE: 14.2590 | Val MSE: 2.5225
Epoch 2/10 | Train MSE: 1.9337 | Val MSE: 1.4836
Epoch 3/10 | Train MSE: 1.3347 | Val MSE: 1.1220
Epoch 4/10 | Train MSE: 1.0458 | Val MSE: 0.9036
Epoch 5/10 | Train MSE: 0.8757 | Val MSE: 0.8233
Epoch 6/10 | Train MSE: 0.7750 | Val MSE: 0.7065
Epoch 7/10 | Train MSE: 0.6867 | Val MSE: 0.6398
Epoch 8/10 | Train MSE: 0.6289 | Val MSE: 0.6002
Epoch 9/10 | Train MSE: 0.5911 | Val MSE: 0.5699
Epoch 10/10 | Train MSE: 0.5589 | Val MSE: 0.5667


Final Evaluation

In [None]:
img_model.eval()

# Per-batch predictions and targets, collected in validation-loader order.
pred_chunks, true_chunks = [], []

with torch.no_grad():
    for xb, yb in val_loader:
        out = img_model(xb.to(device))

        pred_chunks.append(out.cpu().numpy())
        true_chunks.append(yb.cpu().numpy())

# Stack the batches row-wise, then flatten to 1-D arrays.
pred_log = np.vstack(pred_chunks).ravel()
true_log = np.vstack(true_chunks).ravel()


In [None]:
from sklearn.metrics import root_mean_squared_error, r2_score

# expm1 inverts the log1p applied to the targets, so both arrays are back on
# the original price scale before scoring.
true_price, pred_price = np.expm1(true_log), np.expm1(pred_log)

rmse = root_mean_squared_error(y_true=true_price, y_pred=pred_price)
r2 = r2_score(y_true=true_price, y_pred=pred_price)

rmse, r2


(549792.75, -1.2756156921386719)

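The image-only model is intended to capture visual neighborhood context, but with a validation R² of about −1.28 (RMSE ≈ 549,793) it currently predicts worse than a constant mean-price baseline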

Performance is weaker than tabular baseline

Confirms that images alone are insufficient

Justifies multimodal fusion (tabular + image)