Objective:
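Fine-tune the CNN encoder (an ImageNet-pretrained ResNet-18 with only its last residual stage, `layer4`, unfrozen) on satellite imagery, using a small regression head trained on log1p(price), to adapt pretrained ImageNet features to the real-estate valuation domain.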

In [12]:
import pandas as pd

# Read the raw training table; the path is relative to the notebook's folder.
TRAIN_CSV = "../data/raw/train.csv"
train = pd.read_csv(TRAIN_CSV)

# Report (rows, columns), then let the notebook render the first records.
print(train.shape)
train.head()


(16209, 21)


Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,9117000170,20150505T000000,268643,4,2.25,1810,9240,2.0,0,0,...,7,1810,0,1961,0,98055,47.4362,-122.187,1660,9240
1,6700390210,20140708T000000,245000,3,2.5,1600,2788,2.0,0,0,...,7,1600,0,1992,0,98031,47.4034,-122.187,1720,3605
2,7212660540,20150115T000000,200000,4,2.5,1720,8638,2.0,0,0,...,8,1720,0,1994,0,98003,47.2704,-122.313,1870,7455
3,8562780200,20150427T000000,352499,2,2.25,1240,705,2.0,0,0,...,7,1150,90,2009,0,98027,47.5321,-122.073,1240,750
4,7760400350,20141205T000000,232000,3,2.0,1280,13356,1.0,0,0,...,7,1280,0,1994,0,98042,47.3715,-122.074,1590,8071


In [14]:
import os

def _png_id(filename):
    """Return the integer id encoded in '<id>.png', or None for any other entry."""
    stem, ext = os.path.splitext(filename)
    if ext != ".png":
        return None
    try:
        return int(stem)
    except ValueError:
        # Not an id-named image (e.g. "preview.png"); skip it rather than abort.
        return None


# Ids that have an image on disk. Stray directory entries such as .DS_Store
# or .ipynb_checkpoints are ignored instead of crashing the int() conversion.
available_images = {
    image_id
    for image_id in map(_png_id, os.listdir("../data/images/train"))
    if image_id is not None
}

# Keep only the rows whose image exists, with a fresh 0..n-1 index.
train = train[train["id"].isin(available_images)].reset_index(drop=True)

print("Filtered train size:", train.shape)


Filtered train size: (16209, 21)


In [16]:
import numpy as np

# Regression target: log(1 + price), stored as a new column on the frame.
train["log_price"] = train["price"].pipe(np.log1p)


In [2]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision.models import ResNet18_Weights

# Seed PyTorch's global RNG so that, on a fresh top-to-bottom run, the
# regression-head initialisation, dropout masks and DataLoader shuffling are
# repeatable. (GPU kernels may still introduce small nondeterminism.)
SEED = 42
torch.manual_seed(SEED)

device = "cuda" if torch.cuda.is_available() else "cpu"

# ImageNet-pretrained ResNet-18 used as a feature extractor.
cnn = models.resnet18(weights=ResNet18_Weights.DEFAULT)
num_features = cnn.fc.in_features
cnn.fc = nn.Identity()   # remove classifier so the encoder emits num_features-dim vectors

# Regression head: maps the encoder features to a single scalar prediction.
reg_head = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(256, 1)
)

cnn = cnn.to(device)
reg_head = reg_head.to(device)


In [3]:
# Freeze every encoder weight outside the last residual stage; parameters
# whose name contains "layer4" are left untouched.
for name, param in cnn.named_parameters():
    if "layer4" in name:
        continue
    param.requires_grad_(False)


In [4]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image

class PropertyImageDataset(Dataset):
    """Map-style dataset pairing each property's image with its log-price target.

    Args:
        df: DataFrame with an ``id`` column (image file stem) and a
            ``log_price`` column (regression target).
        image_dir: Directory holding one ``<id>.png`` file per row.
        transform: Callable applied to the RGB PIL image; its result is what
            ``__getitem__`` returns as the image.
    """

    def __init__(self, df, image_dir, transform):
        # Positional 0..n-1 index, independent of any earlier filtering.
        self.df = df.reset_index(drop=True)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Number of samples (rows of the frame)."""
        return len(self.df)

    def _image_path(self, idx):
        """Return the PNG path for the row at position ``idx``."""
        # Read the id from its column rather than from a row Series:
        # ``df.iloc[idx]`` on an all-numeric frame is upcast to float, which
        # would render the id as "123.0" and point at a non-existent file.
        return f"{self.image_dir}/{self.df['id'].iloc[idx]}.png"

    def __getitem__(self, idx):
        """Return ``(transformed_image, target)`` for position ``idx``.

        The target is a 0-dim float32 tensor holding the row's ``log_price``.
        """
        # The context manager closes the file as soon as the pixels are
        # converted, instead of waiting for garbage collection.
        with Image.open(self._image_path(idx)) as raw:
            img = raw.convert("RGB")
        img = self.transform(img)
        y = torch.tensor(self.df["log_price"].iloc[idx], dtype=torch.float32)
        return img, y


In [18]:
from torchvision import transforms

# Per-channel normalisation constants (the standard ImageNet statistics).
IMAGENET_MEAN = [0.485,0.456,0.406]
IMAGENET_STD = [0.229,0.224,0.225]

# Preprocessing: fixed 224x224 resize -> tensor -> channel-wise normalisation.
preprocess_steps = [
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
transform = transforms.Compose(preprocess_steps)

# Training set and a shuffled loader yielding batches of 32 samples.
train_ds = PropertyImageDataset(
    train,
    "../data/images/train",
    transform,
)
train_loader = DataLoader(train_ds, shuffle=True, batch_size=32)


In [20]:
# Mean-squared error between predictions and targets.
criterion = nn.MSELoss()

# Optimise only the last residual stage of the encoder plus the regression head.
trainable_params = [*cnn.layer4.parameters(), *reg_head.parameters()]
optimizer = torch.optim.Adam(trainable_params, lr=1e-4)


In [25]:
NUM_EPOCHS = 5

# Put both modules in training mode (e.g. dropout active in the head).
for module in (cnn, reg_head):
    module.train()

for epoch in range(1, NUM_EPOCHS + 1):
    epoch_loss = 0.0
    for images, targets in train_loader:
        images = images.to(device)
        # Add a trailing axis so targets line up with the head's (batch, 1) output.
        targets = targets.unsqueeze(1).to(device)

        optimizer.zero_grad()
        batch_loss = criterion(reg_head(cnn(images)), targets)
        batch_loss.backward()
        optimizer.step()

        epoch_loss += batch_loss.item()

    # Average of the per-batch losses over the epoch.
    mean_loss = epoch_loss / len(train_loader)
    print(f"Epoch {epoch}, Loss: {mean_loss:.4f}")


Epoch 1, Loss: 2.7040
Epoch 2, Loss: 0.8614
Epoch 3, Loss: 0.7632
Epoch 4, Loss: 0.7468
Epoch 5, Loss: 0.7110


In [27]:
# Switch the encoder to inference mode, then turn off gradient tracking for
# every one of its parameters so it can serve as a fixed feature extractor.
cnn.eval()
for weight in cnn.parameters():
    weight.requires_grad_(False)


In [31]:
# Persist the fine-tuned encoder weights only (the regression head is not saved).
# torch.save does not create missing folders, so ensure the target directory
# exists first; otherwise the trained weights would be lost at this last step.
MODEL_PATH = "../models/cnn_finetuned.pth"
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
torch.save(cnn.state_dict(), MODEL_PATH)
