In [1]:
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader


In [2]:
# --- Configuration: input locations, reproducibility seed, compute device ---
TRAIN_PATH = Path("../data/raw/train(1).xlsx")
IMG_TRAIN_DIR = Path("../data/images/train")

# Seed the global RNGs once, up front, so weight initialisation, DataLoader
# shuffling and dropout masks repeat across "Restart Kernel -> Run All".
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)


Device: cpu


In [3]:
# Read the raw training spreadsheet and report its dimensions before previewing it.
df = pd.read_excel(TRAIN_PATH)
n_rows, n_cols = df.shape
print(f"Loaded: ({n_rows}, {n_cols})")
df.head()


Loaded: (16209, 21)


Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,9117000170,20150505T000000,268643,4,2.25,1810,9240,2.0,0,0,...,7,1810,0,1961,0,98055,47.4362,-122.187,1660,9240
1,6700390210,20140708T000000,245000,3,2.5,1600,2788,2.0,0,0,...,7,1600,0,1992,0,98031,47.4034,-122.187,1720,3605
2,7212660540,20150115T000000,200000,4,2.5,1720,8638,2.0,0,0,...,8,1720,0,1994,0,98003,47.2704,-122.313,1870,7455
3,8562780200,20150427T000000,352499,2,2.25,1240,705,2.0,0,0,...,7,1150,90,2009,0,98027,47.5321,-122.073,1240,750
4,7760400350,20141205T000000,232000,3,2.0,1280,13356,1.0,0,0,...,7,1280,0,1994,0,98042,47.3715,-122.074,1590,8071


In [4]:
# Use only numeric columns for a clean baseline multimodal model.
# "price" is the regression target and "id" is only a key used to locate the
# image file, so neither may be fed to the model as a feature.
drop_cols = ["price", "id"]

# select_dtypes keeps the original column order and also understands pandas
# extension dtypes, on which np.issubdtype(dtype, np.number) raises TypeError.
numeric_cols = df.select_dtypes(include="number").columns
tab_cols = [c for c in numeric_cols if c not in drop_cols]

print("Tabular numeric columns:", len(tab_cols))
tab_cols[:10]


Tabular numeric columns: 18


['bedrooms',
 'bathrooms',
 'sqft_living',
 'sqft_lot',
 'floors',
 'waterfront',
 'view',
 'condition',
 'grade',
 'sqft_above']

In [5]:
# Hold out 20% of the rows for validation (fixed random_state => same split every run).
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# The split returns selections of `df`; take explicit copies so the column
# assignments below write to independent frames and do not trigger pandas'
# SettingWithCopyWarning.
train_df = train_df.copy()
val_df = val_df.copy()

# Fit the scaler on the training rows only, then reuse those statistics for the
# validation rows so no validation information leaks into preprocessing.
scaler = StandardScaler()
train_df[tab_cols] = scaler.fit_transform(train_df[tab_cols])
val_df[tab_cols]   = scaler.transform(val_df[tab_cols])

# Every input row must land in exactly one of the two partitions.
assert len(train_df) + len(val_df) == len(df), "train/val split lost or duplicated rows"

print("Train size:", train_df.shape)
print("Val size:", val_df.shape)


Train size: (12967, 21)
Val size: (3242, 21)


In [6]:
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset

class HouseDataset(Dataset):
    """Dataset yielding (image tensor, tabular feature tensor, target-or-id) per house.

    Args:
        df: DataFrame with an "id" column, the columns named in ``tabular_cols``
            and, when ``target_col`` is given, that column as well.
        image_dir: Directory holding one ``<id>.png`` file per house.
        tabular_cols: Names of the numeric feature columns to return.
        target_col: Name of the target column. When given, the third item of
            each sample is ``log1p(target)`` as a float32 tensor; when ``None``
            the third item is the row's id instead.
        transform: Optional callable applied to the PIL image. Defaults to
            resize -> tensor -> ImageNet normalisation, the preprocessing that
            ImageNet-pretrained torchvision backbones expect.

    Houses whose image is missing get an all-black placeholder. An image that
    exists but cannot be decoded also gets the placeholder, with a warning.
    """

    # Channel statistics used when the ImageNet-pretrained weights were trained.
    IMAGENET_MEAN = (0.485, 0.456, 0.406)
    IMAGENET_STD = (0.229, 0.224, 0.225)
    IMAGE_SIZE = (224, 224)

    def __init__(self, df, image_dir, tabular_cols, target_col=None, transform=None):
        # Positional indexing in __getitem__ relies on a clean 0..n-1 index.
        self.df = df.reset_index(drop=True)
        self.image_dir = Path(image_dir)
        self.tabular_cols = tabular_cols
        self.target_col = target_col

        if transform is None:
            transform = transforms.Compose([
                transforms.Resize(self.IMAGE_SIZE),
                transforms.ToTensor(),
                transforms.Normalize(mean=self.IMAGENET_MEAN, std=self.IMAGENET_STD),
            ])
        self.tf = transform

    def __len__(self):
        """Number of rows (houses) in the dataset."""
        return len(self.df)

    def _blank_image(self):
        """Return the black RGB placeholder used when no usable image exists."""
        return Image.new("RGB", self.IMAGE_SIZE, color=(0, 0, 0))

    def _load_image(self, pid):
        """Load ``<image_dir>/<pid>.png`` as an RGB PIL image, or a placeholder."""
        img_path = self.image_dir / f"{pid}.png"
        if not img_path.exists():
            return self._blank_image()

        try:
            # The context manager closes the file handle deterministically;
            # convert() returns an independent, fully loaded image.
            with Image.open(img_path) as raw:
                return raw.convert("RGB")
        except Exception as exc:
            # Best-effort: a corrupt file must not abort an epoch, but it
            # should not go unnoticed either.
            warnings.warn(
                f"Could not read image {img_path} ({exc}); using a blank placeholder.",
                stacklevel=2,
            )
            return self._blank_image()

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # Read the id from its own column rather than from `row`: a row taken
        # across an all-numeric frame is upcast to float, which would turn the
        # file name into "<id>.0.png" and silently yield placeholder images.
        pid = self.df["id"].iat[idx]
        if isinstance(pid, (float, np.floating)) and float(pid).is_integer():
            pid = int(pid)

        img = self.tf(self._load_image(pid))

        tab = torch.from_numpy(row[self.tabular_cols].to_numpy(dtype=np.float32))

        if self.target_col is not None:
            # The target is returned in log1p space.
            y = np.log1p(float(row[self.target_col]))
            y = torch.tensor(y, dtype=torch.float32)
            return img, tab, y

        return img, tab, pid


In [7]:
# Settings shared by both loaders; only the shuffling differs between them.
loader_kwargs = dict(batch_size=32, num_workers=0)

# Both splits read their images from the training image folder and use
# "price" as the regression target.
train_ds = HouseDataset(train_df, IMG_TRAIN_DIR, tab_cols, target_col="price")
val_ds = HouseDataset(val_df, IMG_TRAIN_DIR, tab_cols, target_col="price")

# Shuffle only the training data; validation order stays fixed.
train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)

print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")


Train batches: 406
Val batches: 102


In [8]:
import torchvision.models as models

class TabularMLP(nn.Module):
    """Small MLP that embeds a tabular feature vector.

    Layout: Linear(in_features -> hidden), ReLU, Dropout(0.2),
    Linear(hidden -> hidden), ReLU.

    Args:
        in_features: Width of the input feature vector.
        hidden: Width of both linear layers, and therefore of the output.
    """

    def __init__(self, in_features, hidden=128):
        super().__init__()
        # Layers are listed in execution order; their positions in the
        # Sequential determine the parameter names in the state dict.
        stack = [
            nn.Linear(in_features, hidden),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the embedding of ``x`` produced by the layer stack."""
        embedding = self.net(x)
        return embedding

class FusionRegressor(nn.Module):
    """Regressor that fuses a ResNet-18 image embedding with a tabular embedding.

    The image goes through a pretrained ResNet-18 whose final ``fc`` layer is
    replaced by an identity. The tabular features go through ``TabularMLP``.
    Both embeddings are concatenated and mapped to one scalar per sample.

    Args:
        tab_in: Number of tabular input features.
        img_emb_dim: Width of the image embedding fed to the head. It must
            match what the backbone emits (512 for ResNet-18).
        tab_emb_dim: Width of the tabular embedding.
    """

    def __init__(self, tab_in, img_emb_dim=512, tab_emb_dim=128):
        super().__init__()

        # Image branch: pretrained backbone with its classifier removed.
        resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        resnet.fc = nn.Identity()
        self.cnn = resnet

        # Tabular branch.
        self.tab_mlp = TabularMLP(tab_in, hidden=tab_emb_dim)

        # Regression head over the concatenated embeddings.
        fused_dim = img_emb_dim + tab_emb_dim
        head_layers = [
            nn.Linear(fused_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(256, 1),
        ]
        self.head = nn.Sequential(*head_layers)

    def forward(self, img, tab):
        """Return one prediction per sample as a 1-D tensor."""
        branches = [self.cnn(img), self.tab_mlp(tab)]
        joint = torch.cat(branches, dim=1)
        # The head emits a trailing singleton dimension; drop it.
        return self.head(joint).squeeze(1)

# Build the fusion model sized for the selected tabular features, move it to
# the chosen device, and show its architecture.
n_tab_features = len(tab_cols)
model = FusionRegressor(tab_in=n_tab_features)
model = model.to(DEVICE)
print(model)


FusionRegressor(
  (cnn): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track

In [9]:
def rmse(y_true, y_pred):
    """Root-mean-squared error between two equally shaped arrays.

    Args:
        y_true: Ground-truth values (any array-like).
        y_pred: Predicted values (any array-like) with the same shape.

    Returns:
        The RMSE as a Python float, or NaN when the inputs are empty.

    Raises:
        ValueError: If the two inputs do not have the same shape. Without this
            check, shapes such as (n,) and (n, 1) would broadcast to an (n, n)
            difference matrix and silently produce a wrong score.
    """
    # Compute in float64 regardless of the input dtype.
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.asarray(y_pred, dtype=np.float64)

    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"rmse expects equal shapes, got {y_true.shape} and {y_pred.shape}"
        )
    if y_true.size == 0:
        # The mean of nothing is undefined; report NaN explicitly.
        return float("nan")

    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

# Optimisation hyper-parameters.
LEARNING_RATE = 2e-4
WEIGHT_DECAY = 1e-4

# Mean-squared error on the training target, optimised with AdamW over all
# model parameters.
loss_fn = nn.MSELoss()
opt = torch.optim.AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)


In [None]:
EPOCHS = 3  # demonstration training for notebook

for epoch in range(1, EPOCHS + 1):
    # ---- Training pass: dropout active, batch-norm statistics updating ----
    model.train()
    train_losses = []

    for img, tab, y in train_loader:
        img, tab, y = img.to(DEVICE), tab.to(DEVICE), y.to(DEVICE)

        pred = model(img, tab)
        loss = loss_fn(pred, y)

        # Clear gradients left over from the previous step before backprop.
        opt.zero_grad()
        loss.backward()
        opt.step()

        train_losses.append(loss.item())

    # ---- Validation pass: inference mode for the layers, no gradients ----
    model.eval()
    ys, ps = [], []

    with torch.no_grad():
        for img, tab, y in val_loader:
            img, tab = img.to(DEVICE), tab.to(DEVICE)
            pred = model(img, tab).cpu().numpy()

            # Targets never left the CPU, so they convert to numpy directly.
            ys.append(y.numpy())
            ps.append(pred)

    y_true = np.concatenate(ys)
    y_pred = np.concatenate(ps)
    # Both arrays are in the dataset's log1p target space, hence "RMSE(log)".
    val_rmse = rmse(y_true, y_pred)

    print(f"Epoch {epoch}/{EPOCHS} | TrainLoss={np.mean(train_losses):.4f} | Val RMSE(log)={val_rmse:.4f}")


In [None]:
# Persist everything needed to reuse the model: the weights, the fitted
# scaler statistics, and the ordered list of tabular feature columns.
CKPT_PATH = Path("../outputs/models/notebook_multimodal.pt")
CKPT_PATH.parent.mkdir(parents=True, exist_ok=True)

# The scaler statistics are stored as plain Python lists rather than numpy
# arrays. Pickled numpy arrays are rejected by torch.load's weights_only=True
# mode, whereas lists load cleanly and np.asarray() restores the arrays.
torch.save({
    "model": model.state_dict(),
    "scaler_mean": scaler.mean_.tolist(),
    "scaler_scale": scaler.scale_.tolist(),
    "tab_cols": list(tab_cols)
}, CKPT_PATH)

print("Saved checkpoint:", CKPT_PATH)
