**Multimodal ML – Housing Price Prediction Using Images + Tabular Data**


In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [2]:
# Seed NumPy and PyTorch with the same value so data generation and weight
# initialisation are reproducible from run to run.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fdf21ee3d90>

In [3]:
# Dimensions of the synthetic dataset.
n_samples = 2000
n_tabular_features = 8
n_channels = 3
image_size = 32

# Tabular features: one row per sample, each value uniform in [0, 1).
tabular_data = np.random.random_sample(size=(n_samples, n_tabular_features))

In [4]:
# Target prices: a random linear combination of the tabular features plus
# unit-variance Gaussian noise.
coefficients = np.random.rand(n_tabular_features)
noise = np.random.normal(loc=0, scale=1, size=n_samples)
prices = tabular_data.dot(coefficients) + noise

# Images are uniform random pixels, drawn independently of the prices.
image_shape = (n_samples, n_channels, image_size, image_size)
images = np.random.rand(*image_shape).astype(np.float32)

In [5]:
# Standardize tabular features column by column (zero mean, unit variance).
# NOTE(review): the statistics are computed over every sample, i.e. before the
# train/test split, so test rows influence the scaling.
mean = np.mean(tabular_data, axis=0)
std = np.std(tabular_data, axis=0)
# A constant column has std == 0 and dividing by it would produce NaN/inf;
# use a divisor of 1 there so the column is simply centred at zero.
std = np.where(std == 0, 1.0, std)
tabular_data = (tabular_data - mean) / std

In [6]:
# 80/20 train/test split over a random permutation of the sample indices.
indices = np.arange(n_samples)
np.random.shuffle(indices)
split = int(0.8 * n_samples)
train_idx, test_idx = indices[:split], indices[split:]

# Apply the same index sets to every modality so rows stay aligned.
X_tab_train, X_tab_test = tabular_data[train_idx], tabular_data[test_idx]
X_img_train, X_img_test = images[train_idx], images[test_idx]
y_train, y_test = prices[train_idx], prices[test_idx]

In [7]:
# Dataset
class HousingDataset(Dataset):
    """Paired tabular/image samples with optional price targets.

    Every input is copied into a ``float32`` tensor at construction time.

    Args:
        tabular: Array-like of per-sample tabular features.
        images: Array-like of per-sample images; must hold as many samples
            as ``tabular``.
        prices: Optional array-like of targets, one per sample. When omitted,
            items are unlabeled ``(tabular, image)`` pairs.

    Raises:
        ValueError: If the inputs do not all contain the same number of
            samples.
    """

    def __init__(self, tabular, images, prices=None):
        self.tabular = torch.tensor(tabular, dtype=torch.float32)
        self.images = torch.tensor(images, dtype=torch.float32)
        self.prices = torch.tensor(prices, dtype=torch.float32) if prices is not None else None

        # Fail fast on misaligned modalities instead of raising an IndexError
        # (or silently ignoring trailing rows) during iteration.
        n_rows = len(self.tabular)
        if len(self.images) != n_rows:
            raise ValueError(
                f"tabular has {n_rows} samples but images has {len(self.images)}"
            )
        if self.prices is not None and len(self.prices) != n_rows:
            raise ValueError(
                f"tabular has {n_rows} samples but prices has {len(self.prices)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.tabular)

    def __getitem__(self, idx):
        """Return ``(tabular, image, price)``, or ``(tabular, image)`` when unlabeled."""
        if self.prices is not None:
            return self.tabular[idx], self.images[idx], self.prices[idx]
        return self.tabular[idx], self.images[idx]

In [8]:
# Wrap each split in a dataset; only the training loader reshuffles its samples.
batch_size = 32
train_ds = HousingDataset(tabular=X_tab_train, images=X_img_train, prices=y_train)
test_ds = HousingDataset(tabular=X_tab_test, images=X_img_test, prices=y_test)
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

In [9]:
# CNN for feature extraction
class CNNFeatureExtractor(nn.Module):
    """Two conv/pool stages followed by a linear head producing 128 features.

    The linear head is sized for 3-channel 32x32 inputs: each 2x2 max-pool
    halves the spatial size (32 -> 16 -> 8), leaving 32 channels of 8x8.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.fc = nn.Linear(32 * 8 * 8, 128)  # After two pools, 32/4=8

    def forward(self, x):
        """Map a ``(batch, 3, 32, 32)`` image batch to ``(batch, 128)`` features.

        Raises:
            RuntimeError: If the flattened feature size does not match the
                linear head, e.g. for inputs that are not 32x32.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample rather than view(-1, 32 * 8 * 8): with a
        # different spatial size, view would silently fold the extra elements
        # into the batch dimension instead of raising.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc(x))
        return x

In [10]:
# Multimodal model
class MultimodalModel(nn.Module):
    """Late-fusion regressor over tabular features and CNN image features.

    Tabular features pass through one linear layer (64 units), images through
    ``CNNFeatureExtractor``; both are concatenated and fed to a two-layer head
    that outputs one value per sample.

    Args:
        tabular_input_size: Number of tabular features per sample.
        image_feature_size: Not used to size any layer; kept so existing
            callers keep working. The fusion width is read from the CNN head.
    """

    def __init__(self, tabular_input_size, image_feature_size):
        super().__init__()
        self.cnn = CNNFeatureExtractor()
        self.tabular_fc = nn.Linear(tabular_input_size, 64)
        # Size the fusion layer from the CNN's actual output width so the two
        # cannot drift apart.
        cnn_out_features = self.cnn.fc.out_features
        self.combined_fc1 = nn.Linear(64 + cnn_out_features, 128)
        self.combined_fc2 = nn.Linear(128, 1)

    def forward(self, tabular, images):
        """Return predictions of shape ``(batch,)`` for the given batch."""
        img_features = self.cnn(images)
        tab_features = torch.relu(self.tabular_fc(tabular))
        combined = torch.cat((tab_features, img_features), dim=1)
        x = torch.relu(self.combined_fc1(combined))
        x = self.combined_fc2(x)
        # Drop only the trailing feature dimension. A bare squeeze() would
        # also remove the batch dimension for a single-sample batch and yield
        # a 0-d tensor that no longer matches the (1,) target.
        return x.squeeze(-1)

# Build the model, its regression loss and the Adam optimiser.
model = MultimodalModel(
    tabular_input_size=n_tabular_features,
    image_feature_size=128,
)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [12]:
# Train: one optimiser step per mini-batch, for a fixed number of epochs.
epochs = 10
for epoch in range(epochs):
    model.train()
    for batch in train_loader:
        tab, img, price = batch
        # Clear gradients left over from the previous step before the new pass.
        optimizer.zero_grad()
        pred = model(tab, img)
        loss = criterion(pred, price)
        loss.backward()
        optimizer.step()

In [13]:
# Evaluate: collect per-sample predictions and targets over the test set.
model.eval()
preds = []
true = []
with torch.no_grad():
    for tab, img, price in test_loader:
        # reshape(-1) guarantees a 1-D tensor even if the model returns a 0-d
        # scalar for a single-sample batch, which list.extend cannot iterate.
        pred = model(tab, img).reshape(-1)
        preds.extend(pred.numpy())
        true.extend(price.numpy())

In [14]:
# Turn the accumulated per-sample values into NumPy arrays for vectorised metrics.
preds, true = np.array(preds), np.array(true)

In [15]:
# Mean absolute error and root-mean-squared error on the test split.
errors = true - preds
mae = np.abs(errors).mean()
rmse = np.sqrt(np.square(errors).mean())
print(f"MAE: {mae:.3f}")
print(f"RMSE: {rmse:.3f}")

MAE: 1.023
RMSE: 1.258
