<a href="https://colab.research.google.com/github/Momina-Hussnain/Multimodal-ML-Housing-Price-Prediction-Using-Images-Tabular-Data/blob/main/Task_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch, torchvision, sklearn, matplotlib, pandas
# Report the installed versions of the two libraries the training script
# depends on most directly, one "<Name>: <version>" line each.
for label, module in (("Torch", torch), ("Torchvision", torchvision)):
    print(f"{label}:", module.__version__)


Torch: 2.8.0+cu126
Torchvision: 0.23.0+cu126


In [None]:
%%writefile Multimodal_Housing_Prediction.py
import os
import argparse
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split


# -----------------------------
# Dataset
# -----------------------------
class HousingDataset(Dataset):
    """Image + tabular regression dataset backed by a CSV file.

    Every CSV row provides an image file name (``image_col``), a regression
    target (``price_col``) and any number of extra columns. The *numeric*
    columns among the extras form the tabular feature vector; non-numeric
    extras are ignored so they cannot break the float conversion.

    Args:
        csv_file: Path of the CSV file to read.
        img_dir: Directory the image file names are resolved against.
        transform: Optional callable applied to the loaded RGB PIL image.
        price_col: Name of the target column.
        image_col: Name of the column holding image file names.

    Raises:
        ValueError: If ``price_col`` or ``image_col`` is missing from the CSV.
    """

    def __init__(self, csv_file, img_dir, transform=None, price_col="Price", image_col="Image"):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

        # --- Validate that the target and image columns exist ---
        if price_col not in self.data.columns:
            raise ValueError(f"CSV must contain target column (expected '{price_col}'). Found: {self.data.columns}")
        if image_col not in self.data.columns:
            raise ValueError(f"CSV must contain image column (expected '{image_col}'). Found: {self.data.columns}")

        # Targets
        self.prices = self.data[price_col].values.astype(np.float32)

        # Image file names
        self.images = self.data[image_col].values

        # Tabular = all numeric columns except target + image.
        # select_dtypes drops string/object columns that would otherwise make
        # the float32 conversion raise.
        features = self.data.drop(columns=[price_col, image_col])
        features = features.select_dtypes(include="number")
        self.tabular = features.to_numpy(dtype=np.float32)

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, tabular_features, price)`` for row ``idx``."""
        # Load image; the context manager closes the underlying file handle,
        # while convert() returns an independent, fully loaded RGB copy.
        img_path = os.path.join(self.img_dir, self.images[idx])
        with Image.open(img_path) as raw:
            image = raw.convert("RGB")
        if self.transform:
            image = self.transform(image)

        # Tabular and price
        tab = torch.tensor(self.tabular[idx])
        price = torch.tensor(self.prices[idx])

        return image, tab, price


# -----------------------------
# Model
# -----------------------------
class MultimodalModel(nn.Module):
    """Late-fusion regressor combining a frozen ResNet50 with a tabular MLP.

    The image branch is a pretrained ResNet50 whose classification head is
    replaced by ``nn.Identity`` and whose parameters are frozen. The tabular
    branch is a two-layer MLP producing 64 features. Both feature vectors are
    concatenated and passed through a small regression head that outputs one
    value per sample.

    Args:
        tabular_dim: Number of features in each tabular input vector.
    """

    def __init__(self, tabular_dim):
        super().__init__()
        # Pretrained CNN (ResNet50 backbone), used as a fixed feature extractor
        cnn = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        for param in cnn.parameters():
            param.requires_grad = False
        # Read the feature width from the head being removed instead of
        # hard-coding it (2048 for ResNet50).
        img_feat_dim = cnn.fc.in_features
        cnn.fc = nn.Identity()  # remove last layer
        self.cnn = cnn
        self.cnn.eval()

        # Tabular branch
        self.tabular_net = nn.Sequential(
            nn.Linear(tabular_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
        )

        # Fusion + Regression
        self.fc = nn.Sequential(
            nn.Linear(img_feat_dim + 64, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

    def train(self, mode=True):
        """Set the training mode while keeping the frozen backbone in eval mode.

        Freezing parameters with ``requires_grad = False`` does not stop
        BatchNorm layers from updating their running statistics in training
        mode, so the backbone is forced back to eval mode after every switch.
        """
        super().train(mode)
        self.cnn.eval()
        return self

    def forward(self, image, tabular):
        """Return one prediction per sample as a tensor of shape ``[batch]``."""
        img_feat = self.cnn(image)           # [batch, img_feat_dim]
        tab_feat = self.tabular_net(tabular) # [batch, 64]
        fused = torch.cat((img_feat, tab_feat), dim=1)
        out = self.fc(fused).squeeze(-1)     # [batch]
        return out


# -----------------------------
# Training
# -----------------------------
def train_model(model, train_loader, val_loader, device, epochs=5, lr=1e-3):
    """Fit ``model`` with Adam on an MSE objective, validating after each epoch.

    Args:
        model: Module called as ``model(images, tabular)``.
        train_loader: Iterable of ``(images, tabular, prices)`` training batches.
        val_loader: Iterable of ``(images, tabular, prices)`` validation batches.
        device: Device every batch tensor is moved to before the forward pass.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to Adam.

    Returns:
        The same ``model`` object after training.
    """
    loss_fn = nn.MSELoss()
    opt = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        # ---- Optimisation pass over the training batches ----
        model.train()
        batch_losses = []
        for batch in train_loader:
            images, tab, prices = (t.to(device) for t in batch)

            opt.zero_grad()
            batch_loss = loss_fn(model(images, tab), prices)
            batch_loss.backward()
            opt.step()
            batch_losses.append(batch_loss.item())

        # ---- Gradient-free pass over the validation batches ----
        model.eval()
        val_preds = []
        val_targets = []
        with torch.no_grad():
            for batch in val_loader:
                images, tab, prices = (t.to(device) for t in batch)
                val_preds.extend(model(images, tab).cpu().numpy())
                val_targets.extend(prices.cpu().numpy())

        # Epoch summary: mean training loss plus validation MAE / RMSE
        mean_loss = np.mean(batch_losses)
        mae = mean_absolute_error(val_targets, val_preds)
        rmse = mean_squared_error(val_targets, val_preds) ** 0.5
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {mean_loss:.4f}, "
              f"Val MAE: {mae:.2f}, Val RMSE: {rmse:.2f}")

    return model


# -----------------------------
# Main Pipeline
# -----------------------------
def run_pipeline(csv_file, img_dir, out_dir="outputs", epochs=5, batch_size=16, lr=1e-3,
                 price_col="Price", image_col="Image"):
    """Split the CSV, build datasets and loaders, then train the multimodal model.

    The CSV is split 80/20 with a fixed seed; the two parts are written to
    ``train.csv`` and ``val.csv`` inside ``out_dir`` and read back through
    ``HousingDataset``.

    Args:
        csv_file: Path of the full CSV file.
        img_dir: Directory containing the images referenced by the CSV.
        out_dir: Directory for the split CSV files (created if missing).
        epochs: Number of training epochs.
        batch_size: Batch size for both data loaders.
        lr: Learning rate passed to the optimiser.
        price_col: Name of the target column.
        image_col: Name of the image file name column.

    Returns:
        The trained model.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Train-val split
    df = pd.read_csv(csv_file)
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
    train_csv, val_csv = os.path.join(out_dir, "train.csv"), os.path.join(out_dir, "val.csv")
    train_df.to_csv(train_csv, index=False)
    val_df.to_csv(val_csv, index=False)

    # Image transforms. The ImageNet-pretrained backbone expects inputs
    # normalised with the ImageNet channel statistics documented by torchvision.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Datasets
    train_dataset = HousingDataset(train_csv, img_dir, transform=transform,
                                   price_col=price_col, image_col=image_col)
    val_dataset = HousingDataset(val_csv, img_dir, transform=transform,
                                 price_col=price_col, image_col=image_col)

    # Take the feature width from what the dataset actually yields so the
    # model's first layer always matches the tabular tensors it receives.
    tabular_dim = train_dataset.tabular.shape[1]

    # Dataloaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    # Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device:", device)
    model = MultimodalModel(tabular_dim).to(device)

    # Train, then hand the fitted model back instead of discarding it
    model = train_model(model, train_loader, val_loader, device, epochs=epochs, lr=lr)
    return model


if __name__ == "__main__":
    # Command-line entry point: parse the options and forward them unchanged
    # to run_pipeline().
    cli = argparse.ArgumentParser()

    # Required inputs
    cli.add_argument("--csv", required=True, type=str, help="Path to CSV file")
    cli.add_argument("--imdir", required=True, type=str, help="Path to image directory")

    # Optional output location and training hyper-parameters
    cli.add_argument("--out", default="outputs", type=str, help="Output directory")
    cli.add_argument("--epochs", default=5, type=int)
    cli.add_argument("--batch_size", default=16, type=int)
    cli.add_argument("--lr", default=1e-3, type=float)

    # Column names, for CSVs that do not use the default headers
    cli.add_argument("--price_col", default="Price", type=str, help="Target column name")
    cli.add_argument("--image_col", default="Image", type=str, help="Image filename column name")

    opts = cli.parse_args()

    run_pipeline(
        opts.csv,
        opts.imdir,
        out_dir=opts.out,
        epochs=opts.epochs,
        batch_size=opts.batch_size,
        lr=opts.lr,
        price_col=opts.price_col,
        image_col=opts.image_col,
    )


Overwriting Multimodal_Housing_Prediction.py


In [None]:
import pandas as pd
# Load the demo table and show its first five rows to check the column names
# that the training script will be pointed at.
df = pd.read_csv("./synthetic_demo/data.csv")
print(df.head(n=5))


   id       image_path  sqft  bedrooms  bathrooms  year_built          price
0   0  image_00000.jpg  1698         3          1        1973  114752.070965
1   1  image_00001.jpg  1604         2          1        1962   76264.513476
2   2  image_00002.jpg  1369         2          1        1993   78141.671961
3   3  image_00003.jpg  1398         5          4        1978  144442.799065
4   4  image_00004.jpg   968         1          1        2009   50000.000000


In [None]:
!python Multimodal_Housing_Prediction.py \
    --csv ./synthetic_demo/data.csv \
    --imdir ./synthetic_demo/images \
    --out ./outputs \
    --epochs 5 \
    --batch_size 16 \
    --lr 0.001 \
    --price_col price \
    --image_col image_path


Device: cuda
Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100% 97.8M/97.8M [00:00<00:00, 190MB/s]
Epoch 1/5, Train Loss: 15843188462.9333, Val MAE: 107784.95, Val RMSE: 117044.35
Epoch 2/5, Train Loss: 10906355865.6000, Val MAE: 52891.15, Val RMSE: 65697.14
Epoch 3/5, Train Loss: 2525832576.0000, Val MAE: 36118.62, Val RMSE: 44167.53
Epoch 4/5, Train Loss: 1952037196.8000, Val MAE: 34971.51, Val RMSE: 42878.25
Epoch 5/5, Train Loss: 1961802338.1333, Val MAE: 35375.32, Val RMSE: 43328.35


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the complete demo table
df = pd.read_csv("./synthetic_demo/data.csv")

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible across runs
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# Write each part next to the source CSV, without the pandas index column
for frame, path in (
    (train_df, "./synthetic_demo/train.csv"),
    (val_df, "./synthetic_demo/val.csv"),
):
    frame.to_csv(path, index=False)

# Report how many rows ended up in each part
for label, frame in (("Train size:", train_df), ("Val size:", val_df)):
    print(label, len(frame))


Train size: 480
Val size: 120


In [None]:
!python Multimodal_Housing_Prediction.py \
    --csv ./synthetic_demo/train.csv \
    --imdir ./synthetic_demo/images \
    --out ./outputs \
    --epochs 5 \
    --batch_size 16 \
    --lr 0.001 \
    --price_col price \
    --image_col image_path


Device: cuda
Epoch 1/5, Train Loss: 15812474837.3333, Val MAE: 119523.66, Val RMSE: 128829.60
Epoch 2/5, Train Loss: 14129655893.3333, Val MAE: 102587.98, Val RMSE: 113119.13
Epoch 3/5, Train Loss: 7520639893.3333, Val MAE: 44456.59, Val RMSE: 58310.49
Epoch 4/5, Train Loss: 2108135464.0000, Val MAE: 35485.33, Val RMSE: 46769.50
Epoch 5/5, Train Loss: 1923024296.0000, Val MAE: 34575.50, Val RMSE: 47113.97


In [None]:
!python Multimodal_Housing_Prediction.py \
    --csv ./synthetic_demo/train.csv \
    --imdir ./synthetic_demo/images \
    --out ./outputs \
    --epochs 5 \
    --batch_size 16 \
    --lr 0.001 \
    --price_col price \
    --image_col image_path


Device: cuda
Epoch 1/5, Train Loss: 15843209813.3333, Val MAE: 119734.22, Val RMSE: 129030.60
Epoch 2/5, Train Loss: 14135594752.0000, Val MAE: 102040.63, Val RMSE: 112650.04
Epoch 3/5, Train Loss: 7239960842.6667, Val MAE: 42623.34, Val RMSE: 55893.12
Epoch 4/5, Train Loss: 2169777712.0000, Val MAE: 34732.40, Val RMSE: 46437.04
Epoch 5/5, Train Loss: 1895639626.6667, Val MAE: 34533.73, Val RMSE: 47092.90


In [None]:
!python Multimodal_Housing_Prediction.py \
    --csv ./synthetic_demo/data.csv \
    --imdir ./synthetic_demo/images \
    --out ./outputs \
    --epochs 5 \
    --batch_size 16 \
    --lr 0.001 \
    --price_col price \
    --image_col image_path


Device: cuda
Epoch 1/5, Train Loss: 15997082043.7333, Val MAE: 110553.66, Val RMSE: 119648.88
Epoch 2/5, Train Loss: 12925920256.0000, Val MAE: 75635.26, Val RMSE: 87916.30
Epoch 3/5, Train Loss: 4106953587.2000, Val MAE: 37960.64, Val RMSE: 46607.47
Epoch 4/5, Train Loss: 1997263863.4667, Val MAE: 35377.29, Val RMSE: 43198.66
Epoch 5/5, Train Loss: 1971524467.2000, Val MAE: 35268.11, Val RMSE: 43105.51
