In [2]:
# Task 3: Multimodal Housing Price Prediction (Self-Contained)

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import numpy as np
import pandas as pd
import os

# -----------------------------
# Step 1: Generate Fake Housing Dataset
# -----------------------------
os.makedirs("dummy_houses", exist_ok=True)

num_samples = 200
# Tabular data
tabular_data = pd.DataFrame({
    "rooms": np.random.randint(1, 6, num_samples),
    "area": np.random.randint(500, 3500, num_samples),
    "location_score": np.random.uniform(0, 1, num_samples),
    "price": np.random.randint(50_000, 500_000, num_samples)  # target variable
})

# Save dummy images
for i in range(num_samples):
    img = Image.fromarray(np.uint8(np.random.rand(128, 128, 3) * 255))  # noisy RGB image
    img.save(f"dummy_houses/house_{i}.jpg")

# -----------------------------
# Step 2: Dataset Class
# -----------------------------
class HousingDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None):
        self.df = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        tabular_features = self.df.iloc[idx][["rooms", "area", "location_score"]].values.astype(np.float32)
        target = np.float32(self.df.iloc[idx]["price"])

        img_path = os.path.join(self.img_dir, f"house_{idx}.jpg")
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)

        return img, torch.tensor(tabular_features), torch.tensor(target)

# -----------------------------
# Step 3: DataLoader
# -----------------------------
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

dataset = HousingDataset(tabular_data, "dummy_houses", transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

# -----------------------------
# Step 4: Multimodal Model
# -----------------------------
class MultiModalNet(nn.Module):
    def __init__(self, tabular_input_dim):
        super(MultiModalNet, self).__init__()
        # CNN backbone (ResNet18 pretrained)
        self.cnn = models.resnet18(pretrained=True)
        self.cnn.fc = nn.Identity()  # keep features (512-dim)

        # Tabular branch
        self.fc_tab = nn.Sequential(
            nn.Linear(tabular_input_dim, 64),
            nn.ReLU()
        )

        # Final regression
        self.fc_final = nn.Sequential(
            nn.Linear(512 + 64, 1)
        )

    def forward(self, img, tabular):
        img_feat = self.cnn(img)               # (batch, 512)
        tab_feat = self.fc_tab(tabular)        # (batch, 64)
        combined = torch.cat((img_feat, tab_feat), dim=1)
        return self.fc_final(combined).squeeze()

# -----------------------------
# Step 5: Train Model
# -----------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MultiModalNet(tabular_input_dim=3).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train 2 epochs (demo)
for epoch in range(2):
    model.train()
    total_loss = 0
    for imgs, tabs, targets in dataloader:
        imgs, tabs, targets = imgs.to(device), tabs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(imgs, tabs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(dataloader):.4f}")




Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\kiran/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100%|██████████████████████████████████████████████████████████████████████████████| 44.7M/44.7M [02:14<00:00, 348kB/s]


Epoch 1, Loss: 93241155584.0000
Epoch 2, Loss: 94191285484.3077


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.models import resnet18, ResNet18_Weights
from sklearn.preprocessing import StandardScaler
from PIL import Image
import numpy as np
import pandas as pd
import os

os.makedirs("dummy_houses", exist_ok=True)

num_samples = 200
tabular_data = pd.DataFrame({
    "rooms": np.random.randint(1, 6, num_samples),
    "area": np.random.randint(500, 3500, num_samples),
    "location_score": np.random.uniform(0, 1, num_samples),
    "price": np.random.randint(50_000, 500_000, num_samples)  # target variable
})

# Normalize target to thousands
tabular_data["price"] = tabular_data["price"] / 1000.0

# Scale tabular features
scaler = StandardScaler()
tabular_data[["rooms", "area", "location_score"]] = scaler.fit_transform(
    tabular_data[["rooms", "area", "location_score"]]
)

# Save dummy images (random noise)
for i in range(num_samples):
    img = Image.fromarray(np.uint8(np.random.rand(128, 128, 3) * 255))
    img.save(f"dummy_houses/house_{i}.jpg")

class HousingDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None):
        self.df = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        tabular_features = self.df.iloc[idx][["rooms", "area", "location_score"]].values.astype(np.float32)
        target = np.float32(self.df.iloc[idx]["price"])

        img_path = os.path.join(self.img_dir, f"house_{idx}.jpg")
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)

        return img, torch.tensor(tabular_features), torch.tensor(target)

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

dataset = HousingDataset(tabular_data, "dummy_houses", transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

class MultiModalNet(nn.Module):
    def __init__(self, tabular_input_dim):
        super(MultiModalNet, self).__init__()
        # CNN backbone (ResNet18 pretrained)
        self.cnn = resnet18(weights=ResNet18_Weights.DEFAULT)
        self.cnn.fc = nn.Identity()  # keep features (512-dim)

        # Tabular branch
        self.fc_tab = nn.Sequential(
            nn.Linear(tabular_input_dim, 64),
            nn.ReLU()
        )

        # Final regression
        self.fc_final = nn.Sequential(
            nn.Linear(512 + 64, 1)
        )

    def forward(self, img, tabular):
        img_feat = self.cnn(img)               # (batch, 512)
        tab_feat = self.fc_tab(tabular)        # (batch, 64)
        combined = torch.cat((img_feat, tab_feat), dim=1)
        return self.fc_final(combined).squeeze()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MultiModalNet(tabular_input_dim=3).to(device)
criterion = nn.L1Loss()   # MAE instead of MSE
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for 5 epochs (demo)
for epoch in range(5):
    model.train()
    total_loss = 0
    for imgs, tabs, targets in dataloader:
        imgs, tabs, targets = imgs.to(device), tabs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(imgs, tabs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(dataloader):.4f}")


Epoch 1, Loss: 279.3307
Epoch 2, Loss: 271.8826
Epoch 3, Loss: 265.0653
Epoch 4, Loss: 253.2413
Epoch 5, Loss: 241.0056
