In [1]:
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms
import os
import torch
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
from sklearn.metrics import mean_absolute_error,mean_squared_error
import numpy as np


In [2]:
class HousingDataset(Dataset):
    def __init__(self,csv_file,img_dir,transform=None):
        self.data=pd.read_csv(csv_file)
        self.img_dir=img_dir
        self.transform=transform

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self,idx):
        img_name=os.path.join(self.img_dir,self.data.iloc[idx]['image_name'])
        image=Image.open(img_name).convert("RGB")
        if self.transform:
            image=self.transform(image)
        tabular = self.data.iloc[idx].drop(['price', 'image_name']).values.astype('float32')
        tabular = torch.tensor(tabular)
        price=torch.tensor(self.data.iloc[idx]['price'],dtype=torch.float32)
        return image,tabular,price

In [3]:
transform=transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [4]:
dataset = HousingDataset('data/housing_data.csv', 'data/images/', transform=transform)
train_size=int(0.8*len(dataset))
val_size=len(dataset)-train_size
train_dataset,val_dataset=torch.utils.data.random_split(dataset,[train_size,val_size])


In [5]:
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

In [7]:
class MultimodalHousingModel(nn.Module):
    def __init__(self, tabular_input_size, cnn_feature_size=512, hidden_size=128):
        super(MultimodalHousingModel, self).__init__()

        # CNN backbone
        self.cnn = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        self.cnn.fc = nn.Identity()  # Remove final layer → extract features

        # Tabular MLP
        self.tabular_mlp = nn.Sequential(
            nn.Linear(tabular_input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU()
        )

        # Fusion + Regression Head
        self.fusion = nn.Sequential(
            nn.Linear(cnn_feature_size + 32, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1)
        )

    def forward(self, image, tabular):
        img_features = self.cnn(image)        # [B, 512]
        tab_features = self.tabular_mlp(tabular)  # [B, 32]
        combined = torch.cat([img_features, tab_features], dim=1)  # [B, 544]
        output = self.fusion(combined)
        return output.squeeze(1)  # [B]

In [8]:
device = torch.device('cpu')  # You're on CPU
model = MultimodalHousingModel(tabular_input_size=5).to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\Abdullah/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100.0%


In [9]:
optimizer = optim.Adam(model.parameters(), lr=1e-4)  # Smart optimizer
criterion = nn.MSELoss()  # Loss function for regression

In [10]:
num_epochs = 50

for epoch in range(num_epochs):
    # ➤ TRAINING PHASE
    model.train()
    train_loss = 0.0
    for images, tabular, prices in train_loader:
        images, tabular, prices = images.to(device), tabular.to(device), prices.to(device)

        optimizer.zero_grad()           # Clear old gradients
        outputs = model(images, tabular) # Robot makes predictions
        loss = criterion(outputs, prices) # How wrong was it?
        loss.backward()                 # Calculate gradients
        optimizer.step()                # Update weights

        train_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {train_loss/len(train_loader):.4f}")

    # ➤ VALIDATION PHASE
    model.eval()
    val_preds, val_targets = [], []
    with torch.no_grad():  # No gradient calculation → faster + less memory
        for images, tabular, prices in val_loader:
            images, tabular, prices = images.to(device), tabular.to(device), prices.to(device)
            outputs = model(images, tabular)
            val_preds.extend(outputs.cpu().numpy())
            val_targets.extend(prices.cpu().numpy())

    mae = mean_absolute_error(val_targets, val_preds)
    rmse = np.sqrt(mean_squared_error(val_targets, val_preds))
    print(f"Validation MAE: {mae:.2f}, RMSE: {rmse:.2f}")

Epoch 1/50, Loss: 335961666355.2000
Validation MAE: 556942.96, RMSE: 607519.21
Epoch 2/50, Loss: 335955836928.0000
Validation MAE: 556938.16, RMSE: 607514.27
Epoch 3/50, Loss: 335950177894.4000
Validation MAE: 556933.24, RMSE: 607509.19
Epoch 4/50, Loss: 335944318976.0000
Validation MAE: 556928.15, RMSE: 607503.99
Epoch 5/50, Loss: 335938496102.4000
Validation MAE: 556922.95, RMSE: 607498.66
Epoch 6/50, Loss: 335932263628.8000
Validation MAE: 556918.22, RMSE: 607493.68
Epoch 7/50, Loss: 335925647769.6000
Validation MAE: 556912.49, RMSE: 607487.87
Epoch 8/50, Loss: 335918986035.2000
Validation MAE: 556906.15, RMSE: 607481.42
Epoch 9/50, Loss: 335911957299.2000
Validation MAE: 556899.80, RMSE: 607474.84
Epoch 10/50, Loss: 335904171622.4000
Validation MAE: 556893.84, RMSE: 607468.49
Epoch 11/50, Loss: 335895992729.6000
Validation MAE: 556886.96, RMSE: 607461.32
Epoch 12/50, Loss: 335886883225.6000
Validation MAE: 556879.22, RMSE: 607453.43
Epoch 13/50, Loss: 335878062080.0000
Validation M