In [2]:

import os, random, numpy as np, pandas as pd
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from torchvision.models import ResNet18_Weights
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error



BASE_DIR = "/content"
DATA_DIR = os.path.join(BASE_DIR, "data")
IMAGE_DIR = os.path.join(DATA_DIR, "images")
CSV_PATH = os.path.join(DATA_DIR, "housing.csv")
os.makedirs(IMAGE_DIR, exist_ok=True)


if not os.path.exists(CSV_PATH):
    print("Creating sample dataset...")
    rows = []

    for i in range(30):
        img_name = f"house_{i}.jpg"
        img_path = os.path.join(IMAGE_DIR, img_name)

        Image.new(
            "RGB",
            (256, 256),
            (random.randint(0,255), random.randint(0,255), random.randint(0,255))
        ).save(img_path)

        rows.append({
            "image_name": img_name,
            "area": float(random.randint(800, 3000)),
            "bedrooms": float(random.randint(1, 5)),
            "bathrooms": float(random.randint(1, 4)),
            "location_score": float(round(random.uniform(5, 9), 2)),
            "price": float(random.randint(120000, 500000))
        })

    pd.DataFrame(rows).to_csv(CSV_PATH, index=False)
    print("Dataset & images created")


class HousingDataset(Dataset):
    def __init__(self, df, image_dir, transform):
        self.df = df.reset_index(drop=True)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        image = Image.open(
            os.path.join(self.image_dir, row["image_name"])
        ).convert("RGB")
        image = self.transform(image)


        tabular = torch.tensor(
            row[["area","bedrooms","bathrooms","location_score"]]
            .astype(np.float32)
            .values,
            dtype=torch.float32
        )

        price = torch.tensor(float(row["price"]), dtype=torch.float32)
        return image, tabular, price


class ImageEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        resnet = models.resnet18(weights=ResNet18_Weights.DEFAULT)
        self.features = nn.Sequential(*list(resnet.children())[:-1])
        self.fc = nn.Linear(512, 128)

    def forward(self, x):
        x = self.features(x)
        return self.fc(x.view(x.size(0), -1))

class TabularEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(4, 64),
            nn.ReLU(),
            nn.Linear(64, 32)
        )

    def forward(self, x):
        return self.net(x)

class MultimodalRegressor(nn.Module):
    def __init__(self):
        super().__init__()
        self.image_encoder = ImageEncoder()
        self.tabular_encoder = TabularEncoder()
        self.regressor = nn.Sequential(
            nn.Linear(160, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, image, tabular):
        x = torch.cat([
            self.image_encoder(image),
            self.tabular_encoder(tabular)
        ], dim=1)
        return self.regressor(x).squeeze()


df = pd.read_csv(CSV_PATH)

scaler = StandardScaler()
df[["area","bedrooms","bathrooms","location_score"]] = scaler.fit_transform(
    df[["area","bedrooms","bathrooms","location_score"]]
)

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor()
])

train_loader = DataLoader(
    HousingDataset(train_df, IMAGE_DIR, transform),
    batch_size=8, shuffle=True
)

test_loader = DataLoader(
    HousingDataset(test_df, IMAGE_DIR, transform),
    batch_size=8, shuffle=False
)

# TRAIN
model = MultimodalRegressor().to(DEVICE)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(5):
    model.train()
    total_loss = 0

    for images, tabular, prices in train_loader:
        images, tabular, prices = images.to(DEVICE), tabular.to(DEVICE), prices.to(DEVICE)

        optimizer.zero_grad()
        loss = criterion(model(images, tabular), prices)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/5 | Loss: {total_loss/len(train_loader):.4f}")

#  EVALUATE
model.eval()
preds, targets = [], []

with torch.no_grad():
    for images, tabular, prices in test_loader:
        images, tabular = images.to(DEVICE), tabular.to(DEVICE)
        outputs = model(images, tabular)
        preds.extend(outputs.cpu().numpy())
        targets.extend(prices.numpy())

mae = mean_absolute_error(targets, preds)
rmse = np.sqrt(mean_squared_error(targets, preds))

print("\nFINAL RESULTS")
print("MAE :", round(mae, 2))
print("RMSE:", round(rmse, 2))


Using device: cuda
Epoch 1/5 | Loss: 104835612672.0000
Epoch 2/5 | Loss: 104833452714.6667
Epoch 3/5 | Loss: 104829995690.6667
Epoch 4/5 | Loss: 104824190293.3333
Epoch 5/5 | Loss: 104815741610.6667

FINAL RESULTS
MAE : 296780.43
RMSE: 316869.48
