<a href="https://colab.research.google.com/github/Abdullah-Tanweer/DEVELOPERS-HUB-AI-INTERNSHIP/blob/main/TASK_3_DEV_HUB_(ADV).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install torch torchvision pandas scikit-learn matplotlib pillow


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the listings table ('image_path', tabular features and the target) and
# keep only the rows without missing values.
df = pd.read_csv("housing_data.csv").dropna()

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
train_df, test_df = train_test_split(df, random_state=42, test_size=0.2)


In [None]:
import torch
from torchvision import models, transforms
from PIL import Image
import os

# Pretrained CNN (ResNet18) used for image feature extraction.
# `pretrained=True` is deprecated since torchvision 0.13; the weights enum below
# requests the same ImageNet checkpoint explicitly.
resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
resnet.fc = torch.nn.Identity()  # Remove final classification layer so the network returns pooled features
resnet.eval()  # Inference mode (e.g. batch-norm uses its stored statistics)

# Image transforms: resize to 224x224, convert to a tensor, then normalize each
# channel with the ImageNet mean/std that torchvision's pretrained models expect.
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Feature extractor
def extract_image_features(image_path):
    """Return the ResNet feature vector for a single image file.

    Parameters
    ----------
    image_path : str or path-like
        Location of the image; it is opened with PIL and converted to RGB.

    Returns
    -------
    numpy.ndarray
        1-D feature vector (512 values for ResNet18 with its final layer
        replaced by ``Identity``).
    """
    # The context manager closes the underlying file handle deterministically
    # instead of leaving it to garbage collection.
    with Image.open(image_path) as img:
        img_tensor = image_transform(img.convert("RGB")).unsqueeze(0)  # Add batch dimension
    with torch.no_grad():  # No gradients are needed for feature extraction
        features = resnet(img_tensor)
    # squeeze(0) removes only the batch axis added above
    return features.squeeze(0).numpy()


In [None]:
from sklearn.preprocessing import StandardScaler

# Regression target and the tabular input columns (example set)
target = 'price'
tabular_features = ['bedrooms', 'bathrooms', 'sqft', 'year_built']

# Learn the scaling statistics from the training rows only; fit() returns the
# fitted scaler itself, so construction and fitting can be chained.
scaler = StandardScaler().fit(train_df[tabular_features])


In [None]:
import numpy as np

def build_feature_matrix(df, image_dir="images"):
    """Build the combined image + tabular feature matrix for a data split.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to featurize; must contain 'image_path', the columns listed in
        ``tabular_features`` and the ``target`` column.
    image_dir : str, optional
        Directory that the 'image_path' entries are joined onto.
        Defaults to "images".

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The horizontally stacked [image features | scaled tabular features]
        matrix, and the target values.

    Raises
    ------
    ValueError
        If ``df`` has no rows (there would be nothing to stack).
    """
    if len(df) == 0:
        raise ValueError("build_feature_matrix received an empty DataFrame")

    # One forward pass per image; rows are stacked in the DataFrame's order
    image_features = np.stack([
        extract_image_features(os.path.join(image_dir, p)) for p in df['image_path']
    ])
    # Reuse the scaler that was fitted on the training split
    tabular_scaled = scaler.transform(df[tabular_features])
    combined = np.hstack([image_features, tabular_scaled])
    return combined, df[target].values

X_train, y_train = build_feature_matrix(train_df)
X_test, y_test = build_feature_matrix(test_df)


In [None]:
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# Wrap the NumPy feature/target arrays as float32 tensor datasets
def _as_tensor_dataset(features, targets):
    """Return a TensorDataset of float32 inputs and float32 targets with a trailing axis added."""
    inputs = torch.tensor(features, dtype=torch.float32)
    labels = torch.tensor(targets, dtype=torch.float32).unsqueeze(1)
    return TensorDataset(inputs, labels)

train_ds = _as_tensor_dataset(X_train, y_train)
test_ds = _as_tensor_dataset(X_test, y_test)

# Batches of 32; only the training loader shuffles
test_loader = DataLoader(test_ds, batch_size=32)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)

# Model definition
class MultimodalRegressor(nn.Module):
    """Feed-forward regressor over the concatenated image + tabular features.

    NOTE(review): the original cell was cut off after the first Dropout layer.
    Everything from the second Linear layer onward, ``forward``, and the
    model / loss / optimizer setup below is a reconstruction so the notebook
    runs end to end -- confirm layer sizes and hyper-parameters.
    """

    def __init__(self, input_dim):
        """Create the network for inputs with ``input_dim`` features per row."""
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 1),  # One output per row, matching the (N, 1) targets
        )

    def forward(self, x):
        """Return the prediction for each row of ``x``."""
        return self.model(x)


# Objects that the training and evaluation cells rely on
model = MultimodalRegressor(X_train.shape[1])
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)


In [None]:
# Training loop: 20 passes over the training batches, reporting the mean batch loss
n_batches = len(train_loader)
for epoch_idx in range(20):
    model.train()  # Training mode for every epoch
    batch_losses = []
    for features, targets in train_loader:
        optimizer.zero_grad()  # Clear gradients left over from the previous step
        batch_loss = criterion(model(features), targets)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())

    print(f"Epoch {epoch_idx+1}, Loss: {sum(batch_losses)/n_batches:.4f}")


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Evaluate on the held-out test set
model.eval()  # Evaluation mode for inference
with torch.no_grad():
    # reshape(-1) flattens the predictions to 1-D even when the test set has a
    # single row (a bare squeeze() would collapse that case to a 0-d array).
    preds = model(torch.tensor(X_test, dtype=torch.float32)).reshape(-1).numpy()

mae = mean_absolute_error(y_test, preds)
# `squared=False` was deprecated in scikit-learn 1.4 and later removed; taking
# the square root of the MSE is equivalent and works on every version.
rmse = np.sqrt(mean_squared_error(y_test, preds))

print(f"MAE: ${mae:.2f}")
print(f"RMSE: ${rmse:.2f}")
