In [None]:
# DS340 Multi-Modal Classification Project Outline
#
# Goal: Predict if a piece of furniture is profitable ('Accept' vs. 'Reject')
# Running image-only mode first (CNN only, no tabular features)

# ==============================================================================
# 1. Setup and Imports
# ==============================================================================
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import requests
from io import BytesIO
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score
import matplotlib.pyplot as plt

# Seed the torch and NumPy random number generators so repeated runs match
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Pick the GPU when one is visible to torch, otherwise fall back to the CPU
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_name)
print(f"Using device: {DEVICE}")

# Input files (paths are relative to the working directory)
ACCEPTED_CSV_FILE = '../data/sofa data.csv'
REJECTED_CSV_FILE = '../data/rejected_sofas_only.csv'

# Hyper-parameters
NUM_CLASSES = 2  # Binary classification (Accept/Reject)
BATCH_SIZE = 32
LEARNING_RATE = 1e-4

# ==============================================================================
# 2. Data Loading (Image-Only Mode)
# ==============================================================================

# NOTE(review): an earlier revision of this cell assembled the frame from the
# separate accepted / rejected CSVs (labelling rows through an `is_profitable`
# column and down-sampling the rejected rows to the accepted count). The
# pre-built master file is loaded instead; presumably it was produced the same
# way, but with the label stored in `decision` -- confirm against the data prep.
MASTER_CSV_FILE = '../data/master_sofa_dataset_final.csv'

# Columns the rest of the pipeline reads: image URL and binary label
REQUIRED_COLUMNS = ('photo', 'decision')

df = pd.read_csv(MASTER_CSV_FILE)

# Fail fast with a readable message when the CSV schema does not match,
# instead of a bare KeyError surfacing later from inside the pipeline.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"{MASTER_CSV_FILE} is missing required column(s) {missing_columns}; "
        f"found columns: {list(df.columns)}"
    )

print(f"Total dataset size: {len(df)}")
print(f"Class balance (0=Rejected, 1=Accepted):\n{df['decision'].value_counts()}")

# ==============================================================================
# 3. Dataset Class (Image-Only Mode)
# ==============================================================================

class FurnitureDataset(Dataset):
    """Image-only dataset that downloads each photo from its URL on access.

    Every item is a tuple ``(image, tabular, label)``. ``tabular`` is a
    one-element zero tensor that exists only to keep the item layout stable
    for a later model that also consumes tabular features.

    Args:
        df: DataFrame with a ``photo`` column (image URLs) and a ``decision``
            column (labels that can be converted to ``torch.long``).
        transforms: Callable applied to the loaded PIL image in
            ``__getitem__``.
        timeout: Seconds to wait on the image server, forwarded to
            ``requests.get``. Without a timeout a stalled server would block
            the whole training run.
        cache: When true, the raw bytes of every successfully downloaded and
            decoded image are kept in memory, so each URL is fetched once
            instead of once per access (i.e. once per epoch).
    """

    def __init__(self, df, transforms, timeout=10, cache=True):
        self.df = df.reset_index(drop=True)
        self.transforms = transforms
        self.image_urls = self.df['photo'].values
        self.labels = self.df['decision'].values
        self.timeout = timeout
        # Maps URL -> encoded image bytes; None disables caching entirely
        self._byte_cache = {} if cache else None

    def __len__(self):
        return len(self.df)

    def load_image_from_url(self, url):
        """Return the image at ``url`` as an RGB PIL image.

        Any failure (network error, HTTP error status, undecodable data) is
        reported on stdout and answered with a gray 224x224 placeholder so a
        single bad URL does not abort a run. Failures are never cached, so
        the URL is retried on the next access.
        """
        try:
            raw = None
            if self._byte_cache is not None:
                raw = self._byte_cache.get(url)

            if raw is None:
                response = requests.get(url, timeout=self.timeout)
                # Turn 4xx/5xx answers into an explicit error instead of
                # handing an HTML error page to the image decoder.
                response.raise_for_status()
                raw = response.content

            # Image.open is lazy; convert() forces the full decode here, so a
            # truncated file is caught by this handler rather than blowing up
            # later inside the transforms. It also normalises PNG/greyscale
            # inputs to three channels.
            image = Image.open(BytesIO(raw)).convert('RGB')

            # Store only bytes that decoded successfully
            if self._byte_cache is not None:
                self._byte_cache[url] = raw

            return image
        except Exception as e:
            print(f"Error loading image {url}: {e}")
            # Return a gray placeholder for failed loads (better than pure black)
            return Image.new('RGB', (224, 224), color='gray')

    def get_original_image(self, idx):
        """Return the untransformed image for dataset position ``idx``."""
        return self.load_image_from_url(self.image_urls[idx])

    def __getitem__(self, idx):
        # Load and transform image
        image = self.load_image_from_url(self.image_urls[idx])
        image = self.transforms(image)

        # Dummy tabular tensor (1-dimensional, all zeros) to maintain interface
        tabular = torch.zeros(1, dtype=torch.float32)

        label = torch.tensor(self.labels[idx], dtype=torch.long)

        return image, tabular, label

# ==============================================================================
# 4. Data Transforms and Split
# ==============================================================================

# Standard ImageNet normalization
IMAGE_MEAN = [0.485, 0.456, 0.406]
IMAGE_STD = [0.229, 0.224, 0.225]

# Closing steps shared by both pipelines: tensor conversion, then normalization
_tensor_and_normalize = [
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGE_MEAN, std=IMAGE_STD),
]

# Evaluation pipeline: resize, then a fixed center crop
test_transforms = transforms.Compose(
    [transforms.Resize(256), transforms.CenterCrop(224)] + _tensor_and_normalize
)

# Training pipeline: resize, then a random crop and random horizontal flip
train_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
    ]
    + _tensor_and_normalize
)

# Hold out 30% of the rows, then halve the hold-out into validation and test;
# both splits are stratified on the label column.
df_train, df_temp = train_test_split(
    df,
    test_size=0.3,
    random_state=42,
    stratify=df['decision'],
)
df_val, df_test = train_test_split(
    df_temp,
    test_size=0.5,
    random_state=42,
    stratify=df_temp['decision'],
)

# One dataset per split; only the training split uses the augmenting pipeline
train_dataset, val_dataset, test_dataset = (
    FurnitureDataset(frame, pipeline)
    for frame, pipeline in (
        (df_train, train_transforms),
        (df_val, test_transforms),
        (df_test, test_transforms),
    )
)

# Only the training loader shuffles; validation/test keep dataset order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader, test_loader = (
    DataLoader(split, shuffle=False, batch_size=BATCH_SIZE)
    for split in (val_dataset, test_dataset)
)

print(f"Train samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Test samples: {len(test_dataset)}")

# ==============================================================================
# 5. Model Architecture (Image-Only Mode)
# ==============================================================================

class ImageOnlyClassifier(nn.Module):
    """Pre-trained ResNet-50 with a frozen backbone and a small trainable head.

    The backbone weights have ``requires_grad`` switched off and the original
    ``fc`` layer is replaced by Linear(…, 512) -> ReLU -> Dropout(0.5) ->
    Linear(512, num_classes).

    Args:
        num_classes: Number of output scores produced per image.
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # Load pre-trained ResNet50
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)

        # Freeze backbone weights
        for param in self.resnet.parameters():
            param.requires_grad = False

        # Replace final fully connected layer (new layers are trainable)
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
            nn.Linear(num_features, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

        # Establish the "head trains, backbone stays in eval" state right away
        self.train()

    def train(self, mode=True):
        """Switch train/eval mode while keeping the frozen backbone in eval.

        ``requires_grad = False`` stops weight updates, but BatchNorm layers in
        training mode still overwrite their running mean/variance on every
        forward pass. Keeping every backbone child in eval mode makes the
        backbone genuinely frozen; only the replacement head (``fc``, which
        contains the Dropout) follows ``mode``.
        """
        super().train(mode)
        if mode:
            for child_name, child in self.resnet.named_children():
                if child_name != 'fc':
                    child.eval()
        return self

    def forward(self, image, tabular=None):
        # ``tabular`` is accepted and ignored (maintains interface compatibility)
        return self.resnet(image)

# Build the network, then place it on the selected device
model = ImageOnlyClassifier(num_classes=NUM_CLASSES)
model = model.to(DEVICE)

# ==============================================================================
# 6. Training Setup
# ==============================================================================

num_epochs = 10
criterion = nn.CrossEntropyLoss()

# The optimizer is handed the replacement head's parameters only
head_parameters = model.resnet.fc.parameters()
optimizer = torch.optim.Adam(head_parameters, lr=LEARNING_RATE)

def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network called with the image batch only.
        loader: Iterable of ``(images, tabular, labels)`` batches; the
            tabular element is ignored.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the images and labels are moved to.

    Returns:
        ``(avg_loss, f1)``: the summed per-batch losses divided by the number
        of batches, and the binary F1 score over every prediction made during
        the pass.
    """
    model.train()
    batch_losses = []
    predictions, targets = [], []

    for batch_images, _tabular, batch_labels in loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        scores = model(batch_images)
        batch_loss = criterion(scores, batch_labels)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
        # Index of the highest score per row is the predicted class
        predicted_classes = torch.max(scores.data, 1)[1]
        predictions.extend(predicted_classes.cpu().numpy())
        targets.extend(batch_labels.cpu().numpy())

    avg_loss = sum(batch_losses) / len(loader)
    return avg_loss, f1_score(targets, predictions, average='binary')

def evaluate_model(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without updating any weights.

    Args:
        model: Network called with the image batch only.
        loader: Iterable of ``(images, tabular, labels)`` batches; the
            tabular element is ignored.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the images and labels are moved to.

    Returns:
        ``(avg_loss, accuracy, f1)``: the summed per-batch losses divided by
        the number of batches, plus accuracy and binary F1 over all
        predictions.
    """
    model.eval()
    batch_losses = []
    predictions, targets = [], []

    with torch.no_grad():
        for batch_images, _tabular, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            scores = model(batch_images)
            batch_losses.append(criterion(scores, batch_labels).item())

            # Index of the highest score per row is the predicted class
            predicted_classes = torch.max(scores.data, 1)[1]
            predictions.extend(predicted_classes.cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())

    avg_loss = sum(batch_losses) / len(loader)
    accuracy = accuracy_score(targets, predictions)
    f1 = f1_score(targets, predictions, average='binary')
    return avg_loss, accuracy, f1

def analyze_errors(model, dataset, device, num_examples=10):
    """
    Collect, plot and list misclassified examples from the dataset.

    Samples are visited in a random order until ``num_examples`` errors have
    been found or the dataset is exhausted. The random order is drawn as an
    explicit permutation of dataset indices, so the image and URL reported
    for an error always belong to the sample that was actually misclassified.

    Args:
        model: Network called with a single-image batch; the index of the
            highest score along dimension 1 is taken as the prediction.
        dataset: Indexable dataset whose items are ``(image, tabular, label)``
            and which provides ``get_original_image(idx)`` and ``image_urls``.
        device: Device the image is moved to before the forward pass.
        num_examples: Maximum number of misclassified examples to show.

    Returns:
        None. The examples are shown with matplotlib and their URLs printed.
    """
    model.eval()
    misclassified = []

    # Shuffle the *indices* rather than a DataLoader, so ``idx`` below is a
    # real dataset position and not just a counter over shuffled batches.
    visit_order = torch.randperm(len(dataset)).tolist()

    with torch.no_grad():
        for idx in visit_order:
            if len(misclassified) >= num_examples:
                break

            image, _, label = dataset[idx]
            true_class = int(label)
            output = model(image.unsqueeze(0).to(device))
            predicted_class = int(output.argmax(dim=1).item())

            if predicted_class == true_class:
                continue

            misclassified.append({
                'image': dataset.get_original_image(idx),
                'true': true_class,
                'pred': predicted_class,
                'url': dataset.image_urls[idx]
            })

    if not misclassified:
        # Nothing to plot; avoid opening an empty figure
        print("No misclassified examples found.")
        return

    # Size the grid to what was found (at most 5 per row) instead of a fixed
    # 2x5 layout, which broke for num_examples > 10.
    n_found = len(misclassified)
    n_cols = min(5, n_found)
    n_rows = (n_found + n_cols - 1) // n_cols
    _, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows), squeeze=False)
    axes = axes.ravel()

    # Hide every axis first so unused cells in the last row stay blank
    for ax in axes:
        ax.axis('off')

    for ax, example in zip(axes, misclassified):
        ax.imshow(example['image'])
        ax.set_title(f'True: {"Accept" if example["true"] == 1 else "Reject"}\nPred: {"Accept" if example["pred"] == 1 else "Reject"}')

    plt.tight_layout()
    plt.show()

    # Print URLs of misclassified images
    print("\nMisclassified Image URLs:")
    for example in misclassified:
        print(f"True: {'Accept' if example['true'] == 1 else 'Reject'}, Predicted: {'Accept' if example['pred'] == 1 else 'Reject'}")
        print(f"URL: {example['url']}\n")

# ==============================================================================
# 7. Training Loop
# ==============================================================================

print("\n--- Starting Training (Image-Only Mode) ---")
# F1 is never negative, so starting below zero guarantees that the first epoch
# writes a checkpoint. With a starting value of 0 and a strict ">" comparison,
# a run whose validation F1 stayed at 0 saved nothing, and the final
# evaluation then failed to load the file (or loaded one from an older run).
best_val_f1 = float("-inf")

for epoch in range(num_epochs):
    train_loss, train_f1 = train_one_epoch(model, train_loader, criterion, optimizer, DEVICE)
    val_loss, val_acc, val_f1 = evaluate_model(model, val_loader, criterion, DEVICE)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f} | Train F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f} | Val F1: {val_f1:.4f}")

    # Keep the weights of the epoch with the highest validation F1
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), 'best_image_only_model.pth')
        print("Model saved (New best F1 score)")

# ==============================================================================
# 8. Final Evaluation
# ==============================================================================

print("\n--- Final Test Evaluation ---")
# map_location remaps the stored tensors onto DEVICE, so a checkpoint written
# on a GPU machine can still be restored when only the CPU is available.
best_state = torch.load('best_image_only_model.pth', map_location=DEVICE)
model.load_state_dict(best_state)
test_loss, test_acc, test_f1 = evaluate_model(model, test_loader, criterion, DEVICE)

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test F1-Score: {test_f1:.4f}")

# Analyze misclassified examples from test set
print("\n--- Analyzing Misclassified Examples ---")
analyze_errors(model, test_dataset, DEVICE)

# Next steps:
# 1. Try different augmentation strategies
# 2. Experiment with other pre-trained models (MobileNet, EfficientNet)
# 3. Add tabular features back once image pipeline is working

Using device: cpu
Total dataset size: 489


KeyError: 'is_profitable'