In [33]:
# Required dependencies
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from tqdm import tqdm
import logging
from typing import Tuple, List
import pandas as pd

In [34]:
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Check CUDA availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logger.info(f"Using device: {device}")

class WADSDataset(Dataset):
    """Custom Dataset for WADS data"""
    def __init__(self, velodyne_dir: str, labels_dir: str, transform=None):
        self.velodyne_dir = velodyne_dir
        self.labels_dir = labels_dir
        self.transform = transform
        self.file_list = sorted([f for f in os.listdir(velodyne_dir) if f.endswith('.bin')])

    def __len__(self) -> int:
        return len(self.file_list)

    def read_bin_file(self, file_path: str) -> np.ndarray:
        """Read binary point cloud file"""
        try:
            return np.fromfile(file_path, dtype=np.float32).reshape(-1, 4)
        except Exception as e:
            logger.error(f"Error reading bin file {file_path}: {e}")
            raise

    def read_label_file(self, file_path: str) -> np.ndarray:
        """Read label file"""
        try:
            return np.fromfile(file_path, dtype=np.uint32)
        except Exception as e:
            logger.error(f"Error reading label file {file_path}: {e}")
            raise

    def convert_to_2d_representation(self, point_cloud: np.ndarray,
                                     height: int = 64, width: int = 512) -> np.ndarray:
        """Convert point cloud to 2D representation"""
        # Extract x, y, z coordinates
        x, y, z = point_cloud[:, 0], point_cloud[:, 1], point_cloud[:, 2]

        # Calculate polar coordinates
        r = np.sqrt(x**2 + y**2)
        theta = np.arctan2(y, x)
        phi = np.arctan2(z, r)

        # Normalize angles to [0, 1]
        theta_normalized = (theta + np.pi) / (2 * np.pi)
        phi_normalized = (phi + np.pi/2) / np.pi

        # Convert to pixel coordinates
        x_pixel = (theta_normalized * width).astype(int)
        y_pixel = (phi_normalized * height).astype(int)

        # Clip values to prevent out of bounds
        x_pixel = np.clip(x_pixel, 0, width-1)
        y_pixel = np.clip(y_pixel, 0, height-1)

        # Create image representation
        image = np.zeros((height, width, 3))
        image[y_pixel, x_pixel, 0] = r / np.max(r)  # normalized radius
        image[y_pixel, x_pixel, 1] = z / np.max(np.abs(z))  # normalized height
        image[y_pixel, x_pixel, 2] = point_cloud[:, 3]  # intensity

        return image

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Get a single item from the dataset"""
        try:
            velodyne_path = os.path.join(self.velodyne_dir, self.file_list[idx])
            label_path = os.path.join(self.labels_dir,
                                      self.file_list[idx].replace('.bin', '.label'))

            # Read data
            point_cloud = self.read_bin_file(velodyne_path)
            labels = self.read_label_file(label_path)

            # Convert to 2D representation
            image = self.convert_to_2d_representation(point_cloud)

            # Apply transforms if any
            if self.transform:
                image = self.transform(image)

            # Convert to torch tensors
            image = torch.FloatTensor(image).permute(2, 0, 1)  # Convert to CxHxW format
            labels = torch.LongTensor(labels)

            return image, labels

        except Exception as e:
            logger.error(f"Error processing item {idx}: {e}")
            raise

class CNNModel(nn.Module):
    """CNN model for WADS classification"""
    def __init__(self, num_classes: int):
        super(CNNModel, self).__init__()

        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2),
        )

        self.classifier = nn.Sequential(
            nn.Linear(128 * 8 * 64, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

class ModelTrainer:
    """Class to handle model training and evaluation"""
    def __init__(self, model: nn.Module, device: torch.device,
                 criterion: nn.Module, optimizer: torch.optim.Optimizer):
        self.model = model.to(device)
        self.device = device
        self.criterion = criterion
        self.optimizer = optimizer
        self.train_losses = []
        self.val_losses = []

    def train_epoch(self, train_loader: DataLoader) -> float:
        """Train for one epoch"""
        self.model.train()
        total_loss = 0

        for batch_idx, (data, target) in enumerate(tqdm(train_loader, desc="Training")):
            data, target = data.to(self.device), target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.criterion(output, target)

            loss.backward()
            self.optimizer.step()

            total_loss += loss.item()

        return total_loss / len(train_loader)

    def validate(self, val_loader: DataLoader) -> float:
        """Validate the model"""
        self.model.eval()
        total_loss = 0

        with torch.no_grad():
            for data, target in tqdm(val_loader, desc="Validating"):
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                loss = self.criterion(output, target)
                total_loss += loss.item()

        return total_loss / len(val_loader)

    def train(self, train_loader: DataLoader, val_loader: DataLoader,
              epochs: int) -> Tuple[List[float], List[float]]:
        """Train the model for specified number of epochs"""
        for epoch in range(epochs):
            logger.info(f"Epoch {epoch+1}/{epochs}")

            train_loss = self.train_epoch(train_loader)
            val_loss = self.validate(val_loader)

            self.train_losses.append(train_loss)
            self.val_losses.append(val_loss)

            logger.info(f"Training Loss: {train_loss:.4f}")
            logger.info(f"Validation Loss: {val_loss:.4f}")

        return self.train_losses, self.val_losses

    def plot_losses(self):
        """Plot training and validation losses"""
        plt.figure(figsize=(10, 6))
        plt.plot(self.train_losses, label='Training Loss')
        plt.plot(self.val_losses, label='Validation Loss')
        plt.title('Model Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()

    def visualize_filters(self):
        """Visualize filters from first convolutional layer"""
        with torch.no_grad():
            # Get the first conv layer weights
            first_layer = self.model.features[0]
            weights = first_layer.weight.cpu().numpy()

            # Plot filters
            fig, axes = plt.subplots(4, 8, figsize=(20, 10))
            for i, ax in enumerate(axes.flat):
                if i < weights.shape[0]:
                    ax.imshow(weights[i, 0], cmap='viridis')
                ax.axis('off')
            plt.show()

def main():
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)

    # Dataset parameters
    VELODYNE_DIR = "./WADS-11/velodyne"
    LABELS_DIR = "./WADS-11/labels"
    BATCH_SIZE = 32
    NUM_WORKERS = 4
    NUM_CLASSES = 20  # Adjust based on your dataset

    # Training parameters
    LEARNING_RATE = 0.001
    EPOCHS = 20

    # Create dataset and dataloaders
    dataset = WADSDataset(VELODYNE_DIR, LABELS_DIR)
    train_size = int(0.7 * len(dataset))
    val_size = int(0.15 * len(dataset))
    test_size = len(dataset) - train_size - val_size

    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size, test_size]
    )

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                            shuffle=False, num_workers=NUM_WORKERS)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                             shuffle=False, num_workers=NUM_WORKERS)

    # Create model and training components
    model = CNNModel(num_classes=NUM_CLASSES)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Create trainer and train model
    trainer = ModelTrainer(model, device, criterion, optimizer)
    train_losses, val_losses = trainer.train(train_loader, val_loader, EPOCHS)

    # Plot results
    trainer.plot_losses()
    trainer.visualize_filters()

    # Save model
    torch.save(model.state_dict(), 'wads_model.pth')

    logger.info("Training completed successfully!")

if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        raise

2025-02-14 14:40:52,646 - INFO - Using device: cuda
2025-02-14 14:40:52,886 - INFO - Epoch 1/20
Training:   0%|          | 0/3 [00:05<?, ?it/s]
2025-02-14 14:40:57,919 - ERROR - An error occurred: DataLoader worker (pid(s) 16412, 12664, 6856, 16212) exited unexpectedly


RuntimeError: DataLoader worker (pid(s) 16412, 12664, 6856, 16212) exited unexpectedly