<a href="https://colab.research.google.com/github/NOWAYTE/neuromorphic-adas/blob/main/Untitled37.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import sys
import zipfile
import json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
from collections import OrderedDict
import matplotlib.pyplot as plt
from tqdm import tqdm
import librosa
import h5py
import cv2

!mkdir -p models utils data/raw data/processed trained_models logs

In [2]:
import json
import os

from google.colab import files
uploaded = files.upload()
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [3]:
!gdown --id 1IBxEHX1bO124z9MGqrDjIPFCZmd1fFRk -O n_cars.7z

Downloading...
From (original): https://drive.google.com/uc?id=1IBxEHX1bO124z9MGqrDjIPFCZmd1fFRk
From (redirected): https://drive.google.com/uc?id=1IBxEHX1bO124z9MGqrDjIPFCZmd1fFRk&confirm=t&uuid=894f6a4a-7bda-4c1b-afc4-beb98e734676
To: /content/n_cars.7z
100% 299M/299M [00:06<00:00, 44.7MB/s]


In [4]:
!7z x n_cars.7z -odata/raw/events_ncars


7-Zip [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,2 CPUs Intel(R) Xeon(R) CPU @ 2.00GHz (50653),ASM,AES-NI)

Scanning the drive for archives:
  0M Scan         1 file, 299228901 bytes (286 MiB)

Extracting archive: n_cars.7z
--
Path = n_cars.7z
Type = 7z
Physical Size = 299228901
Headers Size = 238856
Method = LZMA:23
Solid = +
Blocks = 1

  0%      1% 175 - Prophesee_Dataset_n_cars/n-cars_train/background/obj_006566_td.dat                                                                               1% 469 - Prophesee_Dataset_n_cars/n-cars_train/background/obj_006241_td.dat                                                                         

In [5]:
!kaggle datasets download -d breejeshdhar/thermal-image-dataset-for-object-classification -p ./data
with zipfile.ZipFile('./data/thermal-image-dataset-for-object-classification.zip', 'r') as zip_ref:
    zip_ref.extractall('data/raw/thermal')

!kaggle datasets download -d mmoreaux/environmental-sound-classification-50 -p ./data
with zipfile.ZipFile('./data/environmental-sound-classification-50.zip', 'r') as zip_ref:
    zip_ref.extractall('data/raw/audio')

Dataset URL: https://www.kaggle.com/datasets/breejeshdhar/thermal-image-dataset-for-object-classification
License(s): CC-BY-SA-4.0
Downloading thermal-image-dataset-for-object-classification.zip to ./data
 99% 3.23G/3.24G [00:45<00:00, 53.8MB/s]
100% 3.24G/3.24G [00:45<00:00, 77.3MB/s]
Dataset URL: https://www.kaggle.com/datasets/mmoreaux/environmental-sound-classification-50
License(s): CC-BY-NC-SA-4.0
Downloading environmental-sound-classification-50.zip to ./data
 97% 1.38G/1.42G [00:06<00:00, 100MB/s] 
100% 1.42G/1.42G [00:06<00:00, 222MB/s]


In [6]:
%%writefile models/hybrid_fusion.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from .thermal_processor import ThermalEncoder

class HybridFusionModel(nn.Module):
    def __init__(self, num_classes=3, thermal_feat_dim=128):
        """
        Hybrid Neuromorphic-Acoustic-Thermal Fusion Model
        num_classes: 0=normal, 1=siren, 2=hazard
        thermal_feat_dim: Dimension of thermal features
        """
        super().__init__()

        # Neuromorphic Processing Branch
        self.event_encoder = nn.Sequential(
            nn.Conv2d(2, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(32 * 65 * 86, 256)
        )

        # Acoustic Processing Branch
        self.audio_encoder = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(32, 128)
        )

        # Thermal Processing Branch
        self.thermal_encoder = ThermalEncoder(output_dim=thermal_feat_dim)

        # Feature dimensions for attention
        self.event_dim = 256
        self.audio_dim = 128
        self.thermal_dim = thermal_feat_dim

        # Attention-based Fusion
        self.attention = nn.MultiheadAttention(
            embed_dim=self.event_dim + self.audio_dim + self.thermal_dim,
            num_heads=4,
            batch_first=True
        )

        # Classifier
        self.classifier = nn.Sequential(
            nn.Linear(self.event_dim + self.audio_dim + self.thermal_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

        # Confidence Head
        self.confidence = nn.Sequential(
            nn.Linear(self.event_dim + self.audio_dim + self.thermal_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, event_input, audio_input, thermal_input=None):
        # Process event data
        batch_size, seq_len = event_input.shape[0], event_input.shape[1]
        event_input = event_input.reshape(-1, *event_input.shape[2:])
        event_features = self.event_encoder(event_input)
        event_features = event_features.reshape(batch_size, seq_len, -1).mean(dim=1)

        # Process audio data
        audio_features = self.audio_encoder(audio_input.unsqueeze(1))

        # Process thermal data if provided
        if thermal_input is not None:
            thermal_features = self.thermal_encoder(thermal_input)
            combined = torch.cat([event_features, audio_features, thermal_features], dim=1)
        else:
            combined = torch.cat([event_features, audio_features], dim=1)

        # Apply attention
        attn_output, _ = self.attention(
            combined.unsqueeze(1),
            combined.unsqueeze(1),
            combined.unsqueeze(1)
        )
        fused = attn_output.squeeze(1)

        # Outputs
        classification = self.classifier(fused)
        confidence = self.confidence(fused)

        return classification, confidence


class EventProcessing(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv3d = nn.Conv3d(2, 16, kernel_size=(3,3,3), padding=1)
        self.pool = nn.MaxPool3d((1,2,2))

    def forward(self, x):  # x: [B, T, C, H, W]
        x = x.permute(0, 2, 1, 3, 4)  # [B, C, T, H, W]
        return self.pool(F.relu(self.conv3d(x)))

audio_net = nn.Sequential(
    nn.Conv1d(64, 32, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool1d(2),
    nn.Conv1d(32, 64, kernel_size=3, padding=1),
    nn.AdaptiveAvgPool1d(1),
    nn.Flatten()
)

def add_fog(image, severity=0.7):
    fog = np.ones_like(image) * 255 * severity
    return cv2.addWeighted(image, 1-severity, fog, severity, 0)

class HybridFusion(nn.Module):
    def __init__(self, feat_dims=[256, 128, 64]):
        super().__init__()
        self.attn = nn.MultiheadAttention(
            embed_dim=sum(feat_dims),
            num_heads=4,
            batch_first=True
        )

    def forward(self, event_feats, audio_feats, thermal_feats):
        combined = torch.cat([event_feats, audio_feats, thermal_feats], dim=-1)
        attn_out, _ = self.attn(combined, combined, combined)
        return attn_out

Writing models/hybrid_fusion.py


In [7]:
%%writefile utils/neuromorphic_loader.py
import h5py
import numpy as np
import torch

class EventDataLoader:
    def __init__(self, time_window=50.0, height=260, width=346):
        """
        Neuromorphic event data loader
        :param time_window: Time window in milliseconds
        """
        self.time_window = time_window * 1000  # Convert to µs
        self.height = height
        self.width = width

    def load_events(self, file_path):
        with h5py.File(file_path, 'r') as f:
            return {
                't': np.array(f['events/t']),
                'x': np.array(f['events/x']),
                'y': np.array(f['events/y']),
                'p': np.array(f['events/p'])
            }

    def events_to_tensor(self, events):
        """Convert events to tensor representation"""
        min_t = np.min(events['t'])
        max_t = np.max(events['t'])
        num_frames = int(np.ceil((max_t - min_t) / self.time_window))

        # Initialize tensor: [frames, channels, height, width]
        tensor = torch.zeros((num_frames, 2, self.height, self.width))

        for frame_idx in range(num_frames):
            start_t = min_t + frame_idx * self.time_window
            end_t = start_t + self.time_window

            # Find events in current time window
            mask = (events['t'] >= start_t) & (events['t'] < end_t)
            frame_events = {k: v[mask] for k, v in events.items()}

            # Accumulate events
            for t, x, y, p in zip(frame_events['t'], frame_events['x'],
                                  frame_events['y'], frame_events['p']):
                channel = 0 if p > 0 else 1
                if 0 <= y < self.height and 0 <= x < self.width:
                    tensor[frame_idx, channel, y, x] += 1

        return tensor

    def normalize_events(self, tensor):
        """Normalize event counts per frame"""
        # Add small epsilon to avoid division by zero
        frame_sums = tensor.sum(dim=(1, 2, 3), keepdim=True) + 1e-8
        return tensor / frame_sums


Writing utils/neuromorphic_loader.py


In [8]:
%%writefile models/thermal_processor.py
import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms

class ThermalProcessor:
    def __init__(self, input_size=(224, 224), normalize=True):
        """
        Initialize thermal image processor

        Args:
            input_size (tuple): Target size for resizing images (height, width)
            normalize (bool): Whether to normalize thermal values
        """
        self.input_size = input_size
        self.normalize = normalize
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(input_size),
            transforms.ToTensor(),
        ])

    def preprocess(self, thermal_image):
        """
        Preprocess a single thermal image

        Args:
            thermal_image (numpy.ndarray): Input thermal image (single channel)

        Returns:
            torch.Tensor: Preprocessed thermal image tensor
        """
        # Convert to float32 if needed
        if thermal_image.dtype != np.float32:
            thermal_image = thermal_image.astype(np.float32)

        # Normalize to [0, 1] if not already
        if thermal_image.max() > 1.0:
            thermal_image = thermal_image / thermal_image.max()

        # Apply transformations
        tensor = self.transform(thermal_image)

        # Add batch dimension if needed
        if len(tensor.shape) == 3:
            tensor = tensor.unsqueeze(0)

        return tensor

    def add_thermal_noise(self, thermal_image, noise_level=0.1):
        """Add realistic thermal noise to the image"""
        noise = np.random.normal(0, noise_level, thermal_image.shape).astype(np.float32)
        return np.clip(thermal_image + noise, 0, 1)

    def adjust_thermal_contrast(self, thermal_image, alpha=1.0, beta=0.0):
        """Adjust contrast of thermal image"""
        return np.clip(alpha * thermal_image + beta, 0, 1)


class ThermalEncoder(nn.Module):
    def __init__(self, input_channels=1, base_channels=32, output_dim=128):
        """
        CNN-based thermal feature extractor
        """
        super().__init__()
        self.features = nn.Sequential(
            # Initial conv block
            nn.Conv2d(input_channels, base_channels, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(base_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),

            # Residual blocks
            self._make_residual_block(base_channels, base_channels * 2, 2),
            self._make_residual_block(base_channels * 2, base_channels * 4, 2),
            self._make_residual_block(base_channels * 4, base_channels * 8, 2),

            # Final pooling
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),

            # Projection head
            nn.Linear(base_channels * 8, output_dim)
        )

    def _make_residual_block(self, in_channels, out_channels, stride):
        """Create a residual block with skip connection"""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.features(x)

Writing models/thermal_processor.py


In [52]:
%%writefile training/train_utils.py
import os
import torch
from torch.utils.data import Dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

class MultiModalDataset(Dataset):
    def __init__(self, metadata_path, data_root, transform=None, mode='train'):
        """
        Dataset class for multimodal data (events, audio, thermal)

        Args:
            metadata_path (str): Path to the metadata CSV file
            data_root (str): Root directory containing the data
            transform: Optional transform to be applied on a sample
            mode (str): 'train', 'val', or 'test'
        """
        self.metadata = pd.read_csv(metadata_path)
        self.data_root = data_root
        self.transform = transform
        self.mode = mode

        # Filter by split if specified in metadata
        if 'split' in self.metadata.columns:
            self.metadata = self.metadata[self.metadata['split'] == mode]

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        # Get sample metadata
        sample = self.metadata.iloc[idx]

        # Load event data
        event_data = self._load_events(os.path.join(self.data_root, sample['event_path']))

        # Load audio data
        audio_data = self._load_audio(os.path.join(self.data_root, sample['audio_path']))

        # Load thermal data
        thermal_data = self._load_thermal(os.path.join(self.data_root, sample['thermal_path']))

        # Get label
        label = sample['label']

        if self.transform:
            event_data, audio_data, thermal_data = self.transform(
                event_data, audio_data, thermal_data
            )

        return {
            'events': event_data,
            'audio': audio_data,
            'thermal': thermal_data,
            'label': label
        }

    def _load_events(self, event_path):
        """Load and process event data"""
        from utils.neuromorphic_loader import EventDataLoader
        event_loader = EventDataLoader()
        events = event_loader.load_events(event_path)
        return event_loader.events_to_tensor(events)

    def _load_audio(self, audio_path):
        """Load and preprocess audio data"""
        from utils.audio_processor import AudioProcessor
        audio_processor = AudioProcessor()
        audio = audio_processor.load_audio(audio_path)
        return audio_processor.extract_features(audio)

    def _load_thermal(self, thermal_path):
        """Load and preprocess thermal image"""
        from utils.thermal_processor import ThermalProcessor
        thermal_processor = ThermalProcessor()
        thermal_image = cv2.imread(thermal_path, cv2.IMREAD_GRAYSCALE)
        return thermal_processor.preprocess(thermal_image)

def create_data_loaders(metadata, data_root, batch_size=8, validation_split=0.2):
    """Return train and validation DataLoaders"""
    if isinstance(metadata, str):
        metadata = pd.read_csv(metadata)

    train_meta, val_meta = train_test_split(metadata, test_size=validation_split, random_state=42)

    train_dataset = MultiModalDataset(train_meta, data_root)
    val_dataset = MultiModalDataset(val_meta, data_root)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    return train_loader, val_loader


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=25, device='cuda'):
    """Train the model"""
    from torch.utils.tensorboard import SummaryWriter
    import time

    # Initialize TensorBoard writer
    writer = SummaryWriter('logs/training')

    # Training history
    history = {
        'train_loss': [], 'val_loss': [],
        'train_acc': [], 'val_acc': []
    }

    best_val_loss = float('inf')

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                dataloader = train_loader
            else:
                model.eval()   # Set model to evaluate mode
                dataloader = val_loader

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data
            for batch in tqdm(dataloader, desc=phase):
                # Get inputs and labels
                events = batch['events'].to(device)
                audio = batch['audio'].to(device)
                thermal = batch['thermal'].to(device)
                labels = batch['label'].to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass
                with torch.set_grad_enabled(phase == 'train'):
                    outputs, confidence = model(events, audio, thermal)
                    loss = criterion(outputs, labels)

                    # Backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics
                running_loss += loss.item() * events.size(0)
                _, preds = torch.max(outputs, 1)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train' and scheduler is not None:
                scheduler.step()

            epoch_loss = running_loss / len(dataloader.dataset)
            epoch_acc = running_corrects.double() / len(dataloader.dataset)

            # Record history
            if phase == 'train':
                history['train_loss'].append(epoch_loss)
                history['train_acc'].append(epoch_acc.cpu().numpy())
            else:
                history['val_loss'].append(epoch_loss)
                history['val_acc'].append(epoch_acc.cpu().numpy())

            # Log to TensorBoard
            writer.add_scalar(f'Loss/{phase}', epoch_loss, epoch)
            writer.add_scalar(f'Accuracy/{phase}', epoch_acc, epoch)

            print(f'{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Save best model
            if phase == 'val' and epoch_loss < best_val_loss:
                best_val_loss = epoch_loss
                torch.save(model.state_dict(), 'trained_models/best_model.pth')
                print('New best model saved!')

        print()

    writer.close()
    return history

Overwriting training/train_utils.py


In [23]:
import os
from glob import glob
import pandas as pd

data_root = "data/raw"

def list_files_in_directory(base_path, patterns=["*"], recursive=True):
    """List files in a directory matching patterns."""
    files_found = []
    for pattern in patterns:
        files_found.extend(glob(os.path.join(base_path, "**", pattern), recursive=recursive))
    return files_found

def analyze_dataset_dirs(data_root, max_files=10):
    print("="*60)
    print("DATASET DIRECTORY ANALYSIS")
    print("="*60)

    # --- Event files ---
    event_paths = {
        "train": os.path.join(data_root, 'events_Cars', 'Prophesee_Dataset_n_cars', 'n-cars_train'),
        "test": os.path.join(data_root, 'events_Cars', 'Prophesee_Dataset_n_cars', 'n-cars_Test')
    }
    for split, path in event_paths.items():
        event_files = list_files_in_directory(path, patterns=['*.h5', '*.aedat', '*.bin'])
        print(f"\nEvent files ({split}): {len(event_files)}")
        for f in event_files[:max_files]:
            print(f"  {os.path.relpath(f, data_root)}")

    # --- Audio files ---
    audio_dir = os.path.join(data_root, 'audio', 'audio', 'audio')
    audio_files = list_files_in_directory(audio_dir, patterns=['*.wav', '*.mp3', '*.flac', '*.ogg', '*.m4a'])
    print(f"\nAudio files: {len(audio_files)}")
    for f in audio_files[:max_files]:
        print(f"  {os.path.relpath(f, data_root)}")

    # --- Thermal images ---
    thermal_paths = [
        os.path.join(data_root, 'thermal', 'Thermal Image Dataset', 'SeekThermal', 'Train'),
        os.path.join(data_root, 'thermal', 'Thermal Image Dataset', 'SeekThermal', 'Test'),
        os.path.join(data_root, 'thermal', 'Thermal Image Dataset', 'FLIR', 'Train'),
        os.path.join(data_root, 'thermal', 'Thermal Image Dataset', 'FLIR', 'Test')
    ]
    for path in thermal_paths:
        thermal_files = list_files_in_directory(path, patterns=['*.jpg', '*.jpeg', '*.png'])
        print(f"\nThermal images in {os.path.basename(path)}: {len(thermal_files)}")
        for f in thermal_files[:max_files]:
            print(f"  {os.path.relpath(f, data_root)}")

    # --- CSV files (metadata) ---
    csv_files = list_files_in_directory(data_root, patterns=['*.csv'])
    print(f"\nCSV files found: {len(csv_files)}")
    for f in csv_files:
        print(f"  {os.path.relpath(f, data_root)}")

# Run the analysis
analyze_dataset_dirs(data_root)


DATASET DIRECTORY ANALYSIS

Event files (train): 0

Event files (test): 0

Audio files: 6000
  audio/audio/audio/5-201194-A-38.wav
  audio/audio/audio/3-163727-A-3.wav
  audio/audio/audio/1-23094-B-15.wav
  audio/audio/audio/2-117615-B-48.wav
  audio/audio/audio/5-260875-A-35.wav
  audio/audio/audio/5-243448-A-14.wav
  audio/audio/audio/4-167077-C-20.wav
  audio/audio/audio/1-18755-A-4.wav
  audio/audio/audio/3-159346-B-36.wav
  audio/audio/audio/5-181977-A-35.wav

Thermal images in Train: 4732
  thermal/Thermal Image Dataset/SeekThermal/Train/Car/926.jpg
  thermal/Thermal Image Dataset/SeekThermal/Train/Car/img_thermal_1583324173635.jpg
  thermal/Thermal Image Dataset/SeekThermal/Train/Car/img_thermal_1576656238498.jpg
  thermal/Thermal Image Dataset/SeekThermal/Train/Car/img_thermal_1583324167434.jpg
  thermal/Thermal Image Dataset/SeekThermal/Train/Car/img_thermal_1576656463140.jpg
  thermal/Thermal Image Dataset/SeekThermal/Train/Car/img_thermal_1576932461250.jpg
  thermal/Thermal 

In [37]:
# --- IMPROVED METADATA CREATION SCRIPT WITH MULTI-MODAL LABELS ---
import pandas as pd
import os
from glob import glob
import random

def create_balanced_metadata_csv(data_root, output_path, max_samples=None):
    """
    Create unified metadata CSV for hybrid ADAS training.
    Combines neuromorphic events (NFDD + N-CARS), audio, and thermal datasets with proper labels.
    Randomly chooses the modality to assign the label to avoid dominance of one dataset.
    """
    # --- EVENT FILES ---
    event_files_nfdd = glob(os.path.join(data_root, 'events', 'NFDD', '**', '*.h5'), recursive=True)
    event_files_ncars = []
    ncars_dir = os.path.join(data_root, 'events_ncars', 'Prophesee_Dataset_n_cars')
    for ext in ['*.h5', '*.aedat', '*.dat', '*.bin']:
        event_files_ncars.extend(glob(os.path.join(ncars_dir, '**', ext), recursive=True))
    event_files = event_files_nfdd + event_files_ncars
    print(f"Found {len(event_files)} event files (NFDD + N-CARS)")

    # --- AUDIO FILES ---
    audio_files = glob(os.path.join(data_root, 'audio', 'audio', 'audio', '*.wav'), recursive=True)
    print(f"Found {len(audio_files)} audio files")

    # Load ESC-50 metadata
    esc50_path = os.path.join(data_root, 'audio', 'esc50.csv')
    esc50_df = pd.read_csv(esc50_path)
    audio_metadata_map = dict(zip(esc50_df['filename'], esc50_df['category']))

    # --- THERMAL FILES ---
    thermal_files = glob(os.path.join(data_root, 'thermal', 'Thermal Image Dataset', '**', '*.*'), recursive=True)
    thermal_files = [f for f in thermal_files if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp'))]
    print(f"Found {len(thermal_files)} thermal image files")

    # --- LIMIT SAMPLES ---
    if max_samples is None:
        max_samples = min(len(event_files), len(audio_files), len(thermal_files))
    else:
        max_samples = min(max_samples, len(event_files), len(audio_files), len(thermal_files))
    print(f"Creating metadata for {max_samples} samples")

    # --- CLASS MAPPING ---
    class_mapping = {
        # Event classes
        'fall': 0,
        'nonfall': 1,
        'background': 1,
        'cars': 3,  # N-CARS folder mapping

        # Thermal classes
        'man': 2,
        'woman': 2,
        'child': 2,
        'car': 3,
        'animal': 4,

        # Audio categories
        'siren': 0,
        'car_horn': 3,
        'engine': 3,
        'dog': 4,
        'cat': 4
    }

    metadata = []

    for i in range(max_samples):
        # Cycle through files
        event_path = event_files[i % len(event_files)]
        audio_path = audio_files[i % len(audio_files)]
        thermal_path = thermal_files[i % len(thermal_files)]

        # --- EXTRACT CLASSES ---
        event_class = os.path.basename(os.path.dirname(event_path)).lower()
        thermal_class = os.path.basename(os.path.dirname(thermal_path)).lower()
        audio_filename = os.path.basename(audio_path)
        audio_category = audio_metadata_map.get(audio_filename, 'unknown').lower()

        # --- DETERMINE LABEL AND MODALITY ---
        modalities = []
        if event_class in class_mapping:
            modalities.append(('event', class_mapping[event_class]))
        if audio_category in class_mapping:
            modalities.append(('audio', class_mapping[audio_category]))
        if thermal_class in class_mapping:
            modalities.append(('thermal', class_mapping[thermal_class]))

        if modalities:
            source_modality, label = random.choice(modalities)
        else:
            label = 1  # default normal
            source_modality = 'unknown'

        metadata.append({
            'sample_id': f'sample_{i:04d}',
            'event_path': os.path.relpath(event_path, data_root),
            'audio_path': os.path.relpath(audio_path, data_root),
            'thermal_path': os.path.relpath(thermal_path, data_root),
            'label': label,
            'event_class': event_class,
            'audio_category': audio_category,
            'thermal_class': thermal_class,
            'modality_source': source_modality
        })

    # --- SAVE METADATA CSV ---
    df = pd.DataFrame(metadata)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df.to_csv(output_path, index=False)
    print(f"\nBalanced metadata CSV created at {output_path} with {len(df)} samples")

    # --- LABEL DISTRIBUTION ---
    print("\nLabel distribution:")
    label_counts = df['label'].value_counts().sort_index()
    for lbl, count in label_counts.items():
        print(f"  Class {lbl}: {count} samples ({count/len(df)*100:.1f}%)")

    return df

# --- USAGE ---
data_root = "data/raw"
metadata_path = "data/processed/metadata_balanced.csv"

metadata_df = create_balanced_metadata_csv(data_root, metadata_path, max_samples=2000)

print("\nFirst few rows of balanced metadata:")
print(metadata_df.head())


Found 24029 event files (NFDD + N-CARS)
Found 2000 audio files
Found 6843 thermal image files
Creating metadata for 2000 samples

Balanced metadata CSV created at data/processed/metadata_balanced.csv with 2000 samples

Label distribution:
  Class 0: 17 samples (0.9%)
  Class 2: 437 samples (21.9%)
  Class 3: 1520 samples (76.0%)
  Class 4: 26 samples (1.3%)

First few rows of balanced metadata:
     sample_id                                         event_path  \
0  sample_0000  events_ncars/Prophesee_Dataset_n_cars/n-cars_t...   
1  sample_0001  events_ncars/Prophesee_Dataset_n_cars/n-cars_t...   
2  sample_0002  events_ncars/Prophesee_Dataset_n_cars/n-cars_t...   
3  sample_0003  events_ncars/Prophesee_Dataset_n_cars/n-cars_t...   
4  sample_0004  events_ncars/Prophesee_Dataset_n_cars/n-cars_t...   

                            audio_path  \
0  audio/audio/audio/5-201194-A-38.wav   
1   audio/audio/audio/3-163727-A-3.wav   
2   audio/audio/audio/1-23094-B-15.wav   
3  audio/audio/audi

In [53]:
sys.path.append('/content/drive/MyDrive/ADAS_Project')

from models.hybrid_fusion import HybridFusionModel
from utils.audio_processor import AudioProcessor
from utils.neuromorphic_loader import EventDataLoader
from utils.thermal_processor import ThermalProcessor, ThermalEncoder
from training.train_utils import create_data_loaders, train_model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model = HybridFusionModel(num_classes=3, thermal_feat_dim=128).to(device)

metadata_path = "data/processed/metadata.csv"
data_root = "data/raw"

train_loader, val_loader = create_data_loaders(
    metadata_path, data_root, batch_size=8, validation_split=0.2
)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

history = train_model(
    model, train_loader, val_loader,
    criterion, optimizer, scheduler,
    num_epochs=25, device=device
)

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history['train_loss'], label='Training Loss')
plt.plot(history['val_loss'], label='Validation Loss')
plt.title('Loss over Epochs')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history['train_acc'], label='Training Accuracy')
plt.plot(history['val_acc'], label='Validation Accuracy')
plt.title('Accuracy over Epochs')
plt.legend()

plt.tight_layout()
plt.savefig('training_history.png')
plt.show()

print("Training completed!")

Using device: cuda


TypeError: argument of type 'method' is not iterable

In [None]:
# --- MODEL EXPORT CELL ---
# Export model for local deployment
import json

# Save model configuration
config = {
    "model_type": "HybridFusionModel",
    "num_classes": 3,
    "thermal_feat_dim": 128,
    "audio_params": {
        "sample_rate": 16000,
        "n_fft": 1024,
        "hop_length": 512,
        "n_mels": 64
    },
    "event_params": {
        "time_window": 50.0,
        "height": 260,
        "width": 346
    },
    "thermal_params": {
        "input_size": [224, 224],
        "normalize": True
    }
}

with open('trained_models/model_config.json', 'w') as f:
    json.dump(config, f, indent=4)

# Convert to ONNX format (optional)
dummy_events = torch.randn(1, 10, 2, 260, 346).to(device)
dummy_audio = torch.randn(1, 64, 64).to(device)  # Example mel spectrogram shape
dummy_thermal = torch.randn(1, 1, 224, 224).to(device)

torch.onnx.export(
    model,
    (dummy_events, dummy_audio, dummy_thermal),
    "trained_models/model.onnx",
    input_names=["events", "audio", "thermal"],
    output_names=["classification", "confidence"],
    dynamic_axes={
        "events": {0: "batch_size"},
        "audio": {0: "batch_size"},
        "thermal": {0: "batch_size"},
        "classification": {0: "batch_size"},
        "confidence": {0: "batch_size"}
    }
)

# Create a zip file with all necessary files for local deployment
!zip -r adas_deployment.zip trained_models/ utils/ models/ config.py

# Download the deployment package
from google.colab import files
files.download('adas_deployment.zip')

print("Model exported and deployment package created!")

In [15]:
!mkdir -p training

In [47]:
import os
import torch
from torch.utils.data import Dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import sys

# Add the utils directory to the system path so imports within this file work
sys.path.append('/content/utils')


class MultiModalDataset(Dataset):
    def __init__(self, metadata_path, data_root, transform=None, mode='train'):
        """
        Dataset class for multimodal data (events, audio, thermal)

        Args:
            metadata_path (str): Path to the metadata CSV file
            data_root (str): Root directory containing the data
            transform: Optional transform to be applied on a sample
            mode (str): 'train', 'val', or 'test'
        """
        self.metadata = pd.read_csv(metadata_path)
        self.data_root = data_root
        self.transform = transform
        self.mode = mode

        # Filter by split if specified in metadata
        if 'split' in self.metadata.columns:
            self.metadata = self.metadata[self.metadata['split'] == mode]

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        # Get sample metadata
        sample = self.metadata.iloc[idx]

        # Load event data
        event_data = self._load_events(os.path.join(self.data_root, sample['event_path']))

        # Load audio data
        audio_data = self._load_audio(os.path.join(self.data_root, sample['audio_path']))

        # Load thermal data
        thermal_data = self._load_thermal(os.path.join(self.data_root, sample['thermal_path']))

        # Get label
        label = sample['label']

        if self.transform:
            event_data, audio_data, thermal_data = self.transform(
                event_data, audio_data, thermal_data
            )

        return {
            'events': event_data,
            'audio': audio_data,
            'thermal': thermal_data,
            'label': label
        }

    def _load_events(self, event_path):
        """Load and process event data"""
        from utils.neuromorphic_loader import EventDataLoader
        event_loader = EventDataLoader()
        events = event_loader.load_events(event_path)
        return event_loader.events_to_tensor(events)

    def _load_audio(self, audio_path):
        """Load and preprocess audio data"""
        from utils.audio_processor import AudioProcessor
        audio_processor = AudioProcessor()
        audio = audio_processor.load_audio(audio_path)
        return audio_processor.extract_features(audio)

    def _load_thermal(self, thermal_path):
        """Load and preprocess thermal image"""
        from utils.thermal_processor import ThermalProcessor
        import cv2
        thermal_processor = ThermalProcessor()
        thermal_image = cv2.imread(thermal_path, cv2.IMREAD_GRAYSCALE)
        return thermal_processor.preprocess(thermal_image)

def create_data_loaders(metadata_path, data_root, batch_size=8, validation_split=0.2):
    """Create training and validation data loaders"""
    # Load metadata
    metadata = pd.read_csv(metadata_path)

    # Split into train and validation
    train_meta, val_meta = train_test_split(metadata, test_size=validation_split, random_state=42)

    # Create datasets
    train_dataset = MultiModalDataset(train_meta, data_root, mode='train')
    val_dataset = MultiModalDataset(val_meta, data_root, mode='val')

    # Create data loaders
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, num_workers=2
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False, num_workers=2
    )

    return train_loader, val_loader

def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=25, device='cuda'):
    """Train the model"""
    from torch.utils.tensorboard import SummaryWriter
    import time

    # Initialize TensorBoard writer
    writer = SummaryWriter('logs/training')

    # Training history
    history = {
        'train_loss': [], 'val_loss': [],
        'train_acc': [], 'val_acc': []
    }

    best_val_loss = float('inf')

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                dataloader = train_loader
            else:
                model.eval()   # Set model to evaluate mode
                dataloader = val_loader

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data
            for batch in tqdm(dataloader, desc=phase):
                # Get inputs and labels
                events = batch['events'].to(device)
                audio = batch['audio'].to(device)
                thermal = batch['thermal'].to(device)
                labels = batch['label'].to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass
                with torch.set_grad_enabled(phase == 'train'):
                    outputs, confidence = model(events, audio, thermal)
                    loss = criterion(outputs, labels)

                    # Backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics
                running_loss += loss.item() * events.size(0)
                _, preds = torch.max(outputs, 1)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train' and scheduler is not None:
                scheduler.step()

            epoch_loss = running_loss / len(dataloader.dataset)
            epoch_acc = running_corrects.double() / len(dataloader.dataset)

            # Record history
            if phase == 'train':
                history['train_loss'].append(epoch_loss)
                history['train_acc'].append(epoch_acc.cpu().numpy())
            else:
                history['val_loss'].append(epoch_loss)
                history['val_acc'].append(epoch_acc.cpu().numpy())

            # Log to TensorBoard
            writer.add_scalar(f'Loss/{phase}', epoch_loss, epoch)
            writer.add_scalar(f'Accuracy/{phase}', epoch_acc, epoch)

            print(f'{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Save best model
            if phase == 'val' and epoch_loss < best_val_loss:
                best_val_loss = epoch_loss
                torch.save(model.state_dict(), 'trained_models/best_model.pth')
                print('New best model saved!')

        print()

    writer.close()
    return history

In [12]:
%%writefile utils/audio_processor.py
import librosa
import numpy as np
import torch

class AudioProcessor:
    def __init__(self, sample_rate=16000, n_fft=1024, hop_length=512, n_mels=64):
        self.sr = sample_rate
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.n_mels = n_mels

    def load_audio(self, file_path):
        y, _ = librosa.load(file_path, sr=self.sr)
        return y

    def extract_features(self, audio):
        """Extract mel spectrogram features"""
        # Handle short audio clips
        if len(audio) < self.n_fft:
            audio = np.pad(audio, (0, self.n_fft - len(audio)))

        S = librosa.feature.melspectrogram(
            y=audio,
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels
        )
        log_S = librosa.power_to_db(S, ref=np.max)
        return torch.tensor(log_S, dtype=torch.float32)

    def augment_audio(self, audio, noise_level=0.005):
        """Add realistic noise augmentation"""
        noise = np.random.normal(0, noise_level, len(audio))
        return audio + noise

Writing utils/audio_processor.py


In [42]:
import sys
sys.path.append('/content/drive/MyDrive/ADAS_Project')
sys.path.append('/content/utils')

In [44]:
%%writefile utils/thermal_processor.py
import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms

class ThermalProcessor:
    def __init__(self, input_size=(224, 224), normalize=True):
        """
        Initialize thermal image processor

        Args:
            input_size (tuple): Target size for resizing images (height, width)
            normalize (bool): Whether to normalize thermal values
        """
        self.input_size = input_size
        self.normalize = normalize
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(input_size),
            transforms.ToTensor(),
        ])

    def preprocess(self, thermal_image):
        """
        Preprocess a single thermal image

        Args:
            thermal_image (numpy.ndarray): Input thermal image (single channel)

        Returns:
            torch.Tensor: Preprocessed thermal image tensor
        """
        # Convert to float32 if needed
        if thermal_image.dtype != np.float32:
            thermal_image = thermal_image.astype(np.float32)

        # Normalize to [0, 1] if not already
        if thermal_image.max() > 1.0:
            thermal_image = thermal_image / thermal_image.max()

        # Apply transformations
        tensor = self.transform(thermal_image)

        # Add batch dimension if needed
        if len(tensor.shape) == 3:
            tensor = tensor.unsqueeze(0)

        return tensor

    def add_thermal_noise(self, thermal_image, noise_level=0.1):
        """Add realistic thermal noise to the image"""
        noise = np.random.normal(0, noise_level, thermal_image.shape).astype(np.float32)
        return np.clip(thermal_image + noise, 0, 1)

    def adjust_thermal_contrast(self, thermal_image, alpha=1.0, beta=0.0):
        """Adjust contrast of thermal image"""
        return np.clip(alpha * thermal_image + beta, 0, 1)


class ThermalEncoder(nn.Module):
    def __init__(self, input_channels=1, base_channels=32, output_dim=128):
        """
        CNN-based thermal feature extractor
        """
        super().__init__()
        self.features = nn.Sequential(
            # Initial conv block
            nn.Conv2d(input_channels, base_channels, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(base_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),

            # Residual blocks
            self._make_residual_block(base_channels, base_channels * 2, 2),
            self._make_residual_block(base_channels * 2, base_channels * 4, 2),
            self._make_residual_block(base_channels * 4, base_channels * 8, 2),

            # Final pooling
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),

            # Projection head
            nn.Linear(base_channels * 8, output_dim)
        )

    def _make_residual_block(self, in_channels, out_channels, stride):
        """Create a residual block with skip connection"""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.features(x)

Writing utils/thermal_processor.py
