In [2]:
# Import libraries
import os
import sys
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [3]:
# Set path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

In [4]:
from src.utils.visualization import (
    get_thermal_stats,
    print_thermal_stats,
    visualize_thermal_sequence,
    get_edge_stats,
    print_edge_stats,
    visualize_edge_sequence,
    visualize_thermal_threshold_comparison,
    get_label_examples
)

In [5]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [6]:
# Load preprocessed dataset
print("\nLoading preprocessed dataset...")
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
preprocessed_dataset_path = os.path.join(project_root, 'data', 'processed', 'preprocessed_dataset.pt')
preprocessed_dataset = torch.load(preprocessed_dataset_path)



Loading preprocessed dataset...


  preprocessed_dataset = torch.load(preprocessed_dataset_path)


In [7]:
# Convert tensors to correct format for model
print("\nFormatting tensors...")
corrected_tensor = preprocessed_dataset['tensors']['corrected'].permute(2, 0, 1).unsqueeze(1)  # (N, 1, H, W)
edge_tensor = preprocessed_dataset['tensors']['edge'].permute(2, 0, 1).unsqueeze(1)            # (N, 1, H, W)
threshold_tensor = preprocessed_dataset['tensors']['threshold']['low'].permute(2, 0, 1).unsqueeze(1)  # (N, 1, H, W)
temporal_tensor = preprocessed_dataset['tensors']['temporal'].permute(2, 3, 0, 1)  # (N, 6, H, W)
labels = preprocessed_dataset['labels']['numeric_labels']


Formatting tensors...


In [8]:
# Print tensor shapes
print("\nTensor shapes after formatting:")
print(f"Corrected tensor: {corrected_tensor.shape}")
print(f"Edge tensor: {edge_tensor.shape}")
print(f"Threshold tensor: {threshold_tensor.shape}")
print(f"Temporal tensor: {temporal_tensor.shape}")
print(f"Labels: {labels.shape}")


Tensor shapes after formatting:
Corrected tensor: torch.Size([15, 1, 480, 640])
Edge tensor: torch.Size([15, 1, 480, 640])
Threshold tensor: torch.Size([15, 1, 480, 640])
Temporal tensor: torch.Size([15, 6, 480, 640])
Labels: torch.Size([15])


In [9]:
# Create refined Dataset class for model
class ThermalDataset(Dataset):
    def __init__(self, preprocessed_data, indices=None, transform=None):
        self.data = preprocessed_data
        self.indices = indices if indices is not None else range(len(preprocessed_data['tensors']['corrected']))
        self.transform = transform

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        real_idx = self.indices[idx]
        sample = {
            'corrected': self.data['tensors']['corrected'][real_idx],
            'edge': self.data['tensors']['edge'][real_idx],
            'temporal': self.data['tensors']['temporal'][real_idx],
            'threshold': self.data['tensors']['threshold']['low'][real_idx],
            'label': self.data['labels']['numeric_labels'][real_idx]
        }
        
        if self.transform:
            sample = self.transform(sample)
            
        return sample

In [10]:
# Split data into train and validation sets
print("\nSplitting data into train and validation sets...")
train_idx, val_idx = train_test_split(
    range(len(corrected_tensor)), 
    test_size=0.2, 
    stratify=labels,
    random_state=42
)


Splitting data into train and validation sets...


In [11]:
# Create datasets
train_dataset = ThermalDataset(preprocessed_dataset, indices=train_idx)
val_dataset = ThermalDataset(preprocessed_dataset, indices=val_idx)

In [12]:
# Create dataloaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

In [13]:
print("\nDataset splits:")
print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")


Dataset splits:
Training samples: 12
Validation samples: 3


In [14]:
# Verify data loading
print("\nVerifying data loading...")
for batch in train_loader:
    print("\nBatch shapes:")
    print(f"Corrected: {batch['corrected'].shape}")
    print(f"Edge: {batch['edge'].shape}")
    print(f"Temporal: {batch['temporal'].shape}")
    print(f"Threshold: {batch['threshold'].shape}")
    print(f"Labels: {batch['label'].shape}")
    break


Verifying data loading...

Batch shapes:
Corrected: torch.Size([12, 640, 15])
Edge: torch.Size([12, 640, 15])
Temporal: torch.Size([12, 640, 15, 6])
Threshold: torch.Size([12, 640, 15])
Labels: torch.Size([12])


In [16]:
# Print class distribution in splits
def print_class_distribution(dataset, split_name):
    labels = [data['label'].item() for data in dataset]
    unique_labels, counts = np.unique(labels, return_counts=True)
    print(f"\n{split_name} class distribution:")
    for label, count in zip(unique_labels, counts):
        print(f"Class {label}: {count} samples ({count/len(dataset)*100:.2f}%)")

print_class_distribution(train_dataset, "Training")
print_class_distribution(val_dataset, "Validation")


Training class distribution:
Class 0: 4 samples (33.33%)
Class 1: 4 samples (33.33%)
Class 2: 4 samples (33.33%)

Validation class distribution:
Class 0: 1 samples (33.33%)
Class 1: 1 samples (33.33%)
Class 2: 1 samples (33.33%)
