In [None]:
# Step 1: Install the library
!pip install video-diffusion-pytorch


Collecting video-diffusion-pytorch
  Downloading video_diffusion_pytorch-0.7.0-py3-none-any.whl.metadata (919 bytes)
Collecting einops-exts (from video-diffusion-pytorch)
  Downloading einops_exts-0.0.4-py3-none-any.whl.metadata (621 bytes)
Collecting rotary-embedding-torch (from video-diffusion-pytorch)
  Downloading rotary_embedding_torch-0.8.6-py3-none-any.whl.metadata (675 bytes)
Collecting sacremoses (from video-diffusion-pytorch)
  Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.10->video-diffusion-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.10->video-diffusion-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.10->video-diffusion-pytorch)
  Downloading nvidia_cuda_cupt

In [None]:
from video_diffusion_pytorch import Unet3D, GaussianDiffusion, Trainer
import torch


In [None]:
from video_diffusion_pytorch import Unet3D

# 3D U-Net used as the denoising network.
model = Unet3D(
    channels=6,            # one channel per spectral band (6 here; use 5 for a 5-band setup)
    dim=64,
    dim_mults=(1, 2, 4),
)


In [None]:
from video_diffusion_pytorch import GaussianDiffusion
# Wrap the U-Net in the diffusion process used for training.
diffusion = GaussianDiffusion(
    model,
    channels=6,        # set explicitly; same value as the `channels` given to Unet3D
    num_frames=8,      # frames per clip
    image_size=128,    # frame height/width
    timesteps=1000,    # number of diffusion steps
    loss_type='l2',
)

In [None]:
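# NOTE: superseded by the previous cell. This earlier variant does not pass
# `channels`; it is kept commented out for reference only.
# from video_diffusion_pytorch import GaussianDiffusion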

# diffusion = GaussianDiffusion(
#     model,            # the U-Net we just created
#     image_size=128,         # input images must be resized to 128x128
#     num_frames=8,           # number of time steps (frames) in your video clip
#     timesteps=1000,         # number of diffusion steps (noise levels)
#     loss_type='l2'          # standard pixel-wise loss
# )


In [None]:
import h5py
import numpy as np
import torch

# Function to load a sequence of frames from multiple .h5 files
def load_sequence(h5_file_paths, bands, sequence_length=8):
    """
    Load `sequence_length` frames, each a stack of the requested bands.

    Args:
        h5_file_paths: list of .h5 file paths (1 per timestamp); only the first
            `sequence_length` entries are read
        bands: list of dataset names to read from every file
        sequence_length: number of frames to load (default=8)

    Returns:
        A float32 tensor of shape (sequence_length, len(bands), H, W), where
        (H, W) is the shape of `f[band][0]` (e.g. (8, 6, 128, 128)).

    Raises:
        ValueError: if fewer than `sequence_length` paths are supplied, if
            `bands` is empty, or if the per-band arrays cannot be stacked.

    NOTE(review): values are returned exactly as stored (no scaling). Confirm the
    value range the downstream diffusion model expects and normalize if needed.
    """
    selected_paths = h5_file_paths[:sequence_length]

    # Fail loudly instead of silently returning a clip with too few frames.
    if len(selected_paths) < sequence_length:
        raise ValueError(
            f"Expected at least {sequence_length} file paths, got {len(selected_paths)}"
        )
    if len(bands) == 0:
        raise ValueError("`bands` must contain at least one band name")

    sequence = []
    for path in selected_paths:
        with h5py.File(path, 'r') as f:
            # Cast to float32 while reading so the result does not depend on the
            # on-disk dtype of the band datasets.
            frame = np.stack(
                [np.asarray(f[band][0], dtype=np.float32) for band in bands]
            )  # shape: (len(bands), H, W)
        sequence.append(frame)

    sequence_np = np.stack(sequence)  # shape: (sequence_length, len(bands), H, W)
    # The array was created in this function, so sharing its memory with the tensor is safe.
    return torch.from_numpy(sequence_np)


In [None]:
from torch.utils.data import Dataset, DataLoader

class CloudMotionDataset(Dataset):
    """Sliding-window dataset over an ordered list of .h5 files.

    Item `i` is the clip loaded from `sequence_length` consecutive files starting
    at the i-th valid start offset, so neighbouring items overlap.
    """

    def __init__(self, h5_file_paths, bands, sequence_length=8):
        """Store the configuration and precompute every valid window start."""
        self.h5_file_paths = h5_file_paths
        self.bands = bands
        self.sequence_length = sequence_length

        # A start offset is valid only if a complete window still fits after it;
        # the list is empty when there are fewer files than `sequence_length`.
        last_start = len(h5_file_paths) - sequence_length
        self.valid_start_indices = [start for start in range(last_start + 1)]

    def __len__(self):
        """Number of complete windows available."""
        return len(self.valid_start_indices)

    def __getitem__(self, idx):
        """Return the clip for window `idx`, as produced by `load_sequence`."""
        first = self.valid_start_indices[idx]
        window_paths = self.h5_file_paths[first:first + self.sequence_length]
        return load_sequence(window_paths, self.bands, self.sequence_length)


In [None]:
from google.colab import drive
import os

# 1. Mount Google Drive at /content/drive (uses the google.colab `drive` helper,
#    so this cell is meant to run inside Colab).
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import glob

# Folder that holds the .h5 files -- point this at your own data directory
folder_path = '/content/drive/MyDrive/ISRO/reshaped_DS/'

# Gather every .h5 file in the folder, ordered by path name
# (this is chronological only if the file names sort that way)
all_h5_paths = glob.glob(f"{folder_path}*.h5")
all_h5_paths.sort()

# Spectral bands to read from each file
band_list = [
    'IMG_VIS',
    'IMG_WV',
    'IMG_TIR1',
    'IMG_TIR2',
    'IMG_MIR',
    'IMG_SWIR',
]

# Build the sliding-window dataset over all discovered files
dataset = CloudMotionDataset(h5_file_paths=all_h5_paths, bands=band_list)


In [None]:
print(len(all_h5_paths))  # number of .h5 files found; must be >= 8 (the default sequence_length) to form at least one clip


9


In [None]:
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm

# Create DataLoader.
# The recorded run of this cell ran out of CUDA memory with batch_size=2, so each
# forward/backward pass now handles a single clip and gradients are accumulated
# over `accum_steps` clips, keeping the effective batch size at 2.
micro_batch_size = 1
accum_steps = 2
dataloader = DataLoader(dataset, batch_size=micro_batch_size, shuffle=True, num_workers=2)

# Optimizer
optimizer = torch.optim.Adam(diffusion.parameters(), lr=8e-5)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
diffusion.to(device)

# Mixed precision (CUDA only) to shrink activation memory further; on CPU both
# autocast and the gradient scaler are disabled and act as no-ops.
use_amp = device.type == "cuda"
scaler = torch.amp.GradScaler(device.type, enabled=use_amp)

# Training Loop
num_epochs = 10  # or define based on steps you want
num_batches = len(dataloader)
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    epoch_loss = 0.0
    optimizer.zero_grad()

    for step, batch in enumerate(tqdm(dataloader), start=1):
        batch = batch.to(device)  # shape: (B, F, C, H, W) as returned by the dataset

        # Reorder dims to (B, C, F, H, W) as expected by diffusion
        batch = batch.permute(0, 2, 1, 3, 4)  # (B, C, F, H, W)

        with torch.autocast(device_type=device.type, enabled=use_amp):
            loss = diffusion(batch)

        # Divide by accum_steps so the accumulated gradient is the mean over the
        # effective batch rather than the sum.
        scaler.scale(loss / accum_steps).backward()
        epoch_loss += loss.item()

        # Step once per accumulation group, and once more at the end of the epoch
        # so a trailing partial group is not dropped.
        if step % accum_steps == 0 or step == num_batches:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

    # Report the mean over the epoch (previously only the last batch's loss was
    # printed, which also failed with NameError when the loader was empty).
    if num_batches > 0:
        print(f"Epoch {epoch+1} mean loss: {epoch_loss / num_batches:.6f}")
    else:
        print(f"Epoch {epoch+1}: no batches (dataset is empty)")


Epoch 1/10


  0%|          | 0/1 [00:09<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 78.12 MiB is free. Process 9052 has 14.66 GiB memory in use. Of the allocated memory 14.26 GiB is allocated by PyTorch, and 278.51 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)