In [2]:
!pip install tifffile

Collecting tifffile
  Downloading tifffile-2023.9.26-py3-none-any.whl (222 kB)
                                              0.0/222.9 kB ? eta -:--:--
     -----------------------------          174.1/222.9 kB 5.3 MB/s eta 0:00:01
     -------------------------------------- 222.9/222.9 kB 3.4 MB/s eta 0:00:00
Installing collected packages: tifffile
Successfully installed tifffile-2023.9.26


In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import tifffile
import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
# from flowlib import flow_to_color, floread

In [54]:
class FlowDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Directory with all the images and flow files.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.subfolders = os.listdir(root_dir)

    def __len__(self):
        return sum(len(glob.glob(os.path.join(os.path.join(self.root_dir, subfolder), '*.tif')))//2 for subfolder in self.subfolders)

    def __getitem__(self, idx):
        # Find the right subfolder
        subfolder = None

        for folder in self.subfolders:
            num_images = len(glob.glob(os.path.join(os.path.join(self.root_dir, folder), '*.flo')))
            print(f"folder: {folder}, number of images {num_images}")
            if idx < num_images:
                subfolder = folder
                break
            idx -= num_images
        
        if subfolder is None:
            raise IndexError
        
        # Find the corresponding image and flow file
        image_files = sorted(glob.glob(os.path.join(os.path.join(self.root_dir,subfolder), '*.tif')))
        flow_files = sorted(glob.glob(os.path.join(os.path.join(self.root_dir,subfolder), '*.flo')))

        image_1_path = image_files[2*idx].replace("\\", "/")
        image_2_path = image_files[2*idx+1].replace("\\", "/")

        flow_path = flow_files[idx]

        image1 = tifffile.imread(image_1_path)
        image2 = tifffile.imread(image_2_path)

        flow = self.load_flo_file(flow_path)

        if self.transform:
            image1 = self.transform(image1)
            image2 = self.transform(image2)
        
        # # Stack images along the channel dimension
        images = torch.cat((image1, image2), dim=0)

        return images, flow

    def load_flo_file(self, filename):
        with open(filename, 'rb') as f:
            magic = np.fromfile(f, np.float32, count=1)
            if 202021.25 != magic:
                print('Magic number incorrect. Invalid .flo file')
            else:
                w = np.fromfile(f, np.int32, count=1)[0]
                h = np.fromfile(f, np.int32, count=1)[0]
                #print('Reading %d x %d flo file' % (w, h))
                data = np.fromfile(f, np.float32, count=2*w*h)
                # Reshape data into 3D array (columns, rows, bands)
                data2D = np.resize(data, (h, w, 2))
                return torch.from_numpy(data2D)
            
# Set up the transformations (you might want to adjust these)
transform = transforms.Compose([
    transforms.ToTensor(),
])

In [55]:
dataset = FlowDataset(root_dir='C:/Users/estal/OneDrive - ROCKWOOL Group/Documents/GD/Thesis/04_Code/src/data/raw/PIV_dataset/PIV-genImages/data', transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)#, num_workers=-1)

print("success!")

success!


In [57]:
image_pair, flow = dataset[5300]

folder: backstep, number of images 3200
folder: cylinder, number of images 2050
folder: DNS_turbulence, number of images 2000
