In [9]:
!pip install einops



In [1]:
import os

# Define dataset path
dataset_path = "/ML/ubl vs non-ubl/data_vit"

# Check if directories exist
assert os.path.exists(f"{dataset_path}/train/images"), "Train images folder missing!"
assert os.path.exists(f"{dataset_path}/train/labels"), "Train labels folder missing!"
assert os.path.exists(f"{dataset_path}/valid/images"), "Validation images folder missing!"
assert os.path.exists(f"{dataset_path}/valid/labels"), "Validation labels folder missing!"
assert os.path.exists(f"{dataset_path}/test/images"), "Test images folder missing!"

print("✅ Dataset structure verified.")

✅ Dataset structure verified.


In [2]:
data_yaml_path = f"{dataset_path}/data.yaml" #your images are directory

# Correcting paths inside data.yaml
with open(data_yaml_path, "w") as f:
    f.write(f"""
train: {dataset_path}/train/images
val: {dataset_path}/valid/images
test: {dataset_path}/test/images

nc: 2
names: ['non-ubl','ubl']
    """)

print("✅ data.yaml correctly updated.")

✅ data.yaml correctly updated.


In [13]:
import torch
from torchvision.datasets import OxfordIIITPet
import matplotlib.pyplot as plt
from random import random
from torchvision.transforms import Resize, ToTensor
from torchvision.transforms.functional import to_pil_image

to_tensor = [Resize((144, 144)), ToTensor()]

class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image = t(image)
        return image, target

def show_images(images, num_samples=40, cols=8):
    """ Plots some samples from the dataset """
    plt.figure(figsize=(15,15))

    # Calculate how many images to skip to get num_samples
    total_images = len(images)
    idx = int(total_images / num_samples)

    for i, img in enumerate(images):
        if i % idx == 0 and int(i/idx) < num_samples:  # Ensure we don't exceed num_samples
            plt.subplot(int(num_samples/cols) + 1, cols, int(i/idx) + 1)
            plt.imshow(to_pil_image(img[0]))

    plt.tight_layout()
    plt.show() # This is needed to display the plot


In [15]:
import yaml
from PIL import Image
import torchvision.transforms as T
import matplotlib.pyplot as plt

def to_pil_image(tensor):
    return T.ToPILImage()(tensor)

# Load YAML config
with open(f"{dataset_path}/data.yaml", 'r') as file:
    config = yaml.safe_load(file)

# Get image paths (adjust based on your YAML structure)
image_dir = f"{dataset_path}/train"  #your images are directory
import os
image_paths = [os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png', '.jpeg'))]

In [None]:
# Load some images
from torchvision.io import read_image

# Make sure read_image is working properly
images = []
for path in image_paths[:100]:  # Start with just 10 images for testing
    try:
        # Ensure path is a string
        if isinstance(path, list):
            path = path[0]  # Take first element if it's a list
            
        # Load with PIL first (more robust)
        from PIL import Image
        import torchvision.transforms as transforms
        
        pil_img = Image.open(path)
        tensor_img = transforms.ToTensor()(pil_img)
        images.append(tensor_img)
    except Exception as e:
        print(f"Error loading {path}: {e}")

print(f"Successfully loaded {len(images)} images")

# Show images
show_images(images)

In [12]:
print("Images length:", len(images))
if len(images) > 0:
    print("First element type:", type(images[0]))
    if hasattr(images[0], "__len__"):
        print("First element length:", len(images[0]))

Images length: 0


In [7]:
#patching

from torch import nn
from einops.layers.torch import Rearrange
from torch import Tensor


class PatchEmbedding(nn.Module):
    def __init__(self, in_channels = 3, patch_size = 8, emb_size = 128):
        self.patch_size = patch_size
        super().__init__()
        self.projection = nn.Sequential(
            # break-down the image in s1 x s2 patches and flat them
            Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_size, p2=patch_size),
            nn.Linear(patch_size * patch_size * in_channels, emb_size)
        )

    def forward(self, x: Tensor) -> Tensor:
        x = self.projection(x)
        return x

# Run a quick test
sample_datapoint = torch.unsqueeze(images[0][0], 0)
print("Initial shape: ", sample_datapoint.shape)
embedding = PatchEmbedding()(sample_datapoint)
print("Patches shape: ", embedding.shape)

IndexError: list index out of range