In [48]:
import torch
import os
from PIL import Image
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

In [56]:
import random

# Single source of truth for every RNG seeded in this cell; change it here only.
SEED = 42  # any integer works

# Python's built-in pseudo-random generator.
random.seed(SEED)

# PyTorch's default generator.
torch.manual_seed(SEED)

# Explicit CUDA seeding: current device first, then every visible device.
# Per the PyTorch docs these calls are silently ignored when CUDA is unavailable.
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)


In [57]:
# Custom Dataset Class for Loading Images
class ImageDataset(Dataset):
    """Dataset over the JPEG files located directly inside one folder.

    Args:
        folder_path: Directory to scan (not recursive).
        transform: Optional callable applied to each loaded PIL image
            before it is returned.

    Indexing returns the image converted to RGB, passed through
    ``transform`` when one was given.
    """

    # File extensions (matched case-insensitively) that are picked up.
    IMAGE_EXTENSIONS = ('.jpeg', '.jpg')

    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping deterministic across runs and machines.
        self.image_files = sorted(
            f for f in os.listdir(folder_path)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Number of image files found in the folder."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load the idx-th image (in sorted filename order) as RGB."""
        img_path = os.path.join(self.folder_path, self.image_files[idx])

        # The context manager closes the underlying file once the pixels have
        # been read; convert() returns a new, fully loaded image.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Apply transformations if any
        if self.transform is not None:
            image = self.transform(image)

        return image

In [58]:
# Preprocessing applied to every image: resize to 256x256, convert to a
# tensor, then shift/scale each channel with (x - 0.5) / 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

# Dataset over the images stored in ./crushit2
dataset = ImageDataset(folder_path='./crushit2', transform=transform)

In [80]:
# Iterate the dataset in its stored order (shuffle=False), up to 800 images per batch.
batches = DataLoader(dataset, batch_size=800, shuffle=False)

In [81]:
# Inspect the first batch only. `batch` stays bound after the loop, so later
# cells can keep working with it.
for batch_idx, batch in enumerate(batches):
    print(f"Batch {batch_idx + 1}:")
    print(f"Shape of the batch: {batch.shape}")  # (batch_size, channels, height, width)

    # You can now pass this batch to your CNN
    # For example, to pass to a model:
    # output = model(batch)

    # Break after one batch for example purposes
    break

Batch 1:
Shape of the batch: torch.Size([800, 3, 256, 256])


In [84]:
# Reuse the batch left bound by the preceding loop as the working tensor.
Xtr = batch

In [85]:
# Show the tensor's shape as the cell output.
Xtr.shape

torch.Size([800, 3, 256, 256])

In [92]:
# Move channels last: (N, C, H, W) -> (N, H, W, C).
# Permute from `batch` rather than from `Xtr` itself so the cell is idempotent:
# re-running it yields the same layout instead of permuting the axes again.
Xtr = batch.permute(0, 2, 3, 1)
Xtr.shape

torch.Size([800, 256, 256, 3])

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class ConvLayer(nn.Module):
    """2-D convolution layer that owns its kernel weights and biases explicitly.

    Args:
        in_channels: Channels of the input (3 for RGB).
        out_channels: Number of kernels, i.e. channels of the output.
        kernel_size: Kernel height/width, either an int or an (h, w) pair.
        stride_length: Stride of the convolution.
        padding: Zero padding added on each spatial side (default 0).

    Attributes:
        weights: Parameter of shape (out_channels, in_channels, k_h, k_w).
        biases: Parameter of shape (out_channels,).
    """

    # in_channels RGB 3, out_channels
    def __init__(self, in_channels, out_channels, kernel_size, stride_length, padding=0):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride_length = stride_length
        self.padding = padding

        # Accept both a single int and an explicit (height, width) pair.
        if isinstance(kernel_size, int):
            k_h, k_w = kernel_size, kernel_size
        else:
            k_h, k_w = kernel_size

        # initialize the weights and biases for kernels
        self.weights = nn.Parameter(torch.empty(out_channels, in_channels, k_h, k_w))
        self.biases = nn.Parameter(torch.empty(out_channels))

        # Kaiming-uniform weights; biases uniform within +/- 1/sqrt(fan_in).
        nn.init.kaiming_uniform_(self.weights, a=5 ** 0.5)
        bias_bound = (in_channels * k_h * k_w) ** -0.5
        nn.init.uniform_(self.biases, -bias_bound, bias_bound)

    def forward(self, X):
        """Convolve X of shape (N, in_channels, H, W) with the layer's kernels."""
        return F.conv2d(
            X,
            self.weights,
            self.biases,
            stride=self.stride_length,
            padding=self.padding,
        )

class CNNEncoder(nn.Module):
    """Placeholder for the convolutional encoder; no layers are defined yet."""

    def __init__(self, ):
        # nn.Module must be initialised before any submodule or parameter
        # can be assigned to the instance.
        super().__init__()
        # TODO: list of layers (kernel, stride)

### Let's work on group norm:

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F


In [9]:
# Step 1: a random stand-in for a batch of 4 RGB images of size 256x256
B, C_in, H, W = 4, 3, 256, 256
images = torch.randn(B, C_in, H, W)

# Step 2: 3x3 convolution (stride 1, padding 1) lifting 3 channels to 256;
# this configuration keeps the spatial size, so the result is B x 256 x 256 x 256
conv = nn.Conv2d(C_in, 256, kernel_size=3, stride=1, padding=1)
features = conv(images)
features.shape

torch.Size([4, 256, 256, 256])

In [29]:
class GroupNorm2D(nn.Module):
    """Group normalization for inputs shaped (B, C, *spatial).

    The C channels are split into ``num_groups`` groups; each sample/group pair
    is normalized with its own mean and (biased) variance, then optionally
    scaled and shifted per channel.

    Args:
        num_groups: Number of groups; must divide ``num_channels``.
        num_channels: Expected size of the channel dimension C.
        num_spatial_channels: Number of trailing spatial dimensions used to
            shape the affine parameters as (1, C, 1, ..., 1).
        affine: When true, learn a per-channel scale (gamma) and shift (beta).
        eps: Constant added to the variance for numerical stability.
        verbose: When true (default), print the intermediate tensor shapes on
            every forward pass; pass ``False`` to silence them.
    """

    def __init__(self, num_groups, num_channels, num_spatial_channels, affine=True, eps=1e-5, verbose=True):
        super().__init__()
        assert num_channels % num_groups == 0, f"channels :{num_channels} are not divisible by {num_groups} groups"
        self.num_channels = num_channels
        self.num_groups = num_groups
        self.affine = affine
        self.eps = eps
        self.verbose = verbose

        # Learnable parameters
        if affine:
            param_shape = (1, num_channels) + (1,) * num_spatial_channels
            # scale
            self.gamma = nn.Parameter(torch.ones(param_shape))
            # shift
            self.beta = nn.Parameter(torch.zeros(param_shape))
        else:
            # Registered as None so the attributes exist but carry no parameters.
            self.register_parameter('gamma', None)
            self.register_parameter('beta', None)

    def forward(self, X):
        """Normalize X of shape (B, C, *spatial); the output has the same shape."""
        B, C = X.shape[:2]
        spatial_dims = X.shape[2:]
        assert C == self.num_channels, f"Mismatch between input channels: {C} and num_channels: {self.num_channels} at initialization"

        # Rearrange the input so the groups form an extra dimension after the batch.
        # reshape (not view) also accepts non-contiguous inputs.
        G = self.num_groups
        grouped = X.reshape(B, G, C // G, *spatial_dims)
        if self.verbose:
            print (f"Now shape of X after arranging in groups is {grouped.shape}")

        # Statistics over every dimension except B and G.
        dims_to_reduce = tuple(range(2, grouped.dim()))
        mean = grouped.mean(dim=dims_to_reduce, keepdim=True)
        centered = grouped - mean
        # Biased variance (divide by N, not N - 1), as group norm is defined.
        var = centered.pow(2).mean(dim=dims_to_reduce, keepdim=True)
        if self.verbose:
            # Shape the statistics would have without keepdim.
            reduced_shape = torch.Size((B, G))
            print (f"Shapes of means: NoKeepDim: {reduced_shape} KeepDim: {mean.shape}")
            print (f"Shapes of vars: NoKeepDim: {reduced_shape} KeepDim: {var.shape}")

        # normalize:
        normalized = centered / torch.sqrt(var + self.eps)
        if self.verbose:
            print (f"Intermediate shape just after normalization = {normalized.shape}")

        # reshape back to original shape
        out = normalized.reshape(B, C, *spatial_dims)
        if self.verbose:
            print (f"Final shape after norm = {out.shape}")

        if self.gamma is None:
            # affine disabled: nothing to scale or shift
            return out

        # Scale and shift. The parameters are reshaped to the input's actual
        # rank so broadcasting always lines up on the channel dimension.
        broadcast_shape = (1, C) + (1,) * len(spatial_dims)
        return self.gamma.reshape(broadcast_shape) * out + self.beta.reshape(broadcast_shape)


In [30]:
# utility function we will use later when comparing manual gradients to PyTorch gradients
def cmp(s, custom, torch_version):
    """Print a one-line comparison of two tensors.

    Args:
        s: Label shown in the first column (padded to 15 characters).
        custom: Tensor produced by the hand-written implementation.
        torch_version: Reference tensor produced by PyTorch.

    The line reports whether the tensors match exactly, whether they match
    within rtol=1e-5 / atol=1e-7, and the largest absolute difference.
    Returns None.
    """
    is_exact = torch.all(custom == torch_version).item()
    is_close = torch.allclose(custom, torch_version, rtol=1e-5, atol=1e-7)
    largest_gap = (custom - torch_version).abs().max().item()
    print(f'{s:15s} | exact: {str(is_exact):5s} | approximate: {str(is_close):5s} | maxdiff: {largest_gap}')


In [31]:
# Random test input: batch of 4, 256 channels, four spatial dimensions of size 20
B, C_in, H, W, Z, Y = 4, 256, 20, 20, 20, 20
images = torch.randn(B, C_in, H, W, Z, Y)

# Hand-written module and PyTorch reference, both with 32 groups over 256 channels
normModule = GroupNorm2D(num_groups=32, num_channels=256, num_spatial_channels=4, affine=True)
torch_group_norm = nn.GroupNorm(32, 256, affine=True)

# Run both on the same input
answer = normModule(images)
torch_answer = torch_group_norm(images)

# Report exact / approximate agreement and the largest absolute difference
cmp("group_norm", answer, torch_answer)

Now shape of X after arranging in groups is torch.Size([4, 32, 8, 20, 20, 20, 20])
Shapes of means: NoKeepDim: torch.Size([4, 32]) KeepDim: torch.Size([4, 32, 1, 1, 1, 1, 1])
Shapes of vars: NoKeepDim: torch.Size([4, 32]) KeepDim: torch.Size([4, 32, 1, 1, 1, 1, 1])
Intermediate shape just after normalization = torch.Size([4, 32, 8, 20, 20, 20, 20])
Final shape after norm = torch.Size([4, 256, 20, 20, 20, 20])
group_norm      | exact: False | approximate: True  | maxdiff: 2.86102294921875e-06


In [2]:
import torch

# Five-dimensional tensor; its axes are indexed 0, 1, 2, 3, 4
X = torch.randn((1, 2, 3, 4, 5))
X.dim()

5

In [23]:
# List repetition copies references: all five entries are the same inner [1] object
a = 5 * [[1]]

In [24]:
# Show the repeated list as the cell output.
a

[[1], [1], [1], [1], [1]]