# Texture Recognition - Pressure

In [10]:
import os
import platform
import copy
from PIL import Image
import math

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

import lightly
from lightly.models import utils
from lightly.models.modules import heads
from lightly.loss import NTXentLoss

from lightly.models.modules.heads import SimSiamPredictionHead
from lightly.models.modules.heads import SimSiamProjectionHead

###

In [3]:
class GifDataset(Dataset):
    """In-memory dataset holding every frame of every GIF in a directory.

    All files ending in ``.gif`` directly inside ``gif_dir`` are decoded when
    the dataset is constructed. Each frame is converted to RGB, passed through
    ``transform`` (if one is given) and stored in ``self.frames``; indexing
    the dataset returns the stored frame.

    Args:
        gif_dir: Directory containing the GIF files (not searched
            recursively).
        transform: Optional callable applied once to every RGB frame at load
            time.
    """

    def __init__(self, gif_dir, transform=None):
        self.gif_dir = gif_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # frame order (and therefore every index) reproducible across runs.
        self.gif_files = sorted(
            os.path.join(gif_dir, f)
            for f in os.listdir(gif_dir)
            if f.endswith('.gif')
        )
        self.frames = self._extract_frames()

    def _extract_frames(self):
        """Decode all GIFs and return their (transformed) frames as a list.

        Prints the percentage of processed files after each GIF.
        """
        frames = []
        total_files = len(self.gif_files)
        for i, gif_file in enumerate(self.gif_files):
            # The context manager closes the underlying file as soon as the
            # GIF has been decoded, so handles do not pile up across files.
            with Image.open(gif_file) as gif:
                # Fall back to a single frame if the image object does not
                # expose a frame count.
                total_frames = getattr(gif, 'n_frames', 1)
                for frame in range(total_frames):
                    gif.seek(frame)
                    # convert() returns a new image, so the frame stays
                    # usable after the file has been closed.
                    frame_image = gif.convert('RGB')
                    if self.transform:
                        frame_image = self.transform(frame_image)
                    frames.append(frame_image)
            # Calculate and print the progress
            percent_complete = ((i + 1) / total_files) * 100
            print(f'Progress: {percent_complete:.2f}%')
        return frames

    def __len__(self):
        """Return the total number of frames across all GIFs."""
        return len(self.frames)

    def __getitem__(self, idx):
        """Return the (transformed) frame stored at position ``idx``."""
        return self.frames[idx]


In [4]:
# Preprocessing applied to every GIF frame: resize to 224x224 pixels, then
# convert the image to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

In [36]:
# Decode every GIF in the sample directory up front; the preprocessing
# pipeline is applied to each frame while loading.
gif_dataset = GifDataset(
    gif_dir="/mnt/g/textures/texture recognition/sample testing",
    transform=transform,
)

Progress: 0.38%
Progress: 0.77%
Progress: 1.15%
Progress: 1.54%
Progress: 1.92%
Progress: 2.31%
Progress: 2.69%
Progress: 3.08%
Progress: 3.46%
Progress: 3.85%
Progress: 4.23%
Progress: 4.62%
Progress: 5.00%
Progress: 5.38%
Progress: 5.77%
Progress: 6.15%
Progress: 6.54%
Progress: 6.92%
Progress: 7.31%
Progress: 7.69%
Progress: 8.08%
Progress: 8.46%
Progress: 8.85%
Progress: 9.23%
Progress: 9.62%
Progress: 10.00%
Progress: 10.38%
Progress: 10.77%
Progress: 11.15%
Progress: 11.54%
Progress: 11.92%
Progress: 12.31%
Progress: 12.69%
Progress: 13.08%
Progress: 13.46%
Progress: 13.85%
Progress: 14.23%
Progress: 14.62%
Progress: 15.00%
Progress: 15.38%
Progress: 15.77%
Progress: 16.15%
Progress: 16.54%
Progress: 16.92%
Progress: 17.31%
Progress: 17.69%
Progress: 18.08%
Progress: 18.46%
Progress: 18.85%
Progress: 19.23%
Progress: 19.62%
Progress: 20.00%
Progress: 20.38%
Progress: 20.77%
Progress: 21.15%
Progress: 21.54%
Progress: 21.92%
Progress: 22.31%
Progress: 22.69%
Progress: 23.08%
Progr

In [37]:
# LightlyDataset unpacks every item of the wrapped dataset as
# ``sample, target = dataset[index]``. GifDataset returns a bare frame, and
# unpacking a 3-channel image tensor into two names raises
# "ValueError: too many values to unpack (expected 2)". The frames are
# unlabeled, so pair each one with a dummy target before wrapping them.
labeled_frames = [(gif_dataset[i], 0) for i in range(len(gif_dataset))]
lightly_dataset = lightly.data.LightlyDataset.from_torch_dataset(labeled_frames)

# NOTE(review): each batch is now (frames, targets, filenames) with a single
# view per sample; SimSiam training needs two augmented views of every frame,
# so a two-view augmentation still has to be added to this pipeline.
gif_dataloader = torch.utils.data.DataLoader(
    lightly_dataset,
    batch_size=16,
    shuffle=True,
    # A trailing batch with a single sample cannot be pushed through
    # batch-norm layers in training mode, so incomplete batches are dropped.
    drop_last=True,
)


In [30]:
# size of the feature vector fed into the projection head
num_ftrs = 512
# the projection head's hidden layer and the output of both heads share the
# same width
proj_hidden_dim = 512
out_dim = proj_hidden_dim
# the prediction head is a bottleneck: its hidden layer is narrower than
# its input and output
pred_hidden_dim = 128

batch_size = 32

In [31]:
class SimSiam(nn.Module):
    """Backbone followed by a SimSiam projection head and prediction head.

    Args:
        backbone: Module mapping an input batch to features; its output is
            flattened to one vector per sample.
        num_ftrs: Input width of the projection head.
        proj_hidden_dim: Hidden width of the projection head.
        pred_hidden_dim: Hidden width of the prediction head.
        out_dim: Output width of both heads.
    """

    def __init__(
        self, backbone, num_ftrs, proj_hidden_dim, pred_hidden_dim, out_dim
    ):
        super().__init__()
        self.backbone = backbone
        self.projection_head = SimSiamProjectionHead(
            num_ftrs,
            proj_hidden_dim,
            out_dim,
        )
        self.prediction_head = SimSiamPredictionHead(
            out_dim,
            pred_hidden_dim,
            out_dim,
        )

    def forward(self, x):
        """Return ``(z, p)``: the detached projection and the prediction."""
        # backbone features, flattened to one vector per sample
        features = self.backbone(x)
        features = features.flatten(start_dim=1)
        # projection, and the prediction computed from it
        projection = self.projection_head(features)
        prediction = self.prediction_head(projection)
        # stop-gradient: the returned projection carries no gradient, while
        # the prediction still backpropagates through the projection head
        return projection.detach(), prediction


# ResNet-18 backbone. No weights are requested here, so the network starts
# from its default initialisation and is trained from scratch (it is not
# pretrained).
resnet = torchvision.models.resnet18()
# keep every child module except the last one (the classification layer)
backbone = nn.Sequential(*list(resnet.children())[:-1])
model = SimSiam(
    backbone=backbone,
    num_ftrs=num_ftrs,
    proj_hidden_dim=proj_hidden_dim,
    pred_hidden_dim=pred_hidden_dim,
    out_dim=out_dim,
)

In [32]:
# negative cosine similarity; the training loop evaluates it in both
# directions and averages the two terms to obtain the symmetric SimSiam loss
criterion = lightly.loss.NegativeCosineSimilarity()

# learning rate handed to the optimizer
lr = 0.1

# SGD with momentum and weight decay over all model parameters
optimizer = torch.optim.SGD(
    model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4
)

In [33]:
# Training hyper-parameters; the value noted after each setting is the
# alternative that was tried previously.
# NOTE(review): of these, only `epochs` is read by the cells visible here;
# the dataloader is built with its own literal batch size.
num_workers = 8
batch_size = 512  # previously 128
seed = 1
epochs = 800  # previously 50
input_size = 32  # previously 256


In [34]:
# Train the SimSiam model and report, once per epoch, the smoothed loss and
# an estimate of how far the embeddings have collapsed.
gpu_id = 3

device = 'cuda:' + str(gpu_id) if torch.cuda.is_available() else 'cpu'
print(device)

model.to(device)

# weight given to the previous value in the moving averages below
w = 0.9
avg_loss = 0.
avg_output_std = 0.
for e in range(epochs):

    for batch in gif_dataloader:

        # a lightly dataloader yields (views, targets, filenames); the two
        # augmented views of each image are nested inside the first item, so
        # the batch cannot be unpacked into four values directly
        # NOTE(review): this requires the loader to produce two views per
        # sample (e.g. via a two-view transform) - confirm the data pipeline
        x0, x1 = batch[0]

        # move images to the gpu
        x0 = x0.to(device)
        x1 = x1.to(device)

        # run the model on both transforms of the images
        # we get projections (z0 and z1) and
        # predictions (p0 and p1) as output
        z0, p0 = model(x0)
        z1, p1 = model(x1)

        # apply the symmetric negative cosine similarity
        # and run backpropagation
        loss = 0.5 * (criterion(z0, p1) + criterion(z1, p0))
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        # calculate the per-dimension standard deviation of the outputs
        # we can use this later to check whether the embeddings are collapsing
        output = p0.detach()
        output = torch.nn.functional.normalize(output, dim=1)

        output_std = torch.std(output, 0)
        output_std = output_std.mean()

        # use moving averages to track the loss and standard deviation
        avg_loss = w * avg_loss + (1 - w) * loss.item()
        avg_output_std = w * avg_output_std + (1 - w) * output_std.item()

    # the level of collapse is large if the standard deviation of the l2
    # normalized output is much smaller than 1 / sqrt(dim)
    collapse_level = max(0., 1 - math.sqrt(out_dim) * avg_output_std)
    # print intermediate results
    print(f'[Epoch {e:3d}] '
        f'Loss = {avg_loss:.2f} | '
        f'Collapse Level: {collapse_level:.2f} / 1.00')

cuda:3


ValueError: too many values to unpack (expected 2)