## Install MMSegmentation

In [2]:
%%capture
!pip install -U openmim
!mim install mmengine
!mim install "mmcv>=2.0.0"
!pip install "mmsegmentation>=1.0.0"
!pip install ftfy

In [3]:
#%%capture
#!git clone -b main https://github.com/open-mmlab/mmsegmentation.git
#!cd mmsegmentation
#!pip install -v -e .

## Example

In [4]:
#!mim download mmsegmentation --config pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 --dest .

processing pspnet_r50-d8_4xb2-40k_cityscapes-512x1024...
[32mpspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth exists in /kaggle/working[0m
[32mSuccessfully dumped pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py to /kaggle/working[0m


In [5]:
from mmseg.apis import inference_model, init_model, show_result_pyplot
import mmcv

config_file = '/kaggle/working/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
checkpoint_file = '/kaggle/working/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'

# build the model from a config file and a checkpoint file
model = init_model(config_file, checkpoint_file, device='cpu')

# test a single image and show the results
img = '/kaggle/working/mmsegmentation/demo/demo.png'  # or img = mmcv.imread(img), which will only load it once
result = inference_model(model, img)
# visualize the results in a new window
show_result_pyplot(model, img, result, show=True)
# or save the visualization results to image files
# you can change the opacity of the painted segmentation map in (0, 1].
show_result_pyplot(model, img, result, show=True, out_file='result.jpg', opacity=0.5)




Loads checkpoint by local backend from path: /kaggle/working/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth




array([[[ 87,  94,  87],
        [ 69,  80,  74],
        [ 94,  99,  99],
        ...,
        [ 86,  90,  80],
        [ 88,  91,  81],
        [ 86,  90,  80]],

       [[ 89,  96,  88],
        [ 73,  83,  76],
        [ 94,  99,  99],
        ...,
        [ 86,  90,  80],
        [ 88,  90,  79],
        [ 86,  90,  80]],

       [[ 87,  94,  87],
        [ 73,  83,  76],
        [ 94,  99,  99],
        ...,
        [ 86,  90,  80],
        [ 86,  88,  78],
        [ 86,  90,  80]],

       ...,

       [[ 88,  63,  91],
        [ 88,  63,  91],
        [ 88,  61,  90],
        ...,
        [102,  77, 102],
        [100,  76, 101],
        [102,  77, 102]],

       [[ 88,  61,  90],
        [ 88,  63,  91],
        [ 86,  61,  89],
        ...,
        [ 99,  73, 100],
        [100,  74, 100],
        [100,  74, 101]],

       [[ 88,  61,  90],
        [ 88,  63,  91],
        [ 86,  61,  89],
        ...,
        [ 90,  60,  88],
        [ 89,  62,  89],
        [ 92,  65,  92]]

## P2 SWIN

In [81]:
import json
import os
import pandas as pd
from PIL import Image
from torchvision import transforms

import torch 
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [82]:
train_file = "/kaggle/input/annotations-fashionpedia/instances_attributes_train2020.json"

In [83]:
def load_metadata(filename):
    test_merged_df = None
    with open(filename) as json_data:
        data = json.load(json_data)
        test_categories = pd.DataFrame(data['categories'])
        test_img = pd.DataFrame(data['images'])
        test_annotations = pd.DataFrame(data['annotations'])
        test_merged_df = pd.merge(test_annotations, test_img, left_on='image_id', right_on='id', how='outer')
        test_merged_df = test_merged_df.drop(columns=['id_x', 'id_y', 'license', 'time_captured', 'isstatic', 'original_url', 'iscrowd', 'kaggle_id'])
        test_merged_df["img_area"] = test_merged_df["height"] * test_merged_df["width"]
        test_merged_df['area_ratio'] = test_merged_df['area'] / test_merged_df['img_area']
    return test_merged_df

In [123]:
class CustomDataset(Dataset):
    def __init__(self, metadata_file, split, transform=None, image_dir="/kaggle/input/imaterialist-fashion-2020-fgvc7"):
        self.metadata = load_metadata(metadata_file)
        self.transform = transform
        self.image_dir = os.path.join(image_dir, split)
        
        
    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, index):
        filename = self.metadata['file_name'][index]
        label = self.metadata['segmentation'][index]
       
        image_path = os.path.join(self.image_dir, filename)
        image = Image.open(image_path).convert("RGB")
     
        if self.transform:
            image = self.transform(image)
            
        return image, label

In [124]:
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * inputs.size(0)
    return running_loss / len(train_loader.dataset)


In [125]:
def validate(model, val_loader, criterion, device):
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * inputs.size(0)
            
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    
    accuracy = correct / total
    return val_loss / len(val_loader.dataset), accuracy


In [126]:
train_file = "/kaggle/input/annotations-fashionpedia/instances_attributes_train2020.json"
test_file = "/kaggle/input/annotations-fashionpedia/instances_attributes_val2020.json"

In [127]:
transform = transforms.Compose([
    transforms.Resize((512, 512)), 
    transforms.ToTensor(),
])

In [128]:
train_dataset = CustomDataset(train_file, "train", transform=transform)
val_dataset = CustomDataset(test_file, "test", transform=transform)

In [129]:
config_file = '/kaggle/working/mmsegmentation/configs/swin/swin-base-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py'

In [130]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

batch_size = 32
num_epochs = 10


model = init_model(config_file, device=device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001) 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)


for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_accuracy = validate(model, val_loader, criterion, device)
    
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')




RuntimeError: each element in list of batch should be of equal size