In [1]:
import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
import pandas as pd
from PIL import ImageDraw, ImageFont, Image
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch
import torch.nn as nn
from transformers import ViTModel
from torchinfo import summary  # 
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
import random
import time

In [2]:
DEVICE="cuda:0"
def setAllSeeds(seed):
  os.environ['MY_GLOBAL_SEED'] = str(seed)
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
setAllSeeds(42)

In [3]:
def show_fashionImage(image):
    """Show image with landmarks"""
    plt.imshow(image)

In [4]:
category="Men Tshirts"
df = pd.read_csv("train.csv")
df = df[df["Category"]==category]
test_df = pd.read_csv("test.csv")
test_df = test_df[test_df["Category"]==category]

In [5]:
df = df.fillna("NA")

In [6]:
# df=df[0:100]

In [7]:
id2label={}
label2id={}
attrs={}
total_attr=len(df.columns)
for i in range(3,total_attr):
    labels=df[df.columns[i]].unique()
    # print(df.columns[i],labels)
    id2label[i-3]={k:labels[k] for k in range(len(labels))}
    label2id[i-3]={labels[k]:k for k in range(len(labels))}
    attrs[i-3]=df.columns[i]
print(id2label)
print(label2id)
print(attrs)

{0: {0: 'default', 1: 'multicolor', 2: 'black', 3: 'white', 4: 'NA'}, 1: {0: 'round', 1: 'polo', 2: 'NA'}, 2: {0: 'printed', 1: 'solid', 2: 'NA'}, 3: {0: 'default', 1: 'solid', 2: 'NA', 3: 'typography'}, 4: {0: 'short sleeves', 1: 'long sleeves', 2: 'NA'}, 5: {0: 'NA'}, 6: {0: 'NA'}, 7: {0: 'NA'}, 8: {0: 'NA'}, 9: {0: 'NA'}}
{0: {'default': 0, 'multicolor': 1, 'black': 2, 'white': 3, 'NA': 4}, 1: {'round': 0, 'polo': 1, 'NA': 2}, 2: {'printed': 0, 'solid': 1, 'NA': 2}, 3: {'default': 0, 'solid': 1, 'NA': 2, 'typography': 3}, 4: {'short sleeves': 0, 'long sleeves': 1, 'NA': 2}, 5: {'NA': 0}, 6: {'NA': 0}, 7: {'NA': 0}, 8: {'NA': 0}, 9: {'NA': 0}}
{0: 'attr_1', 1: 'attr_2', 2: 'attr_3', 3: 'attr_4', 4: 'attr_5', 5: 'attr_6', 6: 'attr_7', 7: 'attr_8', 8: 'attr_9', 9: 'attr_10'}


In [8]:
def categorize(example):
    for i in attrs:
        example[attrs[i]]=label2id[i][example[attrs[i]]]
    return example
df=df.apply(categorize,axis=1)
    

In [9]:
from transformers import ViTImageProcessor
model_name = f'vit/{category}/final'
processor = ViTImageProcessor.from_pretrained(model_name)

In [10]:
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(df, test_size=0.3)
val_df,test_df=train_test_split(val_df,test_size=0.33)

In [11]:
class CustomFashionManager(Dataset):
    def __init__(self,csv_file, root_dir="./",transforms =None):
        self.fashionItems = csv_file
        self.root_dir = root_dir
        self.transforms = transforms
    
    def __len__(self):
        return len(self.fashionItems)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,f"{self.fashionItems.iloc[idx, 0]:06d}"+'.jpg')
        image = Image.open(img_name)
        # print(attributes.shape)
        # attributes = attributes.astype('float').reshape(-1, len(attributes))
        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        inputs=processor(image, return_tensors='pt')
        return inputs

        # if self.transforms:
        #     sample = self.transforms(sample)

        # return sample


In [12]:
train_fashion_data = CustomFashionManager(csv_file=df,
                                    root_dir='train_images')
test_fashion_data = CustomFashionManager(csv_file=test_df,root_dir='test_images')

fig = plt.figure()
        

<Figure size 640x480 with 0 Axes>

In [13]:
import sys
from typing import List
from transformers import ViTConfig,ViTPreTrainedModel


class CustomConfig(ViTConfig):
    def __init__(self,num_classes_per_label:List[int]=[1],**kwargs):
        super().__init__(**kwargs)
        self.num_classes_per_label = num_classes_per_label

class MultiLabelMultiClassViT(ViTPreTrainedModel):
    config_class=CustomConfig
    def __init__(self, config: CustomConfig) -> None:
        super().__init__(config)

        self.vit = ViTModel(config, add_pooling_layer=False)
        self.classifiers = nn.ModuleList([
            nn.Linear(config.hidden_size, num_classes) 
            for num_classes in config.num_classes_per_label
        ])
        # Initialize weights and apply final processing
        self.post_init()

    def forward(self, pixel_values,labels=None):
        outputs = self.vit(pixel_values).last_hidden_state[:, 0, :]  # CLS token representation
        logits = [classifier(outputs) for classifier in self.classifiers]
        if labels is not None:
            loss=0
            for i in range(len(logits)):
                target=labels[:,i]
                loss += torch.nn.functional.cross_entropy(logits[i], target)
            return {"loss": loss, "logits": logits}
        return {"logits": logits}

# Example usage



In [14]:
from transformers import Trainer, TrainingArguments
from sklearn.metrics import classification_report
batch_size = 32
def collate_fn(batch):
    return {
        'pixel_values': torch.cat([x['pixel_values'] for x in batch],dim=0),
    }

def compute_metrics(pred):
    logits = pred.predictions
    labels=pred.label_ids
    probs = np.stack([np.argmax(logit,axis=1) for logit in logits])
    probs=probs.T
    report=classification_report(labels.flatten(),probs.flatten(),output_dict=True)
    return {'accuracy': report['accuracy'],"macro avg f1":report['macro avg']['f1-score']}

training_args = TrainingArguments(
  output_dir="./vit/"+category,
  per_device_train_batch_size=48,
  per_device_eval_batch_size=48,
  evaluation_strategy="epoch",
  save_strategy="epoch",
  num_train_epochs=1,
  fp16=True,
  learning_rate=2e-4,
  save_total_limit=1,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='wandb',
  load_best_model_at_end=True,
)
model = MultiLabelMultiClassViT.from_pretrained(model_name)
trainer = Trainer(
    model,
    training_args,
    train_dataset=train_fashion_data,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    tokenizer=processor,
)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [15]:
y_pred=trainer.predict(test_fashion_data)


[2024-10-16 06:23:41,652] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/home/aseems/anaconda3/envs/mhcp4/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status




Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkaran21258[0m ([33mkaran912[0m). Use [1m`wandb login --relogin`[0m to force relogin


PredictionOutput(predictions=[array([[ 1.5943103 , -0.4648979 , -0.09964082, -0.8956054 ,  0.12494826],
       [ 3.1197891 ,  1.2340044 , -1.9534467 , -1.7316924 , -0.06736726],
       [-0.85313654, -1.1922222 ,  1.5381099 , -1.8314184 ,  0.78785866],
       ...,
       [ 0.6039491 , -0.44324622, -1.1481357 ,  1.0101935 ,  0.01528133],
       [ 0.54879427, -0.27171114, -0.2777048 , -2.182915  ,  0.2820968 ],
       [ 0.5521645 ,  0.7823535 , -1.5502411 , -0.55706775,  0.06808744]],
      dtype=float32), array([[ 0.38823524, -0.16400173, -0.16245508],
       [-2.1587057 ,  2.294446  , -0.4956783 ],
       [ 2.298467  , -3.2238846 ,  1.8347865 ],
       ...,
       [ 1.3263801 , -0.65120447, -0.01661078],
       [ 1.246188  , -0.63716054,  0.6508126 ],
       [ 0.60354763, -0.23331141, -0.24453875]], dtype=float32), array([[ 0.32218823, -0.7926273 ,  0.12727286],
       [-2.945951  ,  2.6385462 ,  0.03830134],
       [ 1.9487152 , -2.1571183 ,  0.97339404],
       ...,
       [ 0.2908851

In [23]:
logits = y_pred.predictions
probs = np.stack([np.argmax(logit,axis=1) for logit in logits])
probs=probs.T
l=[]
for i in range(len(probs)):
    x=[]
    for j in range(len(probs[i])):
        x.append(id2label[j][probs[i][j]])
    l.append(x)
l


[['default', 'round', 'printed', 'NA', 'short sleeves'],
 ['default', 'polo', 'solid', 'solid', 'short sleeves'],
 ['black', 'round', 'printed', 'NA', 'NA'],
 ['white', 'polo', 'solid', 'solid', 'short sleeves'],
 ['white', 'round', 'printed', 'default', 'short sleeves'],
 ['white', 'round', 'printed', 'default', 'short sleeves'],
 ['default', 'polo', 'solid', 'solid', 'short sleeves'],
 ['white', 'round', 'printed', 'default', 'NA'],
 ['default', 'round', 'NA', 'default', 'short sleeves'],
 ['white', 'round', 'printed', 'typography', 'short sleeves'],
 ['multicolor', 'round', 'NA', 'NA', 'NA'],
 ['default', 'round', 'NA', 'default', 'short sleeves'],
 ['multicolor', 'round', 'NA', 'default', 'short sleeves'],
 ['multicolor', 'round', 'NA', 'default', 'NA'],
 ['black', 'round', 'NA', 'default', 'short sleeves'],
 ['white', 'round', 'NA', 'default', 'short sleeves'],
 ['multicolor', 'polo', 'solid', 'solid', 'short sleeves'],
 ['white', 'NA', 'printed', 'NA', 'NA'],
 ['white', 'round', 

In [16]:
import gc
del model, trainer
torch.cuda.empty_cache()
gc.collect()

3174