In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
import torch
import pandas as pd
from PIL import ImageDraw, ImageFont, Image
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch
import torch.nn as nn
from transformers import ViTModel
from torchinfo import summary  # 
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
import random
import time

In [2]:
DEVICE="cuda:0"
def setAllSeeds(seed):
  os.environ['MY_GLOBAL_SEED'] = str(seed)
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  # torch.cuda.manual_seed_all(seed)
setAllSeeds(42)

In [3]:
df = pd.read_csv("train.csv")
categories=df["Category"].unique()
print(categories)
category=categories[1]
df = df[df["Category"]==category]

['Men Tshirts' 'Sarees' 'Kurtis' 'Women Tshirts' 'Women Tops & Tunics']


In [4]:
delCol = []
idxCol = []
trackNum = []
for i in range(1,11):
    uniName = df["attr_"+str(i)].dropna().unique()
    # print(len(uniName))
    print(uniName)
    if(len(uniName)==1):
        delCol.append("attr_"+str(i))
    else:
        idxCol.append("attr_"+str(i))
        trackNum.append(len(uniName))

['same as saree' 'solid' 'same as border' 'default']
['woven design' 'zari' 'no border' 'solid' 'default' 'temple border']
['small border' 'big border' 'no border']
['multicolor' 'cream' 'white' 'default' 'navy blue' 'yellow' 'green'
 'pink']
['party' 'traditional' 'daily' 'wedding']
['jacquard' 'default' 'tassels and latkans']
['woven design' 'same as saree' 'default' 'zari woven']
['zari woven' 'woven design' 'default' 'solid' 'printed']
['applique' 'elephant' 'floral' 'ethnic motif' 'peacock' 'default' 'solid'
 'checked' 'botanical']
['no' 'yes']


In [5]:
# l=df.columns[3:14].tolist()
# print(l)

In [6]:
df = df.drop(delCol,axis=1)
df.shape

(18346, 13)

In [7]:
df.isna().sum()

id              0
Category        0
len             0
attr_1      10461
attr_2        667
attr_3       2485
attr_4        450
attr_5        697
attr_6      13336
attr_7       9450
attr_8       1881
attr_9       4043
attr_10       528
dtype: int64

In [8]:
id2label={}
label2id={}
attrs={}
total_attr=len(df.columns)
for i in range(3,total_attr):
    labels=df[df.columns[i]].dropna().unique()
    # print(df.columns[i],labels)
    id2label[i-3]={k:labels[k] for k in range(len(labels))}
    label2id[i-3]={labels[k]:k for k in range(len(labels))}
    attrs[i-3]=df.columns[i]
print(id2label)
print(label2id)
print(attrs)

{0: {0: 'same as saree', 1: 'solid', 2: 'same as border', 3: 'default'}, 1: {0: 'woven design', 1: 'zari', 2: 'no border', 3: 'solid', 4: 'default', 5: 'temple border'}, 2: {0: 'small border', 1: 'big border', 2: 'no border'}, 3: {0: 'multicolor', 1: 'cream', 2: 'white', 3: 'default', 4: 'navy blue', 5: 'yellow', 6: 'green', 7: 'pink'}, 4: {0: 'party', 1: 'traditional', 2: 'daily', 3: 'wedding'}, 5: {0: 'jacquard', 1: 'default', 2: 'tassels and latkans'}, 6: {0: 'woven design', 1: 'same as saree', 2: 'default', 3: 'zari woven'}, 7: {0: 'zari woven', 1: 'woven design', 2: 'default', 3: 'solid', 4: 'printed'}, 8: {0: 'applique', 1: 'elephant', 2: 'floral', 3: 'ethnic motif', 4: 'peacock', 5: 'default', 6: 'solid', 7: 'checked', 8: 'botanical'}, 9: {0: 'no', 1: 'yes'}}
{0: {'same as saree': 0, 'solid': 1, 'same as border': 2, 'default': 3}, 1: {'woven design': 0, 'zari': 1, 'no border': 2, 'solid': 3, 'default': 4, 'temple border': 5}, 2: {'small border': 0, 'big border': 1, 'no border': 

In [9]:
def categorize(example):
    for i in attrs:
        # print(example[attrs[i]],type(example[attrs[i]]),pd.isna(example[attrs[i]]))
        if not pd.isna(example[attrs[i]]):
            example[attrs[i]]=label2id[i][example[attrs[i]]]
        else:
            example[attrs[i]]=-100
    return example
df=df.apply(categorize,axis=1)
df.head()

Unnamed: 0,id,Category,len,attr_1,attr_2,attr_3,attr_4,attr_5,attr_6,attr_7,attr_8,attr_9,attr_10
7267,7432,Sarees,10,0,0,0,0,0,0,0,0,0,0
7268,7433,Sarees,10,-100,1,0,1,1,-100,-100,0,1,0
7269,7434,Sarees,10,-100,1,0,2,0,-100,-100,0,2,0
7270,7435,Sarees,10,0,0,1,3,1,-100,1,0,3,0
7271,7436,Sarees,10,1,2,-100,-100,2,-100,-100,-100,-100,1


In [10]:
df.isna().sum()

id          0
Category    0
len         0
attr_1      0
attr_2      0
attr_3      0
attr_4      0
attr_5      0
attr_6      0
attr_7      0
attr_8      0
attr_9      0
attr_10     0
dtype: int64

In [11]:
from transformers import AutoImageProcessor
model_name = 'google/vit-base-patch16-224'
# model_name="google/vit-base-patch32-384"
processor = AutoImageProcessor.from_pretrained(model_name)

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


In [12]:
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(df, test_size=0.3)
val_df,test_df=train_test_split(val_df,test_size=0.33)
# train_df=df
# val_df=train_df
# test_df=train_df

In [13]:
# for i in l:
#     print(train_df[i].value_counts())

In [14]:
# for i in l:
#     print(i)
#     for j in set(train_df[i].unique()):
#         x=train_df[i][train_df[i]==j]
#         if x.count()<300 and j!=-100:
#             y=train_df[train_df[i]==j]
#             y.iloc[:,3:]=-100
#             y[i]=j
#             print(y)
#             train_df=pd.concat([train_df,y,y]).reset_index(drop=True)
# for i in l:
#     print(train_df[i].value_counts())

In [15]:
train_df.head()

Unnamed: 0,id,Category,len,attr_1,attr_2,attr_3,attr_4,attr_5,attr_6,attr_7,attr_8,attr_9,attr_10
14810,14975,Sarees,10,-100,1,0,1,0,-100,-100,0,2,1
14161,14326,Sarees,10,0,0,0,0,0,0,0,0,0,0
7941,8106,Sarees,10,-100,1,0,1,1,1,-100,0,2,0
13948,14113,Sarees,10,0,1,1,3,0,-100,3,3,6,0
25169,25334,Sarees,10,-100,1,-100,-100,-100,-100,-100,-100,5,-100


In [16]:
class CustomFashionManager(Dataset):
    def __init__(self,csv_file, root_dir="./",transforms =None):
        self.fashionItems = csv_file
        self.root_dir = root_dir
        self.transforms = transforms
    
    def __len__(self):
        return len(self.fashionItems)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,f"{self.fashionItems.iloc[idx, 0]:06d}"+'.jpg')
        image = Image.open(img_name)
        attributes = self.fashionItems.iloc[idx, 3:]
        attributes = np.array(attributes)
        attributes = attributes.astype('float')

        inputs=processor(image, return_tensors='pt')
        inputs['labels']=torch.tensor(attributes, dtype=torch.long)
        return inputs

        # if self.transforms:
        #     sample = self.transforms(sample)

        # return sample


In [17]:
train_fashion_data = CustomFashionManager(csv_file=train_df,
                                    root_dir='train_images')
val_fashion_data = CustomFashionManager(csv_file=val_df,
                                    root_dir='train_images')
test_fashion_data = CustomFashionManager(csv_file=test_df,root_dir='train_images')

fig = plt.figure()
        

<Figure size 640x480 with 0 Axes>

In [18]:
import sys
from typing import List
from transformers import ViTConfig,ViTPreTrainedModel,DeiTConfig,DeiTPreTrainedModel,DeiTModel


class CustomConfig(ViTConfig):
    def __init__(self,num_classes_per_label:List[int]=[1],**kwargs):
        super().__init__(**kwargs)
        self.num_classes_per_label = num_classes_per_label
        self.hidden_size_2=384

class MultiLabelMultiClassViT(ViTPreTrainedModel):
    config_class=CustomConfig
    def __init__(self, config: CustomConfig) -> None:
        super().__init__(config)

        self.vit = ViTModel(config, add_pooling_layer=False)
        self.classifiers = nn.ModuleList([
            # nn.Sequential(
            #     nn.Dropout(0.2),
            #     nn.Linear(config.hidden_size, config.hidden_size_2),
            #     nn.ReLU(),
            #     nn.Linear(config.hidden_size_2, num_classes)
            # )
            nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(config.hidden_size, num_classes)) 
            # nn.Linear(config.hidden_size, num_classes)
            for num_classes in config.num_classes_per_label
        ])
        # Initialize weights and apply final processing
        self.post_init()


    def forward(self, pixel_values,labels=None):
        outputs = self.vit(pixel_values).last_hidden_state[:, 0, :]  # CLS token representation
        logits = [classifier(outputs) for classifier in self.classifiers]
        if labels is not None:
            loss=0
            for i in range(len(logits)):
                target=labels[:,i]
                loss += torch.nn.functional.cross_entropy(logits[i], target)
            return {"loss": loss, "logits": logits}
        return {"logits": logits}

# Example usage
num_labels = len(trackNum)  # For example, 5 different labels



In [19]:
trackNum

[4, 6, 3, 8, 4, 3, 4, 5, 9, 2]

In [20]:
from transformers import Trainer, TrainingArguments
from sklearn.metrics import classification_report
batch_size = 32
def collate_fn(batch):
    return {
        'pixel_values': torch.cat([x['pixel_values'] for x in batch],dim=0),
        'labels': torch.stack([x['labels'] for x in batch])
    }

def compute_metrics(pred):
    # logits = pred.predictions
    # labels=pred.label_ids
    # probs = np.stack([np.argmax(logit,axis=1) for logit in logits])
    # probs=probs.T
    # truth_labels=[]
    # preds=[]
    # for i in range(len(probs)):
    #     pred=[]
    #     true=[]
    #     for j in range(len(probs[i])):
    #         pred.append(id2label[j][probs[i][j]])
    #         if labels[i][j]==-100:
    #             true.append(-100)
    #         else:
    #             true.append(id2label[j][labels[i][j]])
    #     preds.append(pred)
    #     truth_labels.append(true)

    # preds=np.array(preds)
    # truth_labels=np.array(truth_labels)

    # labels=truth_labels.flatten()
    # probs=preds.flatten()

    # non_padding_indices = [i for i, label in enumerate(labels) if label != '-100']

    # labels = [labels[i] for i in non_padding_indices]
    # probs = [probs[i] for i in non_padding_indices]

    # print(classification_report(labels,probs))
    # report=classification_report(labels,probs,output_dict=True)
    # return {'accuracy': report['accuracy'],"macro avg f1":report['macro avg']['f1-score']}

    logits = pred.predictions
    labels=pred.label_ids
    probs = np.stack([np.argmax(logit,axis=1) for logit in logits])
    probs=probs.T
    labels=labels.flatten()
    probs=probs.flatten()
    non_padding_indices = [i for i, label in enumerate(labels) if label != -100]
    labels = [labels[i] for i in non_padding_indices]
    probs = [probs[i] for i in non_padding_indices]
    print(classification_report(labels,probs))
    report=classification_report(labels,probs,output_dict=True)
    return {'accuracy': report['accuracy'],"macro avg f1":report['macro avg']['f1-score']}

training_args = TrainingArguments(
  output_dir="./vit4/"+category,
  per_device_train_batch_size=128,
  per_device_eval_batch_size=128,
  evaluation_strategy="epoch",
  save_strategy="epoch",
  logging_strategy="epoch",
  num_train_epochs=6,
  fp16=True,
  learning_rate=2e-4,
  save_total_limit=1,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='wandb',
  load_best_model_at_end=True,
  metric_for_best_model="macro avg f1"
)
config=ViTConfig.from_pretrained(model_name)
config=CustomConfig(num_classes_per_label=trackNum,**config.to_dict())
model = MultiLabelMultiClassViT.from_pretrained(model_name,config=config)
# for param in model.vit.parameters():
#     param.requires_grad = False
#print training params
print(summary(model, input_size=(batch_size,3,224,224), device=DEVICE))
trainer = Trainer(
    model,
    training_args,
    train_dataset=train_fashion_data,
    eval_dataset=val_fashion_data,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    tokenizer=processor,
)

Some weights of MultiLabelMultiClassViT were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['classifiers.0.bias', 'classifiers.0.weight', 'classifiers.1.bias', 'classifiers.1.weight', 'classifiers.2.bias', 'classifiers.2.weight', 'classifiers.3.bias', 'classifiers.3.weight', 'classifiers.4.bias', 'classifiers.4.weight', 'classifiers.5.bias', 'classifiers.5.weight', 'classifiers.6.bias', 'classifiers.6.weight', 'classifiers.7.bias', 'classifiers.7.weight', 'classifiers.8.bias', 'classifiers.8.weight', 'classifiers.9.bias', 'classifiers.9.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Layer (type:depth-idx)                                       Output Shape              Param #
MultiLabelMultiClassViT                                      [32, 4]                   --
├─ViTModel: 1-1                                              [32, 197, 768]            --
│    └─ViTEmbeddings: 2-1                                    [32, 197, 768]            152,064
│    │    └─ViTPatchEmbeddings: 3-1                          [32, 196, 768]            590,592
│    │    └─Dropout: 3-2                                     [32, 197, 768]            --
│    └─ViTEncoder: 2-2                                       [32, 197, 768]            --
│    │    └─ModuleList: 3-3                                  --                        85,054,464
│    └─LayerNorm: 2-3                                        [32, 197, 768]            1,536
├─ModuleList: 1-2                                            --                        --
│    └─Linear: 2-4                                           [32, 4]      

In [None]:
trainer.train()
trainer.save_model(f"./vit4/{category}/final")

[2024-10-30 22:26:06,322] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/home/aseems/anaconda3/envs/mhcp4/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status




Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkaran21258[0m ([33mkaran912[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,Macro avg f1
1,6.8197,6.08925,0.742284,0.575001
2,5.9073,5.909597,0.749131,0.579116
3,5.6658,5.801814,0.753707,0.595282
4,5.5016,5.789892,0.753565,0.595933
5,5.3802,5.765035,0.755552,0.59745


              precision    recall  f1-score   support

           0       0.82      0.86      0.84     14976
           1       0.70      0.66      0.68      6375
           2       0.59      0.40      0.48      2093
           3       0.54      0.62      0.58      2549
           4       0.74      0.69      0.71       797
           5       0.60      0.67      0.63       932
           6       0.84      0.76      0.80       277
           7       0.89      0.31      0.45       131
           8       0.00      0.00      0.00        60

    accuracy                           0.74     28190
   macro avg       0.64      0.55      0.58     28190
weighted avg       0.74      0.74      0.74     28190

              precision    recall  f1-score   support

           0       0.83      0.86      0.84     14976
           1       0.71      0.67      0.69      6375
           2       0.61      0.39      0.48      2093
           3       0.54      0.66      0.60      2549
           4       0.70 

In [22]:
trainer.evaluate(test_fashion_data)

              precision    recall  f1-score   support

           0       0.84      0.86      0.85     73476
           1       0.73      0.70      0.71     31444
           2       0.72      0.43      0.54     10444
           3       0.59      0.69      0.63     12997
           4       0.79      0.74      0.76      4010
           5       0.62      0.82      0.71      4851
           6       0.86      0.85      0.85      1370
           7       0.89      0.43      0.58       625
           8       0.94      0.20      0.32       245

    accuracy                           0.77    139462
   macro avg       0.77      0.64      0.66    139462
weighted avg       0.77      0.77      0.77    139462



{'eval_loss': 5.268681049346924,
 'eval_accuracy': 0.7702169766674793,
 'eval_macro avg f1': 0.6631648524116359,
 'eval_runtime': 118.3216,
 'eval_samples_per_second': 155.052,
 'eval_steps_per_second': 1.217,
 'epoch': 6.0}