In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
import torch
import pandas as pd
from PIL import ImageDraw, ImageFont, Image
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch
import torch.nn as nn
from transformers import ViTModel
from torchinfo import summary  # 
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
import random
import time
from sklearn.utils.class_weight import compute_class_weight
from transformers import EfficientNetForImageClassification

In [2]:
# Use the first visible CUDA device when there is one, otherwise fall back to
# the CPU so that later `.to(DEVICE)` calls do not crash on GPU-less machines.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"


def setAllSeeds(seed):
  """Seed every random number generator this notebook relies on.

  Seeds Python's ``random``, NumPy and PyTorch (CPU generator and all CUDA
  devices) and records the value in the ``MY_GLOBAL_SEED`` environment
  variable.

  Args:
    seed: Integer seed shared by all generators.
  """
  os.environ['MY_GLOBAL_SEED'] = str(seed)
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
# setAllSeeds(42)

In [3]:
# Load the training annotations and list every product category they contain.
df = pd.read_csv("train.csv")
categories = df["Category"].unique()
print(categories)

# The notebook processes one category per run; position 1 of the printed
# array is the category selected here.
category = categories[1]
df = df.loc[df["Category"] == category]

['Men Tshirts' 'Sarees' 'Kurtis' 'Women Tshirts' 'Women Tops & Tunics']


In [4]:
# Load the test annotations and keep only the rows of the selected category.
test_df=pd.read_csv("test.csv")
# Take an explicit copy: the prediction cell assigns new columns to this frame,
# and it must own its data rather than be treated as a slice of the full table.
test_df=test_df[test_df["Category"]==category].copy()

In [5]:
# Number of missing values per column for the selected category.
df.isna().sum()

id              0
Category        0
len             0
attr_1      10461
attr_2        667
attr_3       2485
attr_4        450
attr_5        697
attr_6      13336
attr_7       9450
attr_8       1881
attr_9       4043
attr_10       528
dtype: int64

In [6]:
# attr_i=1
# removing_attri=[]
# df.iloc[:,3:2+attr_i]=np.nan
# df.iloc[:,3+attr_i:]=np.nan


In [7]:
# Frequency of each distinct full row. With pandas' default dropna=True,
# rows that contain any missing value are left out of this listing.
print(df.value_counts())

id     Category  len  attr_1         attr_2        attr_3        attr_4      attr_5       attr_6               attr_7         attr_8        attr_9        attr_10
7432   Sarees    10   same as saree  woven design  small border  multicolor  party        jacquard             woven design   zari woven    applique      no         1
19478  Sarees    10   same as saree  woven design  small border  multicolor  party        jacquard             woven design   zari woven    applique      no         1
19418  Sarees    10   same as saree  zari          small border  cream       party        jacquard             woven design   zari woven    elephant      yes        1
19436  Sarees    10   same as saree  zari          big border    multicolor  daily        tassels and latkans  default        woven design  checked       no         1
19445  Sarees    10   default        zari          big border    cream       traditional  jacquard             default        zari woven    ethnic motif  no         1
   

In [8]:
# Scan attr_1 .. attr_10 and record, for the selected category, which columns
# carry labels and how many distinct classes each of them has.
delCol = []    # attribute columns with no observed value at all
idxCol = []    # attribute columns with at least one observed value
trackNum = []  # number of distinct classes per kept attribute
weights = []   # one per-class weight tensor per kept attribute
m = nn.Softmax(dim=1)
l = []         # same column names as idxCol
for attr_no in range(1, 11):
    col = "attr_" + str(attr_no)
    uniName = df[col].dropna().unique()
    print(uniName)
    if len(uniName) == 0:
        delCol.append(col)
        continue
    idxCol.append(col)
    l.append(col)
    trackNum.append(len(uniName))
    # NOTE: the balanced class weights are computed here but never stored;
    # `weights` receives a uniform all-ones tensor for every attribute.
    class_weights = compute_class_weight(class_weight="balanced", classes=uniName, y=df[col].dropna())
    weights.append(torch.ones(len(uniName), dtype=torch.float32).to(DEVICE))


['same as saree' 'solid' 'same as border' 'default']
['woven design' 'zari' 'no border' 'solid' 'default' 'temple border']
['small border' 'big border' 'no border']
['multicolor' 'cream' 'white' 'default' 'navy blue' 'yellow' 'green'
 'pink']
['party' 'traditional' 'daily' 'wedding']
['jacquard' 'default' 'tassels and latkans']
['woven design' 'same as saree' 'default' 'zari woven']
['zari woven' 'woven design' 'default' 'solid' 'printed']
['applique' 'elephant' 'floral' 'ethnic motif' 'peacock' 'default' 'solid'
 'checked' 'botanical']
['no' 'yes']


In [9]:
# Remove the attribute columns that hold no value for this category.
df = df.drop(columns=delCol)
df.shape

(18346, 13)

In [10]:
# Missing values per column after the empty attribute columns were dropped.
df.isna().sum()

id              0
Category        0
len             0
attr_1      10461
attr_2        667
attr_3       2485
attr_4        450
attr_5        697
attr_6      13336
attr_7       9450
attr_8       1881
attr_9       4043
attr_10       528
dtype: int64

In [11]:
# df.dropna(inplace=True)

In [12]:
# Build the per-attribute vocabularies. Attribute columns start at column 3
# of the frame; the dictionaries are keyed by the attribute's zero-based
# position among them.
id2label = {}   # position -> {class index: label string}
label2id = {}   # position -> {label string: class index}
attrs = {}      # position -> column name
total_attr = len(df.columns)
for pos, column in enumerate(df.columns[3:]):
    labels = df[column].dropna().unique()
    id2label[pos] = dict(enumerate(labels))
    label2id[pos] = {name: idx for idx, name in enumerate(labels)}
    attrs[pos] = column
print(id2label)
print(label2id)
print(attrs)

{0: {0: 'same as saree', 1: 'solid', 2: 'same as border', 3: 'default'}, 1: {0: 'woven design', 1: 'zari', 2: 'no border', 3: 'solid', 4: 'default', 5: 'temple border'}, 2: {0: 'small border', 1: 'big border', 2: 'no border'}, 3: {0: 'multicolor', 1: 'cream', 2: 'white', 3: 'default', 4: 'navy blue', 5: 'yellow', 6: 'green', 7: 'pink'}, 4: {0: 'party', 1: 'traditional', 2: 'daily', 3: 'wedding'}, 5: {0: 'jacquard', 1: 'default', 2: 'tassels and latkans'}, 6: {0: 'woven design', 1: 'same as saree', 2: 'default', 3: 'zari woven'}, 7: {0: 'zari woven', 1: 'woven design', 2: 'default', 3: 'solid', 4: 'printed'}, 8: {0: 'applique', 1: 'elephant', 2: 'floral', 3: 'ethnic motif', 4: 'peacock', 5: 'default', 6: 'solid', 7: 'checked', 8: 'botanical'}, 9: {0: 'no', 1: 'yes'}}
{0: {'same as saree': 0, 'solid': 1, 'same as border': 2, 'default': 3}, 1: {'woven design': 0, 'zari': 1, 'no border': 2, 'solid': 3, 'default': 4, 'temple border': 5}, 2: {'small border': 0, 'big border': 1, 'no border': 

In [13]:
def categorize(example):
    """Replace the attribute labels of one row with integer class indices.

    Every column listed in the module-level ``attrs`` mapping is rewritten in
    place: a present label becomes its index from ``label2id``, a missing
    value becomes ``-100``.

    Args:
        example: One row, indexable by column name.

    Returns:
        The same row object after the in-place rewrite.
    """
    for pos, column in attrs.items():
        raw = example[column]
        example[column] = -100 if pd.isna(raw) else label2id[pos][raw]
    return example
# Encode every row; apply(axis=1) hands each row to categorize in turn.
# NOTE: not re-runnable as is. On an already encoded frame the values are
# integers, which are not keys of label2id, so a second run raises KeyError.
df=df.apply(categorize,axis=1)
df.head()

Unnamed: 0,id,Category,len,attr_1,attr_2,attr_3,attr_4,attr_5,attr_6,attr_7,attr_8,attr_9,attr_10
7267,7432,Sarees,10,0,0,0,0,0,0,0,0,0,0
7268,7433,Sarees,10,-100,1,0,1,1,-100,-100,0,1,0
7269,7434,Sarees,10,-100,1,0,2,0,-100,-100,0,2,0
7270,7435,Sarees,10,0,0,1,3,1,-100,1,0,3,0
7271,7436,Sarees,10,1,2,-100,-100,2,-100,-100,-100,-100,1


In [14]:
df.isna().sum()

id          0
Category    0
len         0
attr_1      0
attr_2      0
attr_3      0
attr_4      0
attr_5      0
attr_6      0
attr_7      0
attr_8      0
attr_9      0
attr_10     0
dtype: int64

In [15]:
# Placeholder only: val_df is reassigned by the train/validation split in the next cell.
val_df = df

In [16]:
from sklearn.model_selection import train_test_split


# Hold out 10% of the rows for validation; the fixed random_state keeps the split reproducible.
train_df, val_df = train_test_split(df, test_size=0.1, random_state=42)

# min_count = train_df[f'attr_{attr_i}'].value_counts().min()
# print(f"The minimum count across all categories is: {min_count}")
# df_sampled = train_df.groupby(f'attr_{attr_i}').sample(n=min_count, random_state=42)
# train_df = df_sampled

In [17]:
# Class distribution of every kept attribute in the training split
# (-100 is the value categorize assigns to missing labels).
for i in l:
    print(train_df[i].value_counts())

attr_1
-100    9415
 0      4691
 2      1456
 3       772
 1       177
Name: count, dtype: int64
attr_2
 1      7662
 0      5643
 5      1162
 4       773
-100     607
 3       371
 2       293
Name: count, dtype: int64
attr_3
 0      8565
 1      5184
-100    2248
 2       514
Name: count, dtype: int64
attr_4
 0      4623
 1      4314
 3      3665
 2      2408
-100     411
 7       313
 4       304
 5       245
 6       228
Name: count, dtype: int64
attr_5
 0      9389
 1      4442
 2      1204
 3       846
-100     630
Name: count, dtype: int64
attr_6
-100    12001
 0       3769
 1        492
 2        249
Name: count, dtype: int64
attr_7
-100    8497
 0      3343
 3      2412
 1      1911
 2       348
Name: count, dtype: int64
attr_8
 0      12174
-100     1699
 3        984
 1        732
 2        714
 4        208
Name: count, dtype: int64
attr_9
-100    3652
 5      2944
 3      2643
 4      2335
 2      2241
 6      1004
 1       639
 0       571
 7       258
 8       224
Name

In [18]:
# weights[4]=torch.tensor([1,20],dtype=torch.float32).to(DEVICE)
# weights[9]=torch.tensor([1,1,1,1,3,2],dtype=torch.float32).to(DEVICE)
# weights[7]=torch.tensor([1,10],dtype=torch.float32).to(DEVICE)

In [19]:
# for i in l:
#     print(i)
#     for j in set(train_df[i].unique()):
#         x=train_df[i][train_df[i]==j]
#         if x.count()<300 and j!=-100:
#             y=train_df[train_df[i]==j]
#             y.iloc[:,3:]=-100
#             y[i]=j
#             print(y)
#             train_df=pd.concat([train_df,y,y]).reset_index(drop=True)
# for i in l:
#     print(train_df[i].value_counts())

In [20]:
# Preview of the encoded training rows.
train_df.head()

Unnamed: 0,id,Category,len,attr_1,attr_2,attr_3,attr_4,attr_5,attr_6,attr_7,attr_8,attr_9,attr_10
14469,14634,Sarees,10,3,1,1,3,1,-100,-100,0,3,0
21667,21832,Sarees,10,-100,0,-100,0,2,-100,-100,-100,-100,0
24382,24547,Sarees,10,-100,1,0,1,1,-100,-100,0,2,0
17978,18143,Sarees,10,-100,1,1,6,1,-100,-100,0,1,0
20916,21081,Sarees,10,0,0,0,0,0,0,0,0,-100,0


In [21]:
# print(train_df[f'attr_{attr_i}'].value_counts())
# print(val_df[f'attr_{attr_i}'].value_counts())

In [22]:
from transformers import AutoImageProcessor
# model_name = 'google/vit-base-patch16-224'
# model_name=f"vit5/{category}/final"
# model_name = 'google/efficientnet-b0'
# model_name = 'vit6/'+category+'/final'
# model_name="google/vit-large-patch16-224"
# model_name="Shadowking912/inception_v3"
# Local checkpoint directory for the selected category; this is the path the
# commented-out trainer.save_model(...) call in the training cell writes to.
model_name="vit6/"+category+"/final"
# processor = AutoImageProcessor.from_pretrained(model_name)

In [None]:
class CustomFashionManager(Dataset):
    """Image dataset backed by a DataFrame of product ids and optional labels.

    Column 0 of the frame holds the integer image id; the image file is
    ``<root_dir>/<id zero-padded to 6 digits>.jpg``. Columns 3 onward hold the
    encoded attribute labels and are only read when ``return_labels`` is True.

    Args:
        csv_file: DataFrame describing the samples (despite the name, an
            already loaded frame, not a path).
        root_dir: Directory containing the ``.jpg`` files.
        transforms: Callable applied to the PIL image; it must return a tensor
            because the result is given a leading batch dimension.
        return_labels: When True, each item also carries a ``'labels'`` tensor
            (dtype long) built from columns 3 onward. Defaults to False, which
            yields ``pixel_values`` only.
    """
    def __init__(self,csv_file, root_dir="./",transforms=None,return_labels=False):
        self.fashionItems = csv_file
        self.root_dir = root_dir
        self.transforms = transforms
        self.return_labels = return_labels

    def __len__(self):
        """Return the number of rows in the backing frame."""
        return len(self.fashionItems)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            A dict with ``'pixel_values'`` (the transformed image with a
            leading dimension of size 1) and, if ``return_labels`` is set,
            ``'labels'``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,f"{self.fashionItems.iloc[idx, 0]:06d}"+'.jpg')
        # Force three channels: grayscale, palette or RGBA files would
        # otherwise break a 3-channel Normalize and the network's first conv.
        image = Image.open(img_name).convert("RGB")

        inputs={}
        inputs['pixel_values'] = self.transforms(image).unsqueeze(0)
        if self.return_labels:
            # Only touch the label columns when asked to, so frames without
            # numeric attribute columns can still be used for inference.
            attributes = np.array(self.fashionItems.iloc[idx, 3:]).astype('float')
            inputs['labels'] = torch.tensor(attributes, dtype=torch.long)

        return inputs


In [24]:
from torchvision import transforms
# Preprocessing: resize the short side to 299 px, centre-crop to 299x299,
# convert to a tensor and normalise with the usual ImageNet channel statistics.
transformations = transforms.Compose(
    [
        transforms.Resize(299),
        transforms.CenterCrop(299),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Train and validation images share one folder; the test images have their own.
train_fashion_data = CustomFashionManager(train_df, 'train_images', transformations)
val_fashion_data = CustomFashionManager(val_df, 'train_images', transformations)
test_fashion_data = CustomFashionManager(test_df, 'test_images', transformations)
        

In [25]:
import sys
from typing import List
from transformers import (
    ViTConfig,ViTPreTrainedModel,EfficientNetConfig,EfficientNetPreTrainedModel,
    EfficientNetModel,PretrainedConfig,PreTrainedModel)
from torchvision import models

class CustomConfig(PretrainedConfig):
    """Configuration holding the number of classes of every classification head.

    Args:
        num_classes_per_label: One entry per attribute head, giving that
            head's number of classes.
        **kwargs: Forwarded unchanged to ``PretrainedConfig``.
    """
    def __init__(self,num_classes_per_label:List[int]=[1],**kwargs):
        super().__init__(**kwargs)
        # Store a copy: the default list object is shared by every call, and a
        # caller-supplied list should not be aliased by the config either.
        self.num_classes_per_label = list(num_classes_per_label)

            
class MultiLabelMultiClassViT(PreTrainedModel):
    """Inception v3 backbone with one linear classification head per attribute.

    Despite the class name, the backbone is torchvision's ``inception_v3``.
    Its final ``fc`` layers (main and auxiliary branch) are replaced by
    ``nn.Identity`` so that both branches emit features, and a separate
    ``nn.Linear`` head per attribute is attached to each branch.

    Args:
        config: ``CustomConfig`` whose ``num_classes_per_label`` lists the
            number of classes of every head.
        pretrain: Forwarded to ``inception_v3(pretrained=...)``.
    """
    config_class=CustomConfig
    def __init__(self, config: CustomConfig,pretrain=False) -> None:
        super().__init__(config)

        self.model = models.inception_v3(pretrained=pretrain)

        # Read the feature widths before the fc layers are swapped out below.
        main_dim = self.model.fc.in_features
        aux_dim = self.model.AuxLogits.fc.in_features

        # Heads on the main branch, one per attribute.
        self.classifiers = nn.ModuleList([
            nn.Linear(main_dim, num_classes)
            for num_classes in config.num_classes_per_label
        ])
        # Heads on the auxiliary branch (only used while training).
        self.classifiers2 = nn.ModuleList([
            nn.Linear(aux_dim, num_classes)
            for num_classes in config.num_classes_per_label
        ])
        self.model.fc=nn.Identity()
        self.model.AuxLogits.fc=nn.Identity()
        # Initialize weights and apply final processing
        self.post_init()

    @staticmethod
    def _masked_cross_entropy(logits, target, ignore_index=-100):
        """Mean cross-entropy over the non-ignored targets, 0 if there are none.

        Equivalent to ``cross_entropy(..., ignore_index=...)`` with mean
        reduction whenever at least one target is valid, but returns 0 instead
        of NaN when every target in the batch equals ``ignore_index``.
        """
        summed = torch.nn.functional.cross_entropy(
            logits, target, ignore_index=ignore_index, reduction="sum")
        n_valid = (target != ignore_index).sum().clamp(min=1)
        return summed / n_valid

    def forward(self, pixel_values,labels=None):
        """Run the backbone and every attribute head.

        Args:
            pixel_values: Image batch fed to the Inception backbone
                (presumably ``(batch, 3, 299, 299)`` - TODO confirm).
            labels: Optional integer tensor indexed as ``labels[:, i]`` for
                head ``i``; entries equal to ``-100`` are ignored in the loss.

        Returns:
            ``{"logits": [...]}`` with one logits tensor per head, plus
            ``"loss"`` when ``labels`` is given. While training, the loss also
            includes the auxiliary-branch heads weighted by 0.4.
        """
        if self.training:
            # In training mode the backbone returns (main, auxiliary) features.
            features, aux_features = self.model(pixel_values)
            aux_logits = [classifier(aux_features) for classifier in self.classifiers2]
        else:
            features = self.model(pixel_values)
            aux_logits = None
        logits = [classifier(features) for classifier in self.classifiers]

        if labels is None:
            return {"logits": logits}

        # Start from a tensor so the loss is a tensor even with zero heads.
        loss = torch.zeros((), device=pixel_values.device)
        for i, head_logits in enumerate(logits):
            target = labels[:, i]
            loss = loss + self._masked_cross_entropy(head_logits, target)
            if aux_logits is not None:
                loss = loss + 0.4 * self._masked_cross_entropy(aux_logits[i], target)
        return {"loss": loss, "logits": logits}

    
# Number of attribute heads for the selected category: trackNum holds one
# class count per kept attribute column.
num_labels = len(trackNum)  # not referenced again in the visible cells



In [26]:
# config=EfficientNetConfig.from_pretrained(model_name)
# config=CustomConfig(num_classes_per_label=trackNum)
# model = MultiLabelMultiClassViT.from_pretrained(model_name, config=config)
# # model.model.aux_logits=False

# Load the fine-tuned weights and configuration from the checkpoint directory
# named by model_name, then print a per-layer parameter summary.
model=MultiLabelMultiClassViT.from_pretrained(model_name)
print(summary(model))

Layer (type:depth-idx)                   Param #
MultiLabelMultiClassViT                  --
├─Inception3: 1-1                        --
│    └─BasicConv2d: 2-1                  --
│    │    └─Conv2d: 3-1                  864
│    │    └─BatchNorm2d: 3-2             64
│    └─BasicConv2d: 2-2                  --
│    │    └─Conv2d: 3-3                  9,216
│    │    └─BatchNorm2d: 3-4             64
│    └─BasicConv2d: 2-3                  --
│    │    └─Conv2d: 3-5                  18,432
│    │    └─BatchNorm2d: 3-6             128
│    └─MaxPool2d: 2-4                    --
│    └─BasicConv2d: 2-5                  --
│    │    └─Conv2d: 3-7                  5,120
│    │    └─BatchNorm2d: 3-8             160
│    └─BasicConv2d: 2-6                  --
│    │    └─Conv2d: 3-9                  138,240
│    │    └─BatchNorm2d: 3-10            384
│    └─MaxPool2d: 2-7                    --
│    └─InceptionA: 2-8                   --
│    │    └─BasicConv2d: 3-11            12,416
│   

In [None]:
from transformers import Trainer, TrainingArguments
from sklearn.metrics import classification_report,f1_score
batch_size = 32  # NOTE: unused here; the TrainingArguments in this cell pass 128 explicitly
def collate_fn(batch):
    """Merge dataset items into one model input batch.

    Args:
        batch: List of item dicts. Each has ``'pixel_values'`` with a leading
            dimension of size 1 and may also have a ``'labels'`` tensor.

    Returns:
        A dict with ``'pixel_values'`` concatenated along dimension 0. When
        every item carries ``'labels'`` they are stacked under ``'labels'``;
        otherwise the key is omitted, so unlabeled batches are unchanged.
    """
    collated = {
        'pixel_values': torch.cat([x['pixel_values'] for x in batch],dim=0),
    }
    if all('labels' in x for x in batch):
        collated['labels'] = torch.stack([x['labels'] for x in batch])
    return collated

def compute_metrics(pred):
    """Score predictions with the harmonic mean of micro and macro F1 per head.

    Args:
        pred: Object exposing ``predictions`` (a sequence with one logits
            array per attribute head) and ``label_ids`` (2-D integer array
            with one column per head; ``-100`` marks a missing label).

    Returns:
        ``{'score': value}`` where ``value`` averages the per-head scores.
        Heads without a single labelled row are skipped, and the score is 0.0
        when no head could be evaluated.
    """
    logits = pred.predictions
    labels = np.asarray(pred.label_ids)
    # Predicted class index per sample (rows) and head (columns).
    probs = np.stack([np.argmax(logit,axis=1) for logit in logits]).T
    f1s=[]
    for i in range(labels.shape[1]):
        keep = labels[:,i] != -100
        if not keep.any():
            # Nothing to score for this head; skipping avoids a NaN or error.
            print(f"attr_{i} f1 score: skipped (no labelled samples)")
            continue
        labels_ = labels[keep,i]
        probs_ = probs[keep,i]
        micro=f1_score(labels_,probs_,average='micro')
        macro=f1_score(labels_,probs_,average='macro')
        print(f"attr_{i} f1 score: {macro}")
        # Harmonic mean of the two F1 values, defined as 0 when both are 0.
        denom = micro+macro
        score = 2*(micro*macro)/denom if denom > 0 else 0.0
        f1s.append(score)
    print()
    print()
    return {'score': sum(f1s)/len(f1s) if f1s else 0.0}


    # logits = pred.predictions
    # labels=pred.label_ids
    # probs = np.stack([np.argmax(logit,axis=1) for logit in logits])
    # probs=probs.T
    # labels=labels.flatten()
    # probs=probs.flatten()
    # non_padding_indices = [i for i, label in enumerate(labels) if label != -100]
    # labels = [labels[i] for i in non_padding_indices]
    # probs = [probs[i] for i in non_padding_indices]
    # print(classification_report(labels,probs))
    # report=classification_report(labels,probs,output_dict=True)
    # return {'accuracy': report['accuracy'],"macro avg f1":report['macro avg']['f1-score']}

# Trainer configuration: evaluate, checkpoint and log once per epoch, keep a
# single checkpoint on disk and reload the best one (by "score") at the end.
training_args = TrainingArguments(
  output_dir="./vit6/"+category,
  per_device_train_batch_size=128,
  per_device_eval_batch_size=128,
  evaluation_strategy="epoch",
  save_strategy="epoch",
  logging_strategy="epoch",
  num_train_epochs=10,
  bf16=True,
  learning_rate=2e-4,
#   dataloader_num_workers=20,lr
  save_total_limit=1,
  # The dataset is a custom torch Dataset; leave its items untouched.
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='wandb',
  load_best_model_at_end=True,
  # Must match the key returned by compute_metrics.
  metric_for_best_model="score"
)

# Wire model, datasets, batching function and metric into a Hugging Face Trainer.
trainer = Trainer(
    model,
    training_args,
    train_dataset=train_fashion_data,
    eval_dataset=val_fashion_data,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    # tokenizer=processor,
)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [None]:
# trainer.train()
# trainer.save_model(f"./vit6/{category}/final")
# trainer.evaluate(val_fashion_data)

[2024-11-16 23:42:29,768] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/home/aseems/anaconda3/envs/mhcp4/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status






attr_0 f1 score: 0.4460195680365554
attr_1 f1 score: 0.6607053541728461
attr_2 f1 score: 0.8411579467863991
attr_3 f1 score: 0.42318080336469294
attr_4 f1 score: 0.5020157989732551
attr_5 f1 score: 0.5530085320852677
attr_6 f1 score: 0.5248556597209413
attr_7 f1 score: 0.46377664041564426
attr_8 f1 score: 0.593492290796093
attr_9 f1 score: 0.46222352391407967




Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkaran21258[0m ([33mkaran912[0m). Use [1m`wandb login --relogin`[0m to force relogin


{'eval_loss': 5.8802642822265625,
 'eval_model_preparation_time': 0.0076,
 'eval_score': 0.6291425042174301,
 'eval_runtime': 33.7381,
 'eval_samples_per_second': 54.39,
 'eval_steps_per_second': 0.445}

In [29]:
# Predict on the test split; `predictions` holds one logits array per head.
y_pred=trainer.predict(test_fashion_data)
logits = y_pred.predictions
# Despite its name, `probs` holds the predicted class index for every sample
# (rows) and attribute head (columns).
probs = np.stack([np.argmax(logit,axis=1) for logit in logits]).T
n_heads = probs.shape[1]

# Work on an explicit copy so the column assignments below never write into a
# slice of the originally loaded test table.
test_df = test_df.copy()
test_df['len'] = n_heads
for i in range(10):
    if i < n_heads:
        # Map the class indices of head i back to their label strings.
        test_df[f"attr_{i+1}"] = [id2label[i][class_id] for class_id in probs[:, i]]
    else:
        # This category has fewer than 10 attributes: pad with missing values.
        test_df[f"attr_{i+1}"] = np.nan
print(test_df.isna().sum())
# Make sure the output folder exists before writing the predictions.
os.makedirs("preds", exist_ok=True)
test_df.to_csv(f"preds/{category}.csv",index=False)

KeyboardInterrupt: 