In [48]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
import pandas as pd
from PIL import Image
import numpy as np
from torch.utils.data import Dataset
import torch
import torch.nn as nn
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
import random
from transformers import (
    ViTImageProcessor,
    ViTModel,
    ViTConfig,
    ViTPreTrainedModel,
    Trainer, 
    TrainingArguments,
    )
import torchvision
from sklearn.model_selection import train_test_split
import sys
from typing import List
from sklearn.metrics import classification_report
import gc
import argparse
import wandb
import transformers
from sklearn.metrics import classification_report,f1_score
import torch.nn.init as init


In [None]:
# Number of training epochs; copied into CFG["epochs"] by the next cell.
# NOTE(review): the TrainingArguments cell hard-codes num_train_epochs=5 and never
# reads this value -- confirm which of the two is intended.
epochs=10

In [50]:
# Run configuration collected in a single mapping.
# NOTE(review): none of the visible cells reads CFG; the category, dropout and
# epoch settings used later are hard-coded where they are needed -- confirm
# whether these keys are consumed elsewhere.
CFG = {
    'epochs': epochs,
    'category': 'women-tshirt',
    'dropout': True,
    'dropout_prob': 0.2,
    'dropna': True,
    'bestmodel': True,
}

In [52]:

# Hugging Face checkpoint id; in the visible cells it is only used to build
# `processor2` (ViTImageProcessor.from_pretrained).
model_name = 'google/vit-base-patch16-224'
# Base output directory; the data-loading cell appends the selected category to it.
save_dir="./vit3/"
# Device string. NOTE(review): not referenced by any visible cell -- confirm it is still needed.
DEVICE="cuda:0"
def setAllSeeds(seed):
  """Seed every random number generator used by this notebook.

  Stores the seed (as a string) in the ``MY_GLOBAL_SEED`` environment variable,
  then seeds Python's ``random``, NumPy's global RNG, PyTorch's CPU generator
  and the generators of all CUDA devices.

  Args:
    seed: Seed value handed unchanged to each library's seeding function.
  """
  os.environ['MY_GLOBAL_SEED'] = str(seed)
  seeders = (
      random.seed,
      np.random.seed,
      torch.manual_seed,
      torch.cuda.manual_seed_all,
  )
  for apply_seed in seeders:
    apply_seed(seed)
# setAllSeeds(42)



In [53]:

# Load the full training table, then keep only the rows of a single product category.
df = pd.read_csv("train.csv")
categories=df["Category"].unique()
# Position of the category to train on inside `categories`. unique() keeps
# first-appearance order, so this index depends on the row order of train.csv.
categories_idx= 1
category=categories[categories_idx]
df = df[df["Category"]==category]
# NOTE(review): in-place append -- re-running this cell without first re-running
# the cell that sets save_dir appends the category name a second time.
save_dir+=category


In [54]:

# Drop the attribute columns that are completely empty for the selected category
# and record how many distinct labels each remaining attribute has.
delCol = []    # attribute columns without a single non-null value
trackNum = []  # number of classes per kept attribute, in attr_1..attr_10 order
for i in range(1, 11):
    col = "attr_" + str(i)
    # Columns removed by a previous run of this cell are skipped, so re-running
    # the cell no longer raises KeyError on the already-dropped names.
    if col not in df.columns:
        continue
    uniName = df[col].dropna().unique()
    if len(uniName) == 0:
        delCol.append(col)
    else:
        trackNum.append(len(uniName))
df = df.drop(columns=delCol)


# Per-attribute label vocabularies, keyed by the attribute's position among the
# columns from index 3 onwards (the same positional slice the dataset class uses
# to read the labels of a row).
id2label = {}  # attribute position -> {class id: label}
label2id = {}  # attribute position -> {label: class id}
attrs = {}     # attribute position -> column name
total_attr = len(df.columns)
for pos, col in enumerate(df.columns[3:]):
    labels = df[col].dropna().unique()
    id2label[pos] = dict(enumerate(labels))
    label2id[pos] = {label: k for k, label in enumerate(labels)}
    attrs[pos] = col
print(id2label)
print(label2id)
print(attrs)


{0: {0: 'multicolor', 1: 'yellow', 2: 'black', 3: 'default', 4: 'pink', 5: 'maroon', 6: 'white'}, 1: {0: 'loose', 1: 'boxy', 2: 'regular'}, 2: {0: 'long', 1: 'crop', 2: 'regular'}, 3: {0: 'default', 1: 'solid', 2: 'printed'}, 4: {0: 'default', 1: 'quirky', 2: 'solid', 3: 'graphic', 4: 'funky print', 5: 'typography'}, 5: {0: 'default', 1: 'long sleeves', 2: 'short sleeves'}, 6: {0: 'regular sleeves', 1: 'cuffed sleeves'}, 7: {0: 'default', 1: 'applique'}}
{0: {'multicolor': 0, 'yellow': 1, 'black': 2, 'default': 3, 'pink': 4, 'maroon': 5, 'white': 6}, 1: {'loose': 0, 'boxy': 1, 'regular': 2}, 2: {'long': 0, 'crop': 1, 'regular': 2}, 3: {'default': 0, 'solid': 1, 'printed': 2}, 4: {'default': 0, 'quirky': 1, 'solid': 2, 'graphic': 3, 'funky print': 4, 'typography': 5}, 5: {'default': 0, 'long sleeves': 1, 'short sleeves': 2}, 6: {'regular sleeves': 0, 'cuffed sleeves': 1}, 7: {'default': 0, 'applique': 1}}
{0: 'attr_1', 1: 'attr_2', 2: 'attr_3', 3: 'attr_4', 4: 'attr_5', 5: 'attr_6', 6: 

In [55]:

def categorize(example):
    """Replace the attribute labels of one row by their integer class ids.

    For every attribute listed in the module-level ``attrs`` mapping the value
    is looked up in ``label2id``; missing values (as judged by ``pd.isna``)
    become -100. The row is modified in place and also returned.

    Args:
        example: Mapping-like row (e.g. a DataFrame row) indexed by column name.

    Returns:
        The same ``example`` object with encoded attribute values.
    """
    for attr_pos, column in attrs.items():
        raw_value = example[column]
        example[column] = -100 if pd.isna(raw_value) else label2id[attr_pos][raw_value]
    return example

# Encode every attribute column to integer class ids (-100 for missing values).
# NOTE(review): not idempotent -- after one run the cells hold ints, which are not
# keys of label2id, so running this line a second time raises KeyError.
df=df.apply(categorize,axis=1)
effNetWeights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
# effNetTransforms = effNetWeights.transforms()
# Preprocessing preset shipped with the EfficientNet-B2 weights; the dataset
# class reads this global when it loads an image.
processor = effNetWeights.transforms()
# NOTE(review): processor2 is only referenced from commented-out code in the
# visible cells -- confirm it is still needed.
processor2 = ViTImageProcessor.from_pretrained(model_name)

#train test split
# 80 % train / 20 % validation, fixed random_state so the split is repeatable.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)




In [56]:

class CustomFashionManager(Dataset):
    """Dataset yielding one preprocessed product image plus its attribute class ids.

    Column 0 of the frame must hold the integer image id (the file is
    ``<id zero-padded to 6 digits>.jpg`` inside ``root_dir``); every column from
    index 3 onwards is read as an already-encoded attribute label.

    Args:
        csv_file: The DataFrame describing the samples (a frame, despite the name).
        root_dir: Directory that contains the image files.
        transforms: Optional callable turning a PIL image into a (C, H, W)
            tensor. When omitted, the module-level ``processor`` is used.
    """

    def __init__(self,csv_file, root_dir="./",transforms =None):
        self.fashionItems = csv_file
        self.root_dir = root_dir
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows (samples) in the frame."""
        return len(self.fashionItems)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            dict with ``'pixel_values'`` (the transformed image with a leading
            batch axis of size 1) and ``'labels'`` (1-D long tensor of class ids).
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, f"{self.fashionItems.iloc[idx, 0]:06d}" + '.jpg')
        # Force three RGB channels so grayscale / RGBA / palette files do not
        # reach the 3-channel normalisation with a different channel count; the
        # context manager releases the file handle once the pixels are copied.
        with Image.open(img_name) as img:
            image = img.convert("RGB")

        # Honour the transform given to the constructor; fall back to the global
        # preset so existing callers that pass nothing behave as before.
        transform = self.transforms if self.transforms is not None else processor
        # Leading axis of size 1 lets collate_fn build the batch with torch.cat.
        inp_image = transform(image).unsqueeze(0)

        # Go through float first: the row slice may be object/float typed.
        attributes = np.asarray(self.fashionItems.iloc[idx, 3:], dtype='float')
        return {
            'pixel_values': inp_image,
            'labels': torch.tensor(attributes, dtype=torch.long),
        }

# Training and validation datasets read their images from the same directory;
# only the DataFrame (train_df / val_df) differs.
# NOTE(review): the image directory is a hard-coded absolute Kaggle path.
train_fashion_data = CustomFashionManager(csv_file=train_df,root_dir='/kaggle/input/visualtaxonomy/train_images')
val_fashion_data = CustomFashionManager(csv_file=val_df,root_dir='/kaggle/input/visualtaxonomy/train_images')

In [57]:

class CustomConfig(ViTConfig):
    """ViT configuration extended with one class count per classification head.

    Args:
        num_classes_per_label: Number of classes of each attribute head.
            ``[1]`` is used when the argument is omitted.
        **kwargs: Forwarded unchanged to ``ViTConfig``.
    """

    def __init__(self,num_classes_per_label:List[int]=None,**kwargs):
        super().__init__(**kwargs)
        # Build a fresh list per instance: a literal list as the default value is
        # a single object shared by every config created without the argument,
        # so mutating one config's list would silently change the others.
        if num_classes_per_label is None:
            num_classes_per_label = [1]
        self.num_classes_per_label = list(num_classes_per_label)

class MultiLabelMultiClassEff(torch.nn.Module):
    """EfficientNet-B2 backbone with one classification head per attribute.

    The backbone's stock classifier is replaced by a 1408 -> 768 linear
    projection; every head is ``Dropout(0.2)`` followed by
    ``Linear(768, num_classes)``.

    Args:
        num_classes_per_label: Iterable with the number of classes of each
            attribute, in the order of the label columns.
    """

    # Label value that marks a missing attribute and is excluded from the loss.
    IGNORE_INDEX = -100

    def __init__(self,num_classes_per_label) -> None:
        super().__init__()
        effNetWeights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
        effNet = torchvision.models.efficientnet_b2(weights=effNetWeights)
        # Swap the pretrained classifier for a projection to the 768-d feature
        # shared by all heads (kept inside a Sequential so parameter names stay
        # `eff.classifier.0.*`).
        effNet.classifier = nn.Sequential(nn.Linear(1408,768,bias=True))
        self.eff = effNet
        self.classifiers = nn.ModuleList([
            nn.Sequential(nn.Dropout(0.2), nn.Linear(768, num_classes))
            for num_classes in num_classes_per_label
        ])

    def reinitialize_weights(self):
        """Re-initialise every Linear/Conv2d layer of the backbone and the heads.

        Weights receive Xavier-uniform values and biases are zeroed, in place.
        This overwrites the pretrained backbone weights.
        """
        # self.modules() walks the backbone (`self.eff`) and, inside each head's
        # Sequential, the Linear layer that actually owns `weight` / `bias`.
        for module in self.modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    init.zeros_(module.bias)

    def forward(self, pixel_values,labels=None):
        """Compute per-attribute logits and, when labels are given, the summed loss.

        Args:
            pixel_values: Image batch passed straight to the backbone.
            labels: Optional integer tensor indexed [sample, attribute];
                entries equal to -100 are ignored.

        Returns:
            ``{"logits": [...]}`` with one logits tensor per head, plus
            ``"loss"`` (sum over heads of the mean cross-entropy of the labelled
            samples) when ``labels`` is not None.
        """
        features = self.eff(pixel_values)  # output of the 1408 -> 768 projection
        logits = [classifier(features) for classifier in self.classifiers]
        if labels is None:
            return {"logits": logits}

        loss = 0
        for i, head_logits in enumerate(logits):
            target = labels[:, i]
            # Mean over labelled samples, written as sum / count with the count
            # clamped to 1: identical to the default 'mean' reduction whenever at
            # least one target is labelled, and 0 instead of NaN when the whole
            # batch is missing this attribute.
            n_labelled = (target != self.IGNORE_INDEX).sum().clamp(min=1)
            summed = torch.nn.functional.cross_entropy(
                head_logits, target,
                ignore_index=self.IGNORE_INDEX, reduction="sum",
            )
            loss = loss + summed / n_labelled
        return {"loss": loss, "logits": logits}

class MultiLabelMultiClassViT(torch.nn.Module):
    """Wrapper around MultiLabelMultiClassEff that adds a ``save_pretrained`` method.

    Despite the name, the wrapped network is the EfficientNet-based model.

    Args:
        num_classes_per_label: Forwarded to ``MultiLabelMultiClassEff``.
    """
    def __init__(self,num_classes_per_label):
        super().__init__()
        self.model = MultiLabelMultiClassEff(num_classes_per_label)
    def save_pretrained(self,output_dir):
        """Write the wrapped model's state dict to ``<output_dir>/effmodel.pt``."""
        # torch.save does not create missing parent directories.
        os.makedirs(output_dir, exist_ok=True)
        torch.save(self.model.state_dict(), os.path.join(output_dir, 'effmodel.pt'))
    def forward(self, pixel_values,labels=None):
        """Delegate to the wrapped model and return its output dict."""
        # Call the module itself (not .forward) so registered hooks still run.
        return self.model(pixel_values,labels)
    
def collate_fn(batch):
    """Merge a list of dataset samples into one batch dict.

    Args:
        batch: Sequence of dicts holding ``'pixel_values'`` and ``'labels'``.

    Returns:
        dict with ``'pixel_values'`` (the samples' tensors concatenated along
        dim 0) and ``'labels'`` (the samples' label tensors stacked along a new
        leading dim).
    """
    images = []
    targets = []
    for sample in batch:
        images.append(sample['pixel_values'])
        targets.append(sample['labels'])
    return {
        'pixel_values': torch.cat(images, dim=0),
        'labels': torch.stack(targets),
    }

def compute_metrics(pred):
    """Accuracy and macro-F1 over all labelled (sample, attribute) pairs.

    The predicted class of every head is the argmax of its logits. Labels and
    predictions of all attributes are flattened into one sequence, entries whose
    label is -100 are removed, and the remainder is scored with
    ``classification_report``; class ids of different attributes are therefore
    pooled together.

    Args:
        pred: Object with ``predictions`` (sequence of per-attribute logit
            arrays) and ``label_ids`` (array indexed [sample, attribute]).

    Returns:
        dict with the keys ``'accuracy'`` and ``'macro avg f1'``.
    """
    per_head_classes = [np.argmax(head_logits, axis=1) for head_logits in pred.predictions]
    predicted = np.stack(per_head_classes).T.flatten()
    truth = pred.label_ids.flatten()
    labelled = truth != -100
    report = classification_report(truth[labelled], predicted[labelled], output_dict=True)
    return {
        'accuracy': report['accuracy'],
        "macro avg f1": report['macro avg']['f1-score'],
    }


In [58]:
def compute_metrics3(pred):
    """Average, over attributes, the harmonic mean of micro- and macro-F1.

    For every attribute the samples labelled -100 are removed, micro and macro
    F1 are computed on the rest and combined as ``2*micro*macro/(micro+macro)``.
    The macro F1 of each attribute is printed, and the final score is also sent
    to ``wandb.log``.

    Args:
        pred: Object with ``predictions`` (sequence of per-attribute logit
            arrays, classes on axis 1) and ``label_ids`` (2-D array indexed
            [sample, attribute]).

    Returns:
        ``{'score': float}`` -- the mean of the per-attribute scores, or 0.0
        when no attribute has any labelled sample.
    """
    logits = pred.predictions
    labels = pred.label_ids
    predicted = np.stack([np.argmax(logit, axis=1) for logit in logits]).T
    f1s = []
    for i in range(labels.shape[1]):
        labelled = labels[:, i] != -100
        if not labelled.any():
            # F1 is undefined without a single labelled sample; leave the
            # attribute out of the average instead of failing.
            continue
        labels_ = labels[labelled, i]
        probs_ = predicted[labelled, i]
        micro = f1_score(labels_, probs_, average='micro')
        macro = f1_score(labels_, probs_, average='macro')
        print(f"attr_{i+1} f1 score: {macro}")
        # print(classification_report(labels_,probs_))
        # Both F1 values are 0 when nothing is predicted correctly; the harmonic
        # mean is then defined as 0 rather than dividing by zero.
        denominator = micro + macro
        score = 2 * (micro * macro) / denominator if denominator > 0 else 0.0
        f1s.append(score)

    final_score = sum(f1s) / len(f1s) if f1s else 0.0
    wandb.log({'score': final_score})
    return {'score': final_score}


In [None]:

# config=ViTConfig.from_pretrained(model_name)
# config=CustomConfig(num_classes_per_label=trackNum,**config.to_dict())
# One classification head per kept attribute; trackNum holds each head's class count.
model = MultiLabelMultiClassEff(trackNum)
# model.reinitialize_weights()
training_args = TrainingArguments(
    output_dir="./eff/"+category+"basedropout",
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    evaluation_strategy="epoch",
    save_strategy="no",
    logging_strategy="epoch",
    # NOTE(review): CFG["epochs"] / `epochs` (10) is not used here -- confirm
    # which value is intended before wiring it in.
    num_train_epochs=5,
    fp16=True,
    learning_rate=2e-4,
    # save_total_limit=1,
    # The dataset yields ready-made dicts, so no columns must be stripped.
    remove_unused_columns=False,
    report_to='wandb',
#   load_best_model_at_end=True,
    metric_for_best_model="score"
)

trainer = Trainer(
    model,
    training_args,
    train_dataset=train_fashion_data,
    eval_dataset=val_fashion_data,
    data_collator=collate_fn,
    compute_metrics=compute_metrics3,
    # `processor` is deliberately NOT passed as `tokenizer`: it is a torchvision
    # transform without save_pretrained(), which Trainer calls on its tokenizer
    # whenever it saves a model, so trainer.save_model(...) would fail.
    # Batching is already handled by collate_fn.
)
trainer.train()
#     trainer.save_model(f"./eff/{category}/finalbasedropout")
#     trainer.evaluate(test_fashion_data)

#     del model, trainer
#     torch.cuda.empty_cache()
#     gc.collect()




Epoch,Training Loss,Validation Loss,Score
1,4.1936,2.542357,0.796091


attr_1 f1 score: 0.7966287120369325
attr_2 f1 score: 0.5215411270916908
attr_3 f1 score: 0.7861004002746825
attr_4 f1 score: 0.8791000721824377
attr_5 f1 score: 0.6873304187083985
attr_6 f1 score: 0.8133558660334025
attr_7 f1 score: 0.8952917282755462
attr_8 f1 score: 0.4885844748858448
