In [1]:
!pip install peft
from PIL import Image
import requests
from transformers import CLIPProcessor, CLIPModel
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import os
from peft import LoraConfig, get_peft_model
import wandb



In [2]:
def get_image_paths_and_labels_from_df(df, data_dir):
    article_ids = df["article_id"].values
    image_paths = []
    labels = []
    
    for article_id in article_ids:
        image_path = f"{data_dir}/images/0{str(article_id)[:2]}/0{article_id}.jpg"
        # Check if the image file exists
        if os.path.exists(image_path):
            image_paths.append(image_path)
            # Add corresponding label only if the image exists
            labels.append(df[df["article_id"] == article_id])

    return image_paths, labels

class ImageDataset(torch.utils.data.Dataset):
    def __init__(self, image_paths, processor=None):
        self.image_paths = image_paths
        self.processor = processor
        self.image_ids = []

        for image_path in self.image_paths:
            if not os.path.exists(image_path):
                raise FileNotFoundError(f"Image {image_path} not found.")
            else:
                image_id = int(image_path.split("/")[-1].split(".")[0])
                self.image_ids.append(image_id)
            

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx])
        if self.processor is not None:
            inputs = self.processor(images=image, return_tensors="pt", padding=True)
            image = inputs["pixel_values"][0]
        return image, self.image_ids[idx]

In [3]:
# set random seed 42
torch.manual_seed(42)

<torch._C.Generator at 0x7f657bede6d0>

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir="model", local_files_only=False)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir="model", local_files_only=False)

model = model.to(device)



In [5]:
text_path = 'data/articles.csv'
articles = pd.read_csv(text_path)
data_dir = 'data'

In [6]:
# map from article_id to df index
article_id_to_idx = {article_id: idx for idx, article_id in enumerate(articles["article_id"])}

# get all classes of the dataframe
class_names = articles.columns.tolist()
label_names = dict()
label_names_to_idx = dict()
for class_name in class_names:
    label_names[class_name] = articles[class_name].unique()
    label_names_to_idx[class_name] = {label_name: idx for idx, label_name in enumerate(label_names[class_name])}

article_ids = label_names["article_id"]
#selected_class_names = ["product_type_name", "graphical_appearance_name"]
selected_class_names = ["product_group_name", "product_type_name", "graphical_appearance_name", "colour_group_name", "perceived_colour_value_name", "perceived_colour_master_name", "department_name", "index_name", "index_group_name", "section_name", "garment_group_name"]

In [7]:
# grouped by product_code
grouped = articles.groupby("product_code")
groups = [group for _, group in grouped]

# split 0.8/0.1/0.1
train_groups, test_groups = train_test_split(groups, test_size=0.2, random_state=42) 
val_groups, test_groups = train_test_split(test_groups, test_size=0.5, random_state=42) 

train_df = pd.concat(train_groups)
val_df = pd.concat(val_groups)
test_df = pd.concat(test_groups)

print(f"{len(train_df)=} {len(val_df)=} {len(test_df)=}")

len(train_df)=84445 len(val_df)=10534 len(test_df)=10563


In [8]:
train_paths, train_labels = get_image_paths_and_labels_from_df(train_df, data_dir)
val_paths, val_labels = get_image_paths_and_labels_from_df(val_df, data_dir)
test_paths, test_labels = get_image_paths_and_labels_from_df(test_df, data_dir)

In [9]:
class MultiOutputLayer(torch.nn.Module):
    def __init__(self, input_size, inter_size, output_size):
        super(MultiOutputLayer, self).__init__()
        self.fc1 = torch.nn.Linear(input_size, inter_size)
        self.fc2 = torch.nn.Linear(inter_size, output_size)
        self.dropout = torch.nn.Dropout(0.5)
        self.act = torch.nn.SiLU()

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [10]:
class MultiOutputClipModel(torch.nn.Module):
    def __init__(self, clip_model, class_names, vision_hidden_size, inter_size, output_size):
        super(MultiOutputClipModel, self).__init__()
        self.clip_model = clip_model
        self.class_names = class_names
        self.output_layers = torch.nn.ModuleDict({
            class_name: MultiOutputLayer(vision_hidden_size, inter_size, output_size)
            for class_name in self.class_names
        })
    
    def forward(
        self,
        text_input_dict,
        pixel_values,
        # position_ids = None,
        output_attentions = None,
        output_hidden_states = None,
        return_dict = None,
    ):

        output_attentions = output_attentions if output_attentions is not None else self.clip_model.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.clip_model.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.clip_model.config.use_return_dict

        vision_outputs = self.clip_model.vision_model(
            pixel_values=pixel_values,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        vision_embeds = vision_outputs[1]
        vision_embeds_dict = {
            class_name: output_layer(vision_embeds) 
                for class_name, output_layer in self.output_layers.items()
        }

        text_outputs_dict = {
            class_name: self.clip_model.text_model(
                input_ids=text_input_dict[class_name]["input_ids"],
                attention_mask=text_input_dict[class_name]["attention_mask"],
                # position_ids=position_ids,
                output_attentions=output_attentions,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
            ) for class_name in self.class_names
        }

        text_embeds_dict = {
            class_name: self.clip_model.text_projection(text_outputs[1])
                for class_name, text_outputs in text_outputs_dict.items()
        }

        logits_per_image_dict = {
            class_name: vision_embeds_dict[class_name] @ text_embeds_dict[class_name].T
                for class_name in self.output_layers.keys()
        }

        return logits_per_image_dict

In [11]:
# custom criterion: cross entropy loss across all classes
class MultiOutputClipCriterion(torch.nn.Module):
    def __init__(self, class_names):
        super(MultiOutputClipCriterion, self).__init__()
        self.class_names = class_names
        self.criterion = torch.nn.CrossEntropyLoss()

    def forward(self, logits_dict, labels_dict):
        loss = 0
        for class_name in self.class_names:
            logits = logits_dict[class_name]
            labels = labels_dict[class_name]
            loss += self.criterion(logits, labels)
        return loss

In [12]:
os.environ["TOKENIZERS_PARALLELISM"] = "false"

train_dataset = ImageDataset(train_paths, processor)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=8)

val_dataset = ImageDataset(val_paths, processor)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=256, shuffle=False, num_workers=8)

test_dataset = ImageDataset(test_paths, processor)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=8)

In [13]:
def validate(model, dataloader, criteria, device, text_inputs, class_names):
    model.eval()
    total_loss = 0.0
    total_correct = {class_name: 0 for class_name in class_names}
    total_samples = 0

    with torch.no_grad():
        for images, image_ids in tqdm(dataloader):
            images = images.to(device)
            logits_per_image_dict = model(pixel_values=images, text_input_dict=text_inputs)

            # 获取真实标签
            true_labels_dict = {
                class_name: [label_names_to_idx[class_name][articles.loc[article_id_to_idx[image_id.item()], class_name]] 
                             for image_id in image_ids]
                for class_name in class_names
            }
            true_labels_dict = {class_name: torch.tensor(true_labels).to(device)
                                for class_name, true_labels in true_labels_dict.items()}
            
            # 计算损失
            loss = criteria(logits_per_image_dict, true_labels_dict)
            total_loss += loss.item() * images.size(0)

            # 计算准确率
            total_samples += images.size(0)
            for class_name in class_names:
                _, preds = torch.max(logits_per_image_dict[class_name], dim=1)
                total_correct[class_name] += (preds == true_labels_dict[class_name]).sum().item()

    avg_loss = total_loss / total_samples / len(class_names)
    accuracy = {class_name: total_correct[class_name] / total_samples for class_name in class_names}
    return avg_loss, accuracy

In [14]:
# get current date and time
from datetime import datetime
now = datetime.now()
date = now.strftime("%Y%m%d-%H%M%S")

In [15]:
def train(model, train_dataloader, val_dataloader, device, text_input_dict, num_epochs=10, lora_rank=8, lora_alpha=32, lr=1e-4, bias="none", inter_size=128):

    wandb.init(project="multi-output-clip", name=f"{date}_r{lora_rank}_lr{lr}_bias{bias}_inter{inter_size}")
    step = 0

    # Define LoRA configuration
    lora_config = LoraConfig(
        r=lora_rank,                  # Low-rank dimension (adjustable)
        lora_alpha=lora_alpha,          # Scaling factor (adjustable)
        target_modules=["q_proj", "v_proj", "k_proj"],  # Specify which layers to apply LoRA to
        lora_dropout=0.05,       # Dropout rate (optional)
        bias=bias,            # Whether to include biases ("none", "all", "lora_only")
        task_type="classification"  # Task type ("classification" or "regression")
    )
    model = get_peft_model(model, lora_config)

    mo_model = MultiOutputClipModel(model, selected_class_names, 768, inter_size, 512).to(device)

    criteria = MultiOutputClipCriterion(selected_class_names)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

    for epoch in range(num_epochs):
        mo_model.train()
        total_loss = 0.0
        total_correct = 0
        total_samples = 0

        for images, image_ids in tqdm(train_dataloader):
            images = images.to(device)
            logits_per_image_dict = mo_model(pixel_values=images, text_input_dict=text_input_dict)

            # 获取真实标签
            true_labels_dict = {
                class_name: [label_names_to_idx[class_name][articles.loc[article_id_to_idx[image_id.item()], class_name]] 
                            for image_id in image_ids]
                for class_name in selected_class_names
            }
            true_labels_dict = {class_name: torch.tensor(true_labels).to(device) 
                                for class_name, true_labels in true_labels_dict.items()}

            # 计算损失
            loss = criteria(logits_per_image_dict, true_labels_dict)
            total_loss += loss.item() * images.size(0)

            # 计算准确率
            correct = 0
            total_samples += images.size(0)
            for class_name in selected_class_names:
                _, preds = torch.max(logits_per_image_dict[class_name], dim=1)
                correct += (preds == true_labels_dict[class_name]).sum().item()
            total_correct += correct

            # 反向传播和优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # 记录训练损失和准确率到 wandb
            # 在训练循环中，记录每个类别的准确率
            log_dict = {
                "train_loss": loss.item(),
                "train_accuracy": correct / images.size(0) / len(selected_class_names)
            }
    #         for class_name in selected_class_names:
    #             accuracy = total_correct_per_class[class_name] / total_samples
    #             log_dict[f"train_accuracy_{class_name}"] = accuracy

            wandb.log(log_dict, step=step)
            step += 1

        avg_loss = total_loss / total_samples / len(selected_class_names)
        accuracy = total_correct / total_samples / len(selected_class_names)
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_loss:.4f}, Train Accuracy: {accuracy:.4f}")

        # 在每个 epoch 结束后进行验证
        val_loss, val_accuracy_dict = validate(mo_model, val_dataloader, criteria, device, text_input_dict, selected_class_names)
        val_accuracy = sum(val_accuracy_dict.values()) / len(val_accuracy_dict)
        print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

            # 记录验证损失和每个类别的准确率到 wandb
        log_dict = {
            "val_loss": val_loss,
            "val_accuracy": val_accuracy
        }
        for class_name, accuracy in val_accuracy_dict.items():
            log_dict[f"val_accuracy_{class_name}"] = accuracy

        wandb.log(log_dict, step=step)
        torch.save(mo_model.state_dict(), f"{date}_r{lora_rank}_lr{lr}_bias{bias}_inter{inter_size}_epoch{epoch}.pth")

    wandb.finish()

In [16]:
# generate text input
text_input_dict = {
    class_name: processor(text=[f"A photo of a {label}" for label in label_names[class_name]], 
                          return_tensors="pt", padding=True).to(device)
    for class_name in selected_class_names
}

In [17]:
# model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir="model", local_files_only=False)
# train(model, train_dataloader, val_dataloader, device, text_input_dict, num_epochs=10, lora_rank=8, lr=1e-4, bias="none", inter_size=128)


In [None]:
# grid search for lora dimension, learning rate, and inter size
lora_dimensions = [8, 16, 32]
learning_rates = [1e-5, 1e-4, 1e-3]
inter_sizes = [64, 128, 256]

In [None]:
lr = 1e-4
for lora_rank in lora_dimensions:
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir="model", local_files_only=False)
    train(model, train_dataloader, val_dataloader, device, text_input_dict, num_epochs=10, lora_rank=lora_rank, lora_alpha=lora_rank*2, lr=lr, bias="none", inter_size=128)


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33monjackay[0m ([33monjackay-kth-royal-institute-of-technology[0m). Use [1m`wandb login --relogin`[0m to force relogin


100%|██████████| 657/657 [23:10<00:00,  2.12s/it]


Epoch [1/10], Train Loss: 2.0505, Train Accuracy: 0.4325


100%|██████████| 41/41 [03:24<00:00,  4.99s/it]


Validation Loss: 1.4391, Validation Accuracy: 0.5794


100%|██████████| 657/657 [23:29<00:00,  2.14s/it]


Epoch [2/10], Train Loss: 1.4475, Train Accuracy: 0.5730


100%|██████████| 41/41 [03:02<00:00,  4.45s/it]


Validation Loss: 1.2475, Validation Accuracy: 0.6285


100%|██████████| 657/657 [23:19<00:00,  2.13s/it]


Epoch [3/10], Train Loss: 1.3020, Train Accuracy: 0.6111


100%|██████████| 41/41 [03:21<00:00,  4.92s/it]


Validation Loss: 1.1616, Validation Accuracy: 0.6509


100%|██████████| 657/657 [25:33<00:00,  2.33s/it]


Epoch [4/10], Train Loss: 1.2174, Train Accuracy: 0.6344


100%|██████████| 41/41 [03:17<00:00,  4.82s/it]


Validation Loss: 1.1078, Validation Accuracy: 0.6651


100%|██████████| 657/657 [23:50<00:00,  2.18s/it]


Epoch [5/10], Train Loss: 1.1563, Train Accuracy: 0.6509


100%|██████████| 41/41 [03:26<00:00,  5.04s/it]


Validation Loss: 1.0618, Validation Accuracy: 0.6783


100%|██████████| 657/657 [24:15<00:00,  2.22s/it]


Epoch [6/10], Train Loss: 1.1111, Train Accuracy: 0.6629


100%|██████████| 41/41 [03:12<00:00,  4.69s/it]


Validation Loss: 1.0343, Validation Accuracy: 0.6860


100%|██████████| 657/657 [23:57<00:00,  2.19s/it]


Epoch [7/10], Train Loss: 1.0736, Train Accuracy: 0.6738


100%|██████████| 41/41 [03:01<00:00,  4.44s/it]


Validation Loss: 1.0075, Validation Accuracy: 0.6933


100%|██████████| 657/657 [23:53<00:00,  2.18s/it]


Epoch [8/10], Train Loss: 1.0401, Train Accuracy: 0.6827


100%|██████████| 41/41 [03:17<00:00,  4.81s/it]


Validation Loss: 0.9931, Validation Accuracy: 0.6973


100%|██████████| 657/657 [24:02<00:00,  2.20s/it]


Epoch [9/10], Train Loss: 1.0127, Train Accuracy: 0.6907


100%|██████████| 41/41 [03:17<00:00,  4.81s/it]


Validation Loss: 0.9765, Validation Accuracy: 0.7043


100%|██████████| 657/657 [28:07<00:00,  2.57s/it]


Epoch [10/10], Train Loss: 0.9896, Train Accuracy: 0.6971


100%|██████████| 41/41 [03:32<00:00,  5.17s/it]


Validation Loss: 0.9752, Validation Accuracy: 0.7028


VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_accuracy,▁▃▄▄▅▅▅▆▆▇▆▆▆▆▇▇▆▇▇▇▇▇▇▇▇▇█▇▇██▇▇▇██████
train_loss,█▇▅▃▃▃▃▃▂▃▂▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▂▁▁▁▁▁▁▁▁▂
val_accuracy,▁▄▅▆▇▇▇███
val_accuracy_colour_group_name,▁▄▆▆▆▇▇███
val_accuracy_department_name,▁▃▅▆▆▇▇▇██
val_accuracy_garment_group_name,▁▄▅▆▆▇▇▇██
val_accuracy_graphical_appearance_name,▁▄▅▆▅▆▆█▇█
val_accuracy_index_group_name,▁▃▄▆▇▇▇███
val_accuracy_index_name,▁▄▅▆▇▇▇▇██
val_accuracy_perceived_colour_master_name,▁▅▆▆▆▇▇███

0,1
train_accuracy,0.7
train_loss,10.32464
val_accuracy,0.70278
val_accuracy_colour_group_name,0.677
val_accuracy_department_name,0.45785
val_accuracy_garment_group_name,0.70294
val_accuracy_graphical_appearance_name,0.7346
val_accuracy_index_group_name,0.79687
val_accuracy_index_name,0.72277
val_accuracy_perceived_colour_master_name,0.78972


100%|██████████| 657/657 [29:34<00:00,  2.70s/it]


Epoch [1/10], Train Loss: 1.8596, Train Accuracy: 0.4760


100%|██████████| 41/41 [03:36<00:00,  5.29s/it]


Validation Loss: 1.3010, Validation Accuracy: 0.6153


100%|██████████| 657/657 [30:12<00:00,  2.76s/it]  


Epoch [2/10], Train Loss: 1.3200, Train Accuracy: 0.6054


100%|██████████| 41/41 [03:17<00:00,  4.82s/it]


Validation Loss: 1.1455, Validation Accuracy: 0.6531


100%|██████████| 657/657 [30:48<00:00,  2.81s/it]


Epoch [3/10], Train Loss: 1.1822, Train Accuracy: 0.6428


100%|██████████| 41/41 [03:47<00:00,  5.54s/it]


Validation Loss: 1.0590, Validation Accuracy: 0.6789


100%|██████████| 657/657 [30:57<00:00,  2.83s/it]


Epoch [4/10], Train Loss: 1.0923, Train Accuracy: 0.6686


100%|██████████| 41/41 [03:35<00:00,  5.26s/it]


Validation Loss: 1.0130, Validation Accuracy: 0.6900


100%|██████████| 657/657 [29:53<00:00,  2.73s/it]


Epoch [5/10], Train Loss: 1.0310, Train Accuracy: 0.6855


100%|██████████| 41/41 [03:27<00:00,  5.07s/it]


Validation Loss: 0.9803, Validation Accuracy: 0.7001


100%|██████████| 657/657 [28:47<00:00,  2.63s/it]


Epoch [6/10], Train Loss: 0.9813, Train Accuracy: 0.6994


100%|██████████| 41/41 [03:20<00:00,  4.88s/it]


Validation Loss: 0.9596, Validation Accuracy: 0.7039


100%|██████████| 657/657 [30:20<00:00,  2.77s/it]


Epoch [7/10], Train Loss: 0.9415, Train Accuracy: 0.7102


  0%|          | 0/41 [00:00<?, ?it/s]

In [17]:
lora_dimensions = [32]

lr = 1e-4
for lora_rank in lora_dimensions:
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir="model", local_files_only=False)
    train(model, train_dataloader, val_dataloader, device, text_input_dict, num_epochs=10, lora_rank=lora_rank, lora_alpha=lora_rank*2, lr=lr, bias="none", inter_size=128)


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33monjackay[0m ([33monjackay-kth-royal-institute-of-technology[0m). Use [1m`wandb login --relogin`[0m to force relogin


100%|██████████| 657/657 [25:49<00:00,  2.36s/it]


Epoch [1/10], Train Loss: 1.7068, Train Accuracy: 0.5119


100%|██████████| 41/41 [03:25<00:00,  5.02s/it]


Validation Loss: 1.2077, Validation Accuracy: 0.6344


100%|██████████| 657/657 [27:11<00:00,  2.48s/it]


Epoch [2/10], Train Loss: 1.2023, Train Accuracy: 0.6364


100%|██████████| 41/41 [03:29<00:00,  5.11s/it]


Validation Loss: 1.0431, Validation Accuracy: 0.6808


100%|██████████| 657/657 [26:05<00:00,  2.38s/it]


Epoch [3/10], Train Loss: 1.0620, Train Accuracy: 0.6769


100%|██████████| 41/41 [03:21<00:00,  4.91s/it]


Validation Loss: 0.9803, Validation Accuracy: 0.6984


100%|██████████| 657/657 [27:03<00:00,  2.47s/it]


Epoch [4/10], Train Loss: 0.9719, Train Accuracy: 0.7020


100%|██████████| 41/41 [03:20<00:00,  4.90s/it]


Validation Loss: 0.9360, Validation Accuracy: 0.7116


100%|██████████| 657/657 [24:46<00:00,  2.26s/it]


Epoch [5/10], Train Loss: 0.9046, Train Accuracy: 0.7216


100%|██████████| 41/41 [03:07<00:00,  4.57s/it]


Validation Loss: 0.9096, Validation Accuracy: 0.7193


100%|██████████| 657/657 [26:06<00:00,  2.38s/it]


Epoch [6/10], Train Loss: 0.8501, Train Accuracy: 0.7367


100%|██████████| 41/41 [03:19<00:00,  4.88s/it]


Validation Loss: 0.8968, Validation Accuracy: 0.7262


100%|██████████| 657/657 [26:50<00:00,  2.45s/it]


Epoch [7/10], Train Loss: 0.8026, Train Accuracy: 0.7506


100%|██████████| 41/41 [03:17<00:00,  4.83s/it]


Validation Loss: 0.8963, Validation Accuracy: 0.7273


100%|██████████| 657/657 [26:15<00:00,  2.40s/it]


Epoch [8/10], Train Loss: 0.7603, Train Accuracy: 0.7625


100%|██████████| 41/41 [03:14<00:00,  4.73s/it]


Validation Loss: 0.8939, Validation Accuracy: 0.7311


100%|██████████| 657/657 [26:09<00:00,  2.39s/it]


Epoch [9/10], Train Loss: 0.7237, Train Accuracy: 0.7736


100%|██████████| 41/41 [03:17<00:00,  4.81s/it]


Validation Loss: 0.9015, Validation Accuracy: 0.7335


100%|██████████| 657/657 [26:47<00:00,  2.45s/it]


Epoch [10/10], Train Loss: 0.6891, Train Accuracy: 0.7835


100%|██████████| 41/41 [03:16<00:00,  4.79s/it]


Validation Loss: 0.9123, Validation Accuracy: 0.7338


VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_accuracy,▁▁▃▄▄▅▅▆▆▅▆▆▆▆▆▇▆▇▇▇▇▇▇▇▇█▇▇▇▇▇█▇▇▇▇██▇█
train_loss,█▆▄▄▄▃▃▃▃▃▃▃▂▃▃▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▆▆▇▇████
val_accuracy_colour_group_name,▁▄▅▆▆▇▇█▇█
val_accuracy_department_name,▁▄▅▆▇▇▇███
val_accuracy_garment_group_name,▁▅▅▆▇█▇███
val_accuracy_graphical_appearance_name,▁▄▅▆▇▇▇███
val_accuracy_index_group_name,▁▄▆▇▇█▇███
val_accuracy_index_name,▁▅▆▇▇█████
val_accuracy_perceived_colour_master_name,▁▄▅▆▆▇██▇█

0,1
train_accuracy,0.78017
train_loss,7.58952
val_accuracy,0.7338
val_accuracy_colour_group_name,0.70256
val_accuracy_department_name,0.51364
val_accuracy_garment_group_name,0.73508
val_accuracy_graphical_appearance_name,0.74871
val_accuracy_index_group_name,0.82443
val_accuracy_index_name,0.76235
val_accuracy_perceived_colour_master_name,0.80374
