In [1]:
import os
import shutil

import torch
import numpy as np
from torch import nn
from torchvision.transforms import transforms

import yaml

import logging
import sys
import csv
import torch
import torch.nn.functional as F
from torch.cuda.amp import GradScaler, autocast

from tqdm import tqdm
from torchvision.models import efficientnet_b0


In [2]:

torch.manual_seed(0)
np.random.seed(0)

# Utils

# **Augmentasi**
Dalam SimCLR, augmentasi data merupakan komponen kunci untuk menciptakan pasangan positif. Dua transformasi berbeda dari gambar yang sama digunakan agar model belajar representasi yang invarian terhadap gangguan visual.

Pada eksperimen ini, augmentasi dilakukan melalui: Random Resized Crop, Random Horizontal Flip, Color Jitter, Random Grayscale, Gaussian Blur

Komentar:
Untuk dataset kompleks seperti Tiny ImageNet yang memiliki banyak kelas dan citra beragam, strategi augmentasi yang kuat sangat penting. Augmentasi tersebut mendorong model belajar fitur yang semantik dan bukan hanya piksel mentah. Pemilihan augmentasi yang tepat juga terbukti membantu model mencapai konvergensi lebih cepat.

In [3]:

class GaussianBlur(object):
    """blur a single image on CPU"""
    def __init__(self, kernel_size):
        radias = kernel_size // 2
        kernel_size = radias * 2 + 1
        self.blur_h = nn.Conv2d(3, 3, kernel_size=(kernel_size, 1),
                                stride=1, padding=0, bias=False, groups=3)
        self.blur_v = nn.Conv2d(3, 3, kernel_size=(1, kernel_size),
                                stride=1, padding=0, bias=False, groups=3)
        self.k = kernel_size
        self.r = radias

        self.blur = nn.Sequential(
            nn.ReflectionPad2d(radias),
            self.blur_h,
            self.blur_v
        )

        self.pil_to_tensor = transforms.ToTensor()
        self.tensor_to_pil = transforms.ToPILImage()

    def __call__(self, img):
        img = self.pil_to_tensor(img).unsqueeze(0)

        sigma = np.random.uniform(0.1, 2.0)
        x = np.arange(-self.r, self.r + 1)
        x = np.exp(-np.power(x, 2) / (2 * sigma * sigma))
        x = x / x.sum()
        x = torch.from_numpy(x).view(1, -1).repeat(3, 1)

        self.blur_h.weight.data.copy_(x.view(3, 1, self.k, 1))
        self.blur_v.weight.data.copy_(x.view(3, 1, 1, self.k))

        with torch.no_grad():
            img = self.blur(img)
            img = img.squeeze()

        img = self.tensor_to_pil(img)

        return img
        
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')


def save_config_file(model_checkpoints_folder, args):
    if not os.path.exists(model_checkpoints_folder):
        os.makedirs(model_checkpoints_folder)
        with open(os.path.join(model_checkpoints_folder, 'config.yml'), 'w') as outfile:
            yaml.dump(args, outfile, default_flow_style=False)


def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


# SimCLR

In [4]:

class SimCLR(object):

    def __init__(self, *args, **kwargs):
        self.args = kwargs['args']
        self.model = kwargs['model'].to(self.args.device)
        self.optimizer = kwargs['optimizer']
        self.scheduler = kwargs['scheduler']
        # self.writer = SummaryWriter()
        # logging.basicConfig(filename=os.path.join(self.writer.log_dir, 'training.log'), level=logging.DEBUG)
        self.criterion = torch.nn.CrossEntropyLoss().to(self.args.device)

        os.makedirs(self.args.log_dir, exist_ok=True)
        logging.basicConfig(filename=os.path.join(self.args.log_dir, 'training.log'),
                            level=logging.INFO,
                            format='%(asctime)s - %(levelname)s - %(message)s')

        self.csv_log_path = os.path.join(self.args.log_dir, 'metrics.csv')
        with open(self.csv_log_path, mode='w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['step', 'loss', 'acc_top1', 'acc_top5', 'learning_rate'])

        

    def info_nce_loss(self, features):

        labels = torch.cat([torch.arange(self.args.batch_size) for i in range(self.args.n_views)], dim=0)
        labels = (labels.unsqueeze(0) == labels.unsqueeze(1)).float()
        labels = labels.to(self.args.device)

        features = F.normalize(features, dim=1)

        similarity_matrix = torch.matmul(features, features.T)
        # assert similarity_matrix.shape == (
        #     self.args.n_views * self.args.batch_size, self.args.n_views * self.args.batch_size)
        # assert similarity_matrix.shape == labels.shape

        # discard the main diagonal from both: labels and similarities matrix
        mask = torch.eye(labels.shape[0], dtype=torch.bool).to(self.args.device)
        labels = labels[~mask].view(labels.shape[0], -1)
        similarity_matrix = similarity_matrix[~mask].view(similarity_matrix.shape[0], -1)
        # assert similarity_matrix.shape == labels.shape

        # select and combine multiple positives
        positives = similarity_matrix[labels.bool()].view(labels.shape[0], -1)

        # select only the negatives the negatives
        negatives = similarity_matrix[~labels.bool()].view(similarity_matrix.shape[0], -1)

        logits = torch.cat([positives, negatives], dim=1)
        labels = torch.zeros(logits.shape[0], dtype=torch.long).to(self.args.device)

        logits = logits / self.args.temperature
        return logits, labels

    def train(self, train_loader):

        scaler = GradScaler(enabled=self.args.fp16_precision)

        # save config file
        # save_config_file(self.writer.log_dir, self.args)

        n_iter = 0
        best_top1 = 0.0  
        logging.info(f"Start SimCLR training for {self.args.epochs} epochs.")

        for epoch_counter in range(self.args.epochs):
            for images, _ in tqdm(train_loader):
                images = torch.cat(images, dim=0)

                images = images.to(self.args.device)

                with autocast(enabled=self.args.fp16_precision):
                    features = self.model(images)
                    logits, labels = self.info_nce_loss(features)
                    loss = self.criterion(logits, labels)

                self.optimizer.zero_grad()

                scaler.scale(loss).backward()

                scaler.step(self.optimizer)
                scaler.update()

                if n_iter % self.args.log_every_n_steps == 0:
                    top1, top5 = accuracy(logits, labels, topk=(1, 5))
                    lr = self.scheduler.get_last_lr()[0]

                    print(f"[Epoch {epoch_counter+1}] Step {n_iter}: Loss={loss:.4f}, Top1={top1[0]:.2f}%, Top5={top5[0]:.2f}%, LR={lr:.6f}")


                    # File log
                    with open(self.csv_log_path, mode='a', newline='') as f:
                        writer = csv.writer(f)
                        writer.writerow([n_iter, loss.item(), top1[0].item(), top5[0].item(), lr])

                    if top1[0].item() > best_top1:
                        best_top1 = top1[0].item()
                        best_ckpt_path = os.path.join(self.args.log_dir, 'checkpoint_best.pth')
                        save_checkpoint({
                            'epoch': epoch_counter + 1,
                            'arch': self.args.arch,
                            'state_dict': self.model.state_dict(),
                            'optimizer': self.optimizer.state_dict(),
                            'best_top1': best_top1
                        }, is_best=True, filename=best_ckpt_path)
                        logging.info(f"New best model saved with Top1={best_top1:.2f}% at epoch {epoch_counter+1}, step {n_iter}")

                    

                n_iter += 1

            # warmup for the first 10 epochs
            if epoch_counter >= 10:
                self.scheduler.step()
            logging.debug(f"Epoch: {epoch_counter}\tLoss: {loss}\tTop1 accuracy: {top1[0]}")

        logging.info("Training has finished.")
        # save model checkpoints
        checkpoint_name = os.path.join(self.args.log_dir, f'checkpoint_{self.args.epochs:04d}.pth.tar')
        
        save_checkpoint({
            'epoch': self.args.epochs,
            'arch': self.args.arch,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
        }, is_best=False, filename=checkpoint_name)
        

# Exceptions

In [5]:
class BaseSimCLRException(Exception):
    """Base exception"""


class InvalidBackboneError(BaseSimCLRException):
    """Raised when the choice of backbone Convnet is invalid."""


class InvalidDatasetSelection(BaseSimCLRException):
    """Raised when the choice of dataset is invalid."""


# Dataset

In [6]:
class ContrastiveLearningViewGenerator(object):
    """Take two random crops of one image as the query and key."""

    def __init__(self, base_transform, n_views=2):
        self.base_transform = base_transform
        self.n_views = n_views

    def __call__(self, x):
        return [self.base_transform(x) for i in range(self.n_views)]

In [7]:
from torchvision.transforms import transforms
from torchvision import transforms, datasets

from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset
from PIL import Image


class ContrastiveLearningDataset:
    def __init__(self, root_folder):
        self.root_folder = root_folder

    @staticmethod
    def get_simclr_pipeline_transform(size, s=1):
        """Return a set of data augmentation transformations as described in the SimCLR paper."""
        color_jitter = transforms.ColorJitter(0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s)
        data_transforms = transforms.Compose([
    transforms.RandomResizedCrop(size=size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomApply([color_jitter], p=0.8),
    transforms.RandomApply([transforms.RandomAffine(degrees=30)], p=0.5),   # modifikasi 1
    transforms.RandomApply([transforms.RandomSolarize(threshold=128)], p=0.5),  # modifikasi 1
    transforms.RandomGrayscale(p=0.2),
    GaussianBlur(kernel_size=int(0.1 * size)),
    transforms.ToTensor()
])

        return data_transforms

    def get_dataset(self, name, n_views):
        valid_datasets = {
                        'cifar10': lambda: datasets.CIFAR10(self.root_folder, train=True,
                                                          transform=ContrastiveLearningViewGenerator(
                                                              self.get_simclr_pipeline_transform(32),
                                                              n_views),
                                                          download=True),
                        'stl10': lambda: datasets.STL10(self.root_folder, split='unlabeled',
                                                      transform=ContrastiveLearningViewGenerator(
                                                          self.get_simclr_pipeline_transform(96),
                                                          n_views),
                                                      download=True),
                        'tinyimagenet' : lambda: ImageFolder(
                            root=os.path.join(self.root_folder, 'tiny-imagenet-200', 'train'),
                            transform=ContrastiveLearningViewGenerator(
                                self.get_simclr_pipeline_transform(64),  
                                n_views))
                        }

        try:
            dataset_fn = valid_datasets[name]
        except KeyError:
            raise InvalidDatasetSelection()
        else:
            return dataset_fn()

# Encoder Model

# **Backbone Encoder**
Encoder dalam SimCLR bertugas mengekstraksi fitur dari gambar sebelum masuk ke projection head. Dalam modifikasi ini, ResNet yang biasa digunakan diganti dengan EfficientNet.

Komentar:
EfficientNet dipilih karena efisiensinya yang tinggi dalam hal parameter dan akurasi. Pada Tiny ImageNet, penggunaan EfficientNet terbukti mempercepat proses konvergensi dan memberikan hasil loss yang lebih stabil dibandingkan ResNet. Ini penting karena dataset ini memiliki resolusi gambar yang kecil (64x64) dan encoder ringan seperti EfficientNet lebih cocok dibanding ResNet50.

# **Projection Head (MLP 2-layer)**
Penjelasan:
Projection head bertugas memetakan representasi ke ruang embedding kontrasif. Ini umumnya berupa MLP dua lapis (Linear–ReLU–Linear) dengan output dimensi laten (misal, 128).

Komentar:
Kamu menggunakan MLP seperti arsitektur SimCLR asli. Fungsi ini penting agar representasi encoder tetap umum (untuk downstream task) karena embedding kontrasif dipelajari di ruang terpisah. Penambahan batch normalization di antara layer bisa membantu stabilisasi.



In [8]:
import torchvision.models as models


class ResNetSimCLR(nn.Module):
    def __init__(self, base_model, out_dim):
        super(ResNetSimCLR, self).__init__()
        self.resnet_dict = {
            "resnet18": models.resnet18(weights=None),
            "resnet50": models.resnet50(weights=None),
            "efficientnet": efficientnet_b0(weights=None)
        }

        self.backbone = self._get_basemodel(base_model)

        if base_model == "efficientnet":
            dim_mlp = self.backbone.classifier[1].in_features
            self.backbone.classifier = nn.Sequential(
                nn.Linear(dim_mlp, dim_mlp),
                nn.ReLU(),
                nn.Linear(dim_mlp, out_dim)
            )
        else:
            dim_mlp = self.backbone.fc.in_features
            self.backbone.fc = nn.Sequential(
                nn.Linear(dim_mlp, dim_mlp),
                nn.ReLU(),
                nn.Linear(dim_mlp, out_dim)
            )

    def _get_basemodel(self, model_name):
        try:
            model = self.resnet_dict[model_name]
        except KeyError:
            raise InvalidBackboneError(
                "Invalid backbone architecture. Check the config file and pass one of: resnet18, resnet50, or efficientnet"
            )
        else:
            return model

    def forward(self, x):
        return self.backbone(x)


# Main

# **Loss Function (NT-Xent)**
Penjelasan:
NT-Xent loss menghitung kesamaan antara positive pairs dibandingkan dengan semua negative pairs dalam satu batch. Diperkuat dengan parameter temperature.

Komentar:
Penggunaan temperature = 0.1 sudah tepat, dan kamu juga sudah mengeksplorasi efeknya: saat kamu ubah menjadi 0.3 atau bahkan 0.007, terlihat dampak terhadap ketajaman distribusi logits (terlalu kecil bisa menyebabkan overfitting, terlalu besar menyebabkan training lambat). Hal ini menegaskan pentingnya tuning temperature yang sesuai.

In [9]:

model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))
print(model_names)

['alexnet', 'convnext_base', 'convnext_large', 'convnext_small', 'convnext_tiny', 'densenet121', 'densenet161', 'densenet169', 'densenet201', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'efficientnet_v2_l', 'efficientnet_v2_m', 'efficientnet_v2_s', 'get_model', 'get_model_builder', 'get_model_weights', 'get_weight', 'googlenet', 'inception_v3', 'list_models', 'maxvit_t', 'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3', 'mobilenet_v2', 'mobilenet_v3_large', 'mobilenet_v3_small', 'regnet_x_16gf', 'regnet_x_1_6gf', 'regnet_x_32gf', 'regnet_x_3_2gf', 'regnet_x_400mf', 'regnet_x_800mf', 'regnet_x_8gf', 'regnet_y_128gf', 'regnet_y_16gf', 'regnet_y_1_6gf', 'regnet_y_32gf', 'regnet_y_3_2gf', 'regnet_y_400mf', 'regnet_y_800mf', 'regnet_y_8gf', 'resnet101', 'resnet152', 'resnet18', 'resnet34', 'resnet50', 'resnext101_32x8d', 'resnext101_64x4d', 'resnext50_32x4d', 'shufflenet_v2_x0_

# **Training Loop**
Penjelasan:
Loop pelatihan bertugas mengeksekusi proses optimisasi loss antara dua augmentasi dari input yang sama. Termasuk setup scheduler dan optimizer (Adam + CosineAnnealingLR).

Komentar:
Loop kamu sudah efisien, menggunakan Mixed Precision (fp16) untuk mempercepat training di GPU dan mengurangi konsumsi memori. Logging ke CSV dan visualisasi dengan matplotlib sangat membantu untuk memantau performa model dari waktu ke waktu.

Kamu juga berhasil menghindari overfitting meski menggunakan 50 epoch — ini menandakan kombinasi arsitektur dan parameter yang kamu gunakan cukup optimal untuk dataset ini.

In [10]:
from types import SimpleNamespace
import torch.backends.cudnn as cudnn

args = SimpleNamespace()
args.device = torch.device('cuda')
args.data = '/kaggle/input/tiny-imagenet/tiny-imagenet-200'
cudnn.deterministic = True
cudnn.benchmark = True
args.dataset_name = 'tinyimagenet'
args.n_views = 2
args.batch_size = 256
args.out_dim = 128
args.lr = 0.0003
args.weight_decay = 1e-4
args.arch = 'efficientnet'
args.workers = 2
args.gpu_index = 0
args.log_dir = '/kaggle/working/logs/simclr'
args.fp16_precision = True
args.epochs = 50
args.temperature = 0.1
args.seed = 1
args.log_every_n_steps = 100
dataset = ContrastiveLearningDataset(args.data)

train_dataset = dataset.get_dataset(args.dataset_name, args.n_views)

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=args.batch_size, shuffle=True,
    num_workers=args.workers, pin_memory=True, drop_last=True)

model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim)

optimizer = torch.optim.Adam(model.parameters(), args.lr, weight_decay=args.weight_decay)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader), eta_min=0,
                                                       last_epoch=-1)

In [11]:
with torch.cuda.device(args.gpu_index):
    simclr = SimCLR(model=model, optimizer=optimizer, scheduler=scheduler, args=args)
    simclr.train(train_loader)

  scaler = GradScaler(enabled=self.args.fp16_precision)
  with autocast(enabled=self.args.fp16_precision):
  0%|          | 1/390 [00:07<49:31,  7.64s/it]

[Epoch 1] Step 0: Loss=6.7702, Top1=0.20%, Top5=1.17%, LR=0.000300


 26%|██▌       | 101/390 [01:55<04:38,  1.04it/s]

[Epoch 1] Step 100: Loss=6.2337, Top1=0.20%, Top5=4.49%, LR=0.000300


 52%|█████▏    | 201/390 [03:47<03:08,  1.00it/s]

[Epoch 1] Step 200: Loss=6.1917, Top1=0.00%, Top5=2.15%, LR=0.000300


 77%|███████▋  | 300/390 [05:42<01:53,  1.26s/it]

[Epoch 1] Step 300: Loss=6.1437, Top1=0.39%, Top5=1.37%, LR=0.000300


100%|██████████| 390/390 [07:26<00:00,  1.15s/it]
  3%|▎         | 10/390 [00:08<04:18,  1.47it/s]

[Epoch 2] Step 400: Loss=6.1480, Top1=0.98%, Top5=3.71%, LR=0.000300


 28%|██▊       | 111/390 [01:30<04:18,  1.08it/s]

[Epoch 2] Step 500: Loss=6.0443, Top1=0.98%, Top5=6.05%, LR=0.000300


 54%|█████▍    | 210/390 [02:48<01:56,  1.54it/s]

[Epoch 2] Step 600: Loss=5.8343, Top1=1.95%, Top5=7.23%, LR=0.000300


 79%|███████▉  | 310/390 [04:06<00:56,  1.43it/s]

[Epoch 2] Step 700: Loss=5.7360, Top1=2.54%, Top5=11.52%, LR=0.000300


100%|██████████| 390/390 [05:07<00:00,  1.27it/s]
  5%|▌         | 20/390 [00:16<04:08,  1.49it/s]

[Epoch 3] Step 800: Loss=5.7333, Top1=2.93%, Top5=8.20%, LR=0.000300


 31%|███       | 121/390 [01:34<04:08,  1.08it/s]

[Epoch 3] Step 900: Loss=5.6461, Top1=1.76%, Top5=9.77%, LR=0.000300


 56%|█████▋    | 220/390 [02:50<01:57,  1.45it/s]

[Epoch 3] Step 1000: Loss=5.6032, Top1=4.30%, Top5=13.87%, LR=0.000300


 82%|████████▏ | 321/390 [04:09<00:58,  1.18it/s]

[Epoch 3] Step 1100: Loss=5.6397, Top1=3.91%, Top5=12.30%, LR=0.000300


100%|██████████| 390/390 [05:01<00:00,  1.29it/s]
  8%|▊         | 30/390 [00:23<04:12,  1.42it/s]

[Epoch 4] Step 1200: Loss=5.5181, Top1=5.66%, Top5=14.06%, LR=0.000300


 33%|███▎      | 130/390 [01:39<02:55,  1.48it/s]

[Epoch 4] Step 1300: Loss=5.2213, Top1=8.79%, Top5=23.44%, LR=0.000300


 59%|█████▉    | 231/390 [02:58<02:15,  1.17it/s]

[Epoch 4] Step 1400: Loss=5.4442, Top1=5.27%, Top5=13.09%, LR=0.000300


 85%|████████▍ | 331/390 [04:14<00:50,  1.16it/s]

[Epoch 4] Step 1500: Loss=5.1365, Top1=7.23%, Top5=19.92%, LR=0.000300


100%|██████████| 390/390 [04:59<00:00,  1.30it/s]
 11%|█         | 41/390 [00:31<04:43,  1.23it/s]

[Epoch 5] Step 1600: Loss=5.1758, Top1=8.01%, Top5=23.83%, LR=0.000300


 36%|███▌      | 141/390 [01:45<03:22,  1.23it/s]

[Epoch 5] Step 1700: Loss=5.2021, Top1=6.84%, Top5=19.53%, LR=0.000300


 62%|██████▏   | 241/390 [02:59<02:04,  1.20it/s]

[Epoch 5] Step 1800: Loss=5.2902, Top1=7.42%, Top5=19.73%, LR=0.000300


 87%|████████▋ | 340/390 [04:13<00:32,  1.52it/s]

[Epoch 5] Step 1900: Loss=5.0922, Top1=9.18%, Top5=22.66%, LR=0.000300


100%|██████████| 390/390 [04:51<00:00,  1.34it/s]
 13%|█▎        | 51/390 [00:38<04:42,  1.20it/s]

[Epoch 6] Step 2000: Loss=5.0913, Top1=7.03%, Top5=21.68%, LR=0.000300


 39%|███▊      | 151/390 [01:52<03:04,  1.30it/s]

[Epoch 6] Step 2100: Loss=5.2423, Top1=8.79%, Top5=20.12%, LR=0.000300


 64%|██████▍   | 250/390 [03:07<01:58,  1.18it/s]

[Epoch 6] Step 2200: Loss=4.7664, Top1=12.89%, Top5=29.49%, LR=0.000300


 90%|█████████ | 351/390 [04:23<00:24,  1.57it/s]

[Epoch 6] Step 2300: Loss=4.8043, Top1=10.94%, Top5=26.95%, LR=0.000300


100%|██████████| 390/390 [04:53<00:00,  1.33it/s]
 16%|█▌        | 61/390 [00:46<04:36,  1.19it/s]

[Epoch 7] Step 2400: Loss=4.8205, Top1=11.13%, Top5=26.56%, LR=0.000300


 41%|████▏     | 161/390 [02:00<03:10,  1.21it/s]

[Epoch 7] Step 2500: Loss=4.7890, Top1=11.33%, Top5=25.39%, LR=0.000300


 67%|██████▋   | 261/390 [03:15<01:48,  1.19it/s]

[Epoch 7] Step 2600: Loss=4.8855, Top1=10.94%, Top5=28.12%, LR=0.000300


 92%|█████████▏| 360/390 [04:30<00:23,  1.29it/s]

[Epoch 7] Step 2700: Loss=4.7815, Top1=17.97%, Top5=33.01%, LR=0.000300


100%|██████████| 390/390 [04:54<00:00,  1.33it/s]
 18%|█▊        | 71/390 [00:55<04:19,  1.23it/s]

[Epoch 8] Step 2800: Loss=4.7132, Top1=14.65%, Top5=30.66%, LR=0.000300


 44%|████▍     | 171/390 [02:13<02:55,  1.25it/s]

[Epoch 8] Step 2900: Loss=4.7653, Top1=12.11%, Top5=27.93%, LR=0.000300


 69%|██████▉   | 271/390 [03:28<01:30,  1.31it/s]

[Epoch 8] Step 3000: Loss=4.8152, Top1=10.55%, Top5=24.41%, LR=0.000300


 95%|█████████▌| 371/390 [04:45<00:15,  1.19it/s]

[Epoch 8] Step 3100: Loss=4.7318, Top1=15.04%, Top5=31.25%, LR=0.000300


100%|██████████| 390/390 [05:00<00:00,  1.30it/s]
 21%|██        | 81/390 [01:03<03:55,  1.31it/s]

[Epoch 9] Step 3200: Loss=4.5748, Top1=15.04%, Top5=33.40%, LR=0.000300


 46%|████▋     | 181/390 [02:19<02:34,  1.35it/s]

[Epoch 9] Step 3300: Loss=4.5616, Top1=15.62%, Top5=31.84%, LR=0.000300


 72%|███████▏  | 281/390 [03:36<01:13,  1.47it/s]

[Epoch 9] Step 3400: Loss=4.7383, Top1=14.65%, Top5=31.64%, LR=0.000300


 98%|█████████▊| 381/390 [04:53<00:06,  1.46it/s]

[Epoch 9] Step 3500: Loss=4.5470, Top1=16.02%, Top5=33.79%, LR=0.000300


100%|██████████| 390/390 [05:00<00:00,  1.30it/s]
 23%|██▎       | 90/390 [01:09<03:20,  1.50it/s]

[Epoch 10] Step 3600: Loss=4.5337, Top1=19.14%, Top5=38.67%, LR=0.000300


 49%|████▉     | 191/390 [02:27<02:53,  1.15it/s]

[Epoch 10] Step 3700: Loss=4.5774, Top1=16.99%, Top5=34.18%, LR=0.000300


 74%|███████▍  | 290/390 [03:42<01:09,  1.44it/s]

[Epoch 10] Step 3800: Loss=4.3158, Top1=22.07%, Top5=41.80%, LR=0.000300


100%|██████████| 390/390 [04:58<00:00,  1.31it/s]
  0%|          | 1/390 [00:01<12:06,  1.87s/it]

[Epoch 11] Step 3900: Loss=4.4618, Top1=20.70%, Top5=34.18%, LR=0.000300


 26%|██▌       | 101/390 [01:18<04:12,  1.14it/s]

[Epoch 11] Step 4000: Loss=4.4923, Top1=19.34%, Top5=32.23%, LR=0.000300


 52%|█████▏    | 201/390 [02:34<02:39,  1.19it/s]

[Epoch 11] Step 4100: Loss=4.4080, Top1=18.75%, Top5=36.33%, LR=0.000300


 77%|███████▋  | 301/390 [03:49<01:13,  1.21it/s]

[Epoch 11] Step 4200: Loss=4.4225, Top1=21.29%, Top5=39.84%, LR=0.000300


100%|██████████| 390/390 [04:55<00:00,  1.32it/s]
  3%|▎         | 11/390 [00:09<05:21,  1.18it/s]

[Epoch 12] Step 4300: Loss=4.2628, Top1=20.12%, Top5=40.23%, LR=0.000300


 28%|██▊       | 111/390 [01:25<03:50,  1.21it/s]

[Epoch 12] Step 4400: Loss=4.4204, Top1=18.75%, Top5=36.52%, LR=0.000300


 54%|█████▍    | 211/390 [02:42<02:18,  1.29it/s]

[Epoch 12] Step 4500: Loss=4.2263, Top1=19.34%, Top5=40.23%, LR=0.000300


 80%|███████▉  | 311/390 [03:59<01:00,  1.30it/s]

[Epoch 12] Step 4600: Loss=4.2354, Top1=22.07%, Top5=41.60%, LR=0.000300


100%|██████████| 390/390 [05:00<00:00,  1.30it/s]
  5%|▌         | 20/390 [00:15<03:56,  1.57it/s]

[Epoch 13] Step 4700: Loss=3.9675, Top1=25.20%, Top5=44.53%, LR=0.000300


 31%|███       | 120/390 [01:32<02:57,  1.52it/s]

[Epoch 13] Step 4800: Loss=4.0972, Top1=25.78%, Top5=47.27%, LR=0.000300


 57%|█████▋    | 221/390 [02:49<02:24,  1.17it/s]

[Epoch 13] Step 4900: Loss=4.0849, Top1=21.48%, Top5=43.36%, LR=0.000300


 82%|████████▏ | 321/390 [04:04<00:54,  1.26it/s]

[Epoch 13] Step 5000: Loss=4.3788, Top1=21.68%, Top5=39.26%, LR=0.000300


100%|██████████| 390/390 [04:57<00:00,  1.31it/s]
  8%|▊         | 31/390 [00:24<05:05,  1.18it/s]

[Epoch 14] Step 5100: Loss=4.2975, Top1=21.48%, Top5=36.33%, LR=0.000300


 33%|███▎      | 130/390 [01:39<03:20,  1.30it/s]

[Epoch 14] Step 5200: Loss=3.9360, Top1=29.69%, Top5=48.24%, LR=0.000300


 59%|█████▉    | 231/390 [02:57<01:47,  1.48it/s]

[Epoch 14] Step 5300: Loss=3.9374, Top1=25.20%, Top5=43.36%, LR=0.000300


 85%|████████▍ | 330/390 [04:14<00:52,  1.14it/s]

[Epoch 14] Step 5400: Loss=3.6467, Top1=32.23%, Top5=51.95%, LR=0.000300


100%|██████████| 390/390 [05:01<00:00,  1.29it/s]
 11%|█         | 41/390 [00:32<05:15,  1.11it/s]

[Epoch 15] Step 5500: Loss=3.8221, Top1=27.34%, Top5=47.07%, LR=0.000300


 36%|███▌      | 141/390 [01:51<03:40,  1.13it/s]

[Epoch 15] Step 5600: Loss=3.9192, Top1=29.30%, Top5=45.51%, LR=0.000300


 62%|██████▏   | 241/390 [03:11<02:16,  1.09it/s]

[Epoch 15] Step 5700: Loss=3.7902, Top1=26.76%, Top5=46.88%, LR=0.000300


 87%|████████▋ | 341/390 [04:29<00:44,  1.10it/s]

[Epoch 15] Step 5800: Loss=4.0537, Top1=26.37%, Top5=44.14%, LR=0.000300


100%|██████████| 390/390 [05:06<00:00,  1.27it/s]
 13%|█▎        | 51/390 [00:39<04:35,  1.23it/s]

[Epoch 16] Step 5900: Loss=4.0714, Top1=25.78%, Top5=43.36%, LR=0.000300


 39%|███▊      | 151/390 [01:57<03:19,  1.20it/s]

[Epoch 16] Step 6000: Loss=4.1201, Top1=20.90%, Top5=41.80%, LR=0.000300


 64%|██████▍   | 251/390 [03:15<02:01,  1.14it/s]

[Epoch 16] Step 6100: Loss=3.9096, Top1=27.93%, Top5=47.27%, LR=0.000300


 90%|█████████ | 351/390 [04:33<00:33,  1.15it/s]

[Epoch 16] Step 6200: Loss=3.8390, Top1=28.71%, Top5=47.46%, LR=0.000300


100%|██████████| 390/390 [05:03<00:00,  1.28it/s]
 16%|█▌        | 61/390 [00:49<05:00,  1.09it/s]

[Epoch 17] Step 6300: Loss=4.0014, Top1=26.95%, Top5=44.73%, LR=0.000300


 41%|████      | 160/390 [02:05<02:32,  1.51it/s]

[Epoch 17] Step 6400: Loss=3.7976, Top1=32.42%, Top5=49.02%, LR=0.000300


 67%|██████▋   | 260/390 [03:24<01:29,  1.45it/s]

[Epoch 17] Step 6500: Loss=3.5448, Top1=33.01%, Top5=52.34%, LR=0.000300


 93%|█████████▎| 361/390 [04:42<00:24,  1.17it/s]

[Epoch 17] Step 6600: Loss=3.9782, Top1=23.83%, Top5=42.58%, LR=0.000300


100%|██████████| 390/390 [05:03<00:00,  1.28it/s]
 18%|█▊        | 71/390 [00:59<04:38,  1.15it/s]

[Epoch 18] Step 6700: Loss=3.6439, Top1=29.10%, Top5=49.41%, LR=0.000300


 44%|████▍     | 171/390 [02:18<03:12,  1.14it/s]

[Epoch 18] Step 6800: Loss=4.1433, Top1=23.05%, Top5=41.21%, LR=0.000300


 69%|██████▉   | 271/390 [03:37<01:40,  1.18it/s]

[Epoch 18] Step 6900: Loss=3.7605, Top1=30.66%, Top5=50.39%, LR=0.000300


 95%|█████████▍| 370/390 [04:53<00:13,  1.43it/s]

[Epoch 18] Step 7000: Loss=3.3525, Top1=37.70%, Top5=53.52%, LR=0.000300


100%|██████████| 390/390 [05:09<00:00,  1.26it/s]
 21%|██        | 81/390 [01:02<04:10,  1.23it/s]

[Epoch 19] Step 7100: Loss=3.6415, Top1=32.42%, Top5=50.78%, LR=0.000300


 46%|████▋     | 181/390 [02:19<02:31,  1.38it/s]

[Epoch 19] Step 7200: Loss=3.6616, Top1=32.62%, Top5=53.12%, LR=0.000300


 72%|███████▏  | 281/390 [03:36<01:13,  1.49it/s]

[Epoch 19] Step 7300: Loss=3.5787, Top1=33.79%, Top5=50.00%, LR=0.000300


 98%|█████████▊| 381/390 [04:53<00:06,  1.49it/s]

[Epoch 19] Step 7400: Loss=3.6393, Top1=35.94%, Top5=50.98%, LR=0.000300


100%|██████████| 390/390 [05:01<00:00,  1.29it/s]
 23%|██▎       | 91/390 [01:12<04:25,  1.13it/s]

[Epoch 20] Step 7500: Loss=3.5518, Top1=31.64%, Top5=49.61%, LR=0.000300


 49%|████▉     | 191/390 [02:30<02:47,  1.19it/s]

[Epoch 20] Step 7600: Loss=3.6046, Top1=35.35%, Top5=52.15%, LR=0.000300


 75%|███████▍  | 291/390 [03:47<01:26,  1.15it/s]

[Epoch 20] Step 7700: Loss=3.6444, Top1=30.47%, Top5=50.39%, LR=0.000300


100%|██████████| 390/390 [05:05<00:00,  1.28it/s]
  0%|          | 1/390 [00:01<11:36,  1.79s/it]

[Epoch 21] Step 7800: Loss=3.5411, Top1=33.40%, Top5=54.88%, LR=0.000300


 26%|██▌       | 100/390 [01:17<03:28,  1.39it/s]

[Epoch 21] Step 7900: Loss=3.3491, Top1=38.09%, Top5=56.84%, LR=0.000300


 52%|█████▏    | 201/390 [02:35<02:12,  1.43it/s]

[Epoch 21] Step 8000: Loss=3.5545, Top1=34.77%, Top5=52.93%, LR=0.000300


 77%|███████▋  | 301/390 [03:52<01:04,  1.39it/s]

[Epoch 21] Step 8100: Loss=3.4980, Top1=33.20%, Top5=49.22%, LR=0.000300


100%|██████████| 390/390 [05:01<00:00,  1.29it/s]
  3%|▎         | 11/390 [00:09<05:42,  1.11it/s]

[Epoch 22] Step 8200: Loss=3.5734, Top1=33.20%, Top5=50.78%, LR=0.000299


 28%|██▊       | 111/390 [01:26<04:01,  1.16it/s]

[Epoch 22] Step 8300: Loss=3.3317, Top1=35.16%, Top5=55.27%, LR=0.000299


 54%|█████▍    | 211/390 [02:43<02:29,  1.20it/s]

[Epoch 22] Step 8400: Loss=3.3438, Top1=36.52%, Top5=55.08%, LR=0.000299


 79%|███████▉  | 310/390 [03:58<00:53,  1.51it/s]

[Epoch 22] Step 8500: Loss=3.2456, Top1=40.43%, Top5=60.16%, LR=0.000299


100%|██████████| 390/390 [05:00<00:00,  1.30it/s]
  5%|▌         | 21/390 [00:17<05:26,  1.13it/s]

[Epoch 23] Step 8600: Loss=3.2862, Top1=34.77%, Top5=56.25%, LR=0.000299


 31%|███       | 121/390 [01:34<03:59,  1.13it/s]

[Epoch 23] Step 8700: Loss=3.5522, Top1=33.59%, Top5=53.52%, LR=0.000299


 57%|█████▋    | 221/390 [02:51<02:22,  1.19it/s]

[Epoch 23] Step 8800: Loss=3.4004, Top1=35.55%, Top5=53.32%, LR=0.000299


 82%|████████▏ | 321/390 [04:10<01:01,  1.12it/s]

[Epoch 23] Step 8900: Loss=3.2364, Top1=36.52%, Top5=54.88%, LR=0.000299


100%|██████████| 390/390 [05:03<00:00,  1.29it/s]
  8%|▊         | 31/390 [00:24<05:10,  1.16it/s]

[Epoch 24] Step 9000: Loss=3.2494, Top1=37.70%, Top5=61.33%, LR=0.000299


 34%|███▎      | 131/390 [01:40<03:44,  1.16it/s]

[Epoch 24] Step 9100: Loss=3.2808, Top1=37.70%, Top5=56.64%, LR=0.000299


 59%|█████▉    | 231/390 [02:57<02:12,  1.20it/s]

[Epoch 24] Step 9200: Loss=3.2745, Top1=37.89%, Top5=58.59%, LR=0.000299


 85%|████████▍ | 331/390 [04:14<00:46,  1.27it/s]

[Epoch 24] Step 9300: Loss=3.3661, Top1=34.96%, Top5=55.86%, LR=0.000299


100%|██████████| 390/390 [04:59<00:00,  1.30it/s]
 11%|█         | 41/390 [00:33<05:05,  1.14it/s]

[Epoch 25] Step 9400: Loss=3.4329, Top1=32.23%, Top5=54.49%, LR=0.000299


 36%|███▌      | 141/390 [01:53<03:46,  1.10it/s]

[Epoch 25] Step 9500: Loss=3.4683, Top1=36.72%, Top5=54.30%, LR=0.000299


 62%|██████▏   | 241/390 [03:15<02:16,  1.09it/s]

[Epoch 25] Step 9600: Loss=3.3333, Top1=35.16%, Top5=57.03%, LR=0.000299


 87%|████████▋ | 341/390 [04:34<00:43,  1.13it/s]

[Epoch 25] Step 9700: Loss=3.4267, Top1=35.94%, Top5=55.27%, LR=0.000299


100%|██████████| 390/390 [05:12<00:00,  1.25it/s]
 13%|█▎        | 51/390 [00:40<04:54,  1.15it/s]

[Epoch 26] Step 9800: Loss=3.4469, Top1=38.28%, Top5=56.64%, LR=0.000299


 39%|███▊      | 151/390 [01:56<02:43,  1.46it/s]

[Epoch 26] Step 9900: Loss=3.4156, Top1=36.33%, Top5=52.93%, LR=0.000299


 64%|██████▍   | 251/390 [03:13<01:36,  1.44it/s]

[Epoch 26] Step 10000: Loss=3.3550, Top1=37.11%, Top5=53.71%, LR=0.000299


 90%|█████████ | 351/390 [04:30<00:28,  1.38it/s]

[Epoch 26] Step 10100: Loss=3.1878, Top1=38.67%, Top5=58.01%, LR=0.000299


100%|██████████| 390/390 [05:01<00:00,  1.29it/s]
 16%|█▌        | 61/390 [00:47<04:40,  1.17it/s]

[Epoch 27] Step 10200: Loss=3.4228, Top1=36.72%, Top5=53.12%, LR=0.000299


 41%|████▏     | 161/390 [02:03<03:14,  1.18it/s]

[Epoch 27] Step 10300: Loss=3.2853, Top1=37.89%, Top5=55.47%, LR=0.000299


 67%|██████▋   | 260/390 [03:19<01:27,  1.49it/s]

[Epoch 27] Step 10400: Loss=3.0221, Top1=44.53%, Top5=61.91%, LR=0.000299


 93%|█████████▎| 361/390 [04:37<00:24,  1.17it/s]

[Epoch 27] Step 10500: Loss=3.3015, Top1=36.91%, Top5=56.45%, LR=0.000299


100%|██████████| 390/390 [04:59<00:00,  1.30it/s]
 18%|█▊        | 71/390 [00:55<04:34,  1.16it/s]

[Epoch 28] Step 10600: Loss=3.3828, Top1=38.28%, Top5=53.91%, LR=0.000299


 44%|████▍     | 171/390 [02:11<02:57,  1.24it/s]

[Epoch 28] Step 10700: Loss=3.4376, Top1=34.38%, Top5=53.91%, LR=0.000299


 69%|██████▉   | 271/390 [03:27<01:30,  1.32it/s]

[Epoch 28] Step 10800: Loss=2.8549, Top1=44.34%, Top5=64.45%, LR=0.000299


 95%|█████████▌| 371/390 [04:44<00:14,  1.29it/s]

[Epoch 28] Step 10900: Loss=3.3046, Top1=39.06%, Top5=56.84%, LR=0.000299


100%|██████████| 390/390 [04:58<00:00,  1.31it/s]
 21%|██        | 81/390 [01:03<04:37,  1.11it/s]

[Epoch 29] Step 11000: Loss=3.2265, Top1=39.06%, Top5=58.98%, LR=0.000298


 46%|████▋     | 181/390 [02:20<02:57,  1.18it/s]

[Epoch 29] Step 11100: Loss=2.9530, Top1=43.95%, Top5=65.43%, LR=0.000298


 72%|███████▏  | 281/390 [03:37<01:34,  1.15it/s]

[Epoch 29] Step 11200: Loss=3.2510, Top1=40.43%, Top5=59.18%, LR=0.000298


 98%|█████████▊| 381/390 [04:54<00:07,  1.20it/s]

[Epoch 29] Step 11300: Loss=3.0692, Top1=38.87%, Top5=60.74%, LR=0.000298


100%|██████████| 390/390 [05:01<00:00,  1.29it/s]
 23%|██▎       | 91/390 [01:12<04:14,  1.17it/s]

[Epoch 30] Step 11400: Loss=3.1850, Top1=39.06%, Top5=60.35%, LR=0.000298


 49%|████▉     | 191/390 [02:29<02:55,  1.13it/s]

[Epoch 30] Step 11500: Loss=3.4103, Top1=34.18%, Top5=53.52%, LR=0.000298


 75%|███████▍  | 291/390 [03:46<01:28,  1.12it/s]

[Epoch 30] Step 11600: Loss=3.1816, Top1=41.41%, Top5=60.35%, LR=0.000298


100%|██████████| 390/390 [05:03<00:00,  1.28it/s]
  0%|          | 1/390 [00:01<12:04,  1.86s/it]

[Epoch 31] Step 11700: Loss=3.3301, Top1=38.28%, Top5=58.98%, LR=0.000298


 26%|██▌       | 101/390 [01:19<04:02,  1.19it/s]

[Epoch 31] Step 11800: Loss=3.1826, Top1=43.16%, Top5=58.59%, LR=0.000298


 52%|█████▏    | 201/390 [02:36<02:44,  1.15it/s]

[Epoch 31] Step 11900: Loss=3.1420, Top1=41.21%, Top5=61.33%, LR=0.000298


 77%|███████▋  | 301/390 [03:54<01:14,  1.19it/s]

[Epoch 31] Step 12000: Loss=3.1665, Top1=38.67%, Top5=62.11%, LR=0.000298


100%|██████████| 390/390 [05:05<00:00,  1.27it/s]
  3%|▎         | 11/390 [00:09<05:36,  1.13it/s]

[Epoch 32] Step 12100: Loss=3.2436, Top1=40.04%, Top5=57.42%, LR=0.000298


 28%|██▊       | 110/390 [01:26<03:18,  1.41it/s]

[Epoch 32] Step 12200: Loss=2.8670, Top1=46.88%, Top5=62.89%, LR=0.000298


 54%|█████▍    | 211/390 [02:45<02:36,  1.15it/s]

[Epoch 32] Step 12300: Loss=3.2440, Top1=39.84%, Top5=55.47%, LR=0.000298


 80%|███████▉  | 311/390 [04:02<01:08,  1.15it/s]

[Epoch 32] Step 12400: Loss=3.2128, Top1=39.65%, Top5=56.84%, LR=0.000298


100%|██████████| 390/390 [05:03<00:00,  1.29it/s]
  5%|▌         | 21/390 [00:17<05:25,  1.13it/s]

[Epoch 33] Step 12500: Loss=2.8446, Top1=44.34%, Top5=65.04%, LR=0.000298


 31%|███       | 121/390 [01:35<04:04,  1.10it/s]

[Epoch 33] Step 12600: Loss=3.1944, Top1=42.77%, Top5=59.38%, LR=0.000298


 57%|█████▋    | 221/390 [02:56<02:42,  1.04it/s]

[Epoch 33] Step 12700: Loss=3.1476, Top1=42.77%, Top5=63.87%, LR=0.000298


 82%|████████▏ | 321/390 [04:17<01:03,  1.09it/s]

[Epoch 33] Step 12800: Loss=3.0200, Top1=43.36%, Top5=63.67%, LR=0.000298


100%|██████████| 390/390 [05:10<00:00,  1.26it/s]
  8%|▊         | 31/390 [00:24<05:07,  1.17it/s]

[Epoch 34] Step 12900: Loss=3.0841, Top1=38.48%, Top5=60.94%, LR=0.000297


 34%|███▎      | 131/390 [01:42<03:43,  1.16it/s]

[Epoch 34] Step 13000: Loss=3.1091, Top1=36.33%, Top5=57.42%, LR=0.000297


 59%|█████▉    | 230/390 [03:02<01:52,  1.43it/s]

[Epoch 34] Step 13100: Loss=2.8807, Top1=48.44%, Top5=64.26%, LR=0.000297


 85%|████████▍ | 331/390 [04:23<00:52,  1.13it/s]

[Epoch 34] Step 13200: Loss=3.0433, Top1=39.26%, Top5=61.33%, LR=0.000297


100%|██████████| 390/390 [05:08<00:00,  1.27it/s]
 11%|█         | 41/390 [00:32<05:01,  1.16it/s]

[Epoch 35] Step 13300: Loss=3.0547, Top1=43.36%, Top5=61.91%, LR=0.000297


 36%|███▌      | 141/390 [01:49<03:33,  1.17it/s]

[Epoch 35] Step 13400: Loss=2.9884, Top1=45.90%, Top5=63.87%, LR=0.000297


 62%|██████▏   | 241/390 [03:06<02:08,  1.16it/s]

[Epoch 35] Step 13500: Loss=3.1079, Top1=41.60%, Top5=58.98%, LR=0.000297


 87%|████████▋ | 341/390 [04:22<00:39,  1.23it/s]

[Epoch 35] Step 13600: Loss=3.1720, Top1=43.36%, Top5=61.52%, LR=0.000297


100%|██████████| 390/390 [04:58<00:00,  1.30it/s]
 13%|█▎        | 51/390 [00:40<04:54,  1.15it/s]

[Epoch 36] Step 13700: Loss=2.8571, Top1=46.88%, Top5=64.26%, LR=0.000297


 39%|███▊      | 151/390 [01:55<03:24,  1.17it/s]

[Epoch 36] Step 13800: Loss=2.9251, Top1=45.51%, Top5=62.89%, LR=0.000297


 64%|██████▍   | 251/390 [03:11<01:59,  1.16it/s]

[Epoch 36] Step 13900: Loss=2.9267, Top1=44.53%, Top5=63.87%, LR=0.000297


 90%|████████▉ | 350/390 [04:27<00:27,  1.48it/s]

[Epoch 36] Step 14000: Loss=2.7890, Top1=49.41%, Top5=66.80%, LR=0.000297


100%|██████████| 390/390 [04:58<00:00,  1.31it/s]
 16%|█▌        | 61/390 [00:48<04:34,  1.20it/s]

[Epoch 37] Step 14100: Loss=2.9968, Top1=44.92%, Top5=65.23%, LR=0.000297


 41%|████▏     | 161/390 [02:03<03:11,  1.20it/s]

[Epoch 37] Step 14200: Loss=2.8255, Top1=49.02%, Top5=65.82%, LR=0.000297


 67%|██████▋   | 261/390 [03:19<01:51,  1.15it/s]

[Epoch 37] Step 14300: Loss=3.0966, Top1=41.60%, Top5=61.33%, LR=0.000297


 92%|█████████▏| 360/390 [04:38<00:20,  1.46it/s]

[Epoch 37] Step 14400: Loss=2.8850, Top1=50.00%, Top5=66.99%, LR=0.000297


100%|██████████| 390/390 [05:02<00:00,  1.29it/s]
 18%|█▊        | 71/390 [00:55<04:38,  1.14it/s]

[Epoch 38] Step 14500: Loss=2.8409, Top1=48.44%, Top5=65.43%, LR=0.000296


 44%|████▍     | 171/390 [02:12<03:09,  1.15it/s]

[Epoch 38] Step 14600: Loss=3.0066, Top1=43.16%, Top5=63.87%, LR=0.000296


 69%|██████▉   | 271/390 [03:29<01:45,  1.12it/s]

[Epoch 38] Step 14700: Loss=2.9151, Top1=44.92%, Top5=64.45%, LR=0.000296


 95%|█████████▍| 370/390 [04:45<00:13,  1.49it/s]

[Epoch 38] Step 14800: Loss=2.8659, Top1=50.20%, Top5=65.23%, LR=0.000296


100%|██████████| 390/390 [05:01<00:00,  1.30it/s]
 21%|██        | 80/390 [01:02<03:27,  1.49it/s]

[Epoch 39] Step 14900: Loss=2.7240, Top1=51.56%, Top5=68.95%, LR=0.000296


 46%|████▋     | 181/390 [02:20<02:57,  1.18it/s]

[Epoch 39] Step 15000: Loss=2.7640, Top1=50.59%, Top5=68.36%, LR=0.000296


 72%|███████▏  | 281/390 [03:38<01:35,  1.14it/s]

[Epoch 39] Step 15100: Loss=3.0458, Top1=42.38%, Top5=63.87%, LR=0.000296


 98%|█████████▊| 381/390 [04:56<00:07,  1.17it/s]

[Epoch 39] Step 15200: Loss=2.8603, Top1=47.85%, Top5=66.02%, LR=0.000296


100%|██████████| 390/390 [05:03<00:00,  1.29it/s]
 23%|██▎       | 91/390 [01:11<03:50,  1.30it/s]

[Epoch 40] Step 15300: Loss=2.7815, Top1=49.61%, Top5=65.62%, LR=0.000296


 49%|████▉     | 191/390 [02:29<02:17,  1.45it/s]

[Epoch 40] Step 15400: Loss=2.9875, Top1=38.48%, Top5=62.30%, LR=0.000296


 75%|███████▍  | 291/390 [03:46<01:06,  1.50it/s]

[Epoch 40] Step 15500: Loss=2.7460, Top1=46.09%, Top5=68.16%, LR=0.000296


100%|██████████| 390/390 [05:04<00:00,  1.28it/s]
  0%|          | 1/390 [00:01<12:19,  1.90s/it]

[Epoch 41] Step 15600: Loss=2.9144, Top1=44.53%, Top5=64.84%, LR=0.000296


 26%|██▌       | 101/390 [01:19<04:25,  1.09it/s]

[Epoch 41] Step 15700: Loss=2.7610, Top1=47.46%, Top5=67.58%, LR=0.000296


 52%|█████▏    | 201/390 [02:37<02:44,  1.15it/s]

[Epoch 41] Step 15800: Loss=2.8155, Top1=47.07%, Top5=64.06%, LR=0.000296


 77%|███████▋  | 301/390 [03:53<01:14,  1.19it/s]

[Epoch 41] Step 15900: Loss=2.7766, Top1=46.68%, Top5=66.60%, LR=0.000296


100%|██████████| 390/390 [05:01<00:00,  1.29it/s]
  3%|▎         | 11/390 [00:09<05:28,  1.15it/s]

[Epoch 42] Step 16000: Loss=2.8071, Top1=45.90%, Top5=66.21%, LR=0.000295


 28%|██▊       | 111/390 [01:27<04:02,  1.15it/s]

[Epoch 42] Step 16100: Loss=2.7099, Top1=49.02%, Top5=69.34%, LR=0.000295


 54%|█████▍    | 211/390 [02:45<02:41,  1.11it/s]

[Epoch 42] Step 16200: Loss=2.9343, Top1=46.29%, Top5=65.62%, LR=0.000295


 80%|███████▉  | 311/390 [04:04<01:08,  1.16it/s]

[Epoch 42] Step 16300: Loss=2.8796, Top1=46.09%, Top5=66.21%, LR=0.000295


100%|██████████| 390/390 [05:07<00:00,  1.27it/s]
  5%|▌         | 21/390 [00:17<05:27,  1.13it/s]

[Epoch 43] Step 16400: Loss=2.7652, Top1=48.44%, Top5=66.99%, LR=0.000295


 31%|███       | 121/390 [01:33<03:54,  1.15it/s]

[Epoch 43] Step 16500: Loss=2.7401, Top1=49.41%, Top5=66.99%, LR=0.000295


 57%|█████▋    | 221/390 [02:51<02:33,  1.10it/s]

[Epoch 43] Step 16600: Loss=2.8435, Top1=46.68%, Top5=68.36%, LR=0.000295


 82%|████████▏ | 321/390 [04:11<01:00,  1.14it/s]

[Epoch 43] Step 16700: Loss=2.6898, Top1=48.44%, Top5=66.60%, LR=0.000295


100%|██████████| 390/390 [05:03<00:00,  1.28it/s]
  8%|▊         | 31/390 [00:24<05:04,  1.18it/s]

[Epoch 44] Step 16800: Loss=2.6695, Top1=50.78%, Top5=69.34%, LR=0.000295


 34%|███▎      | 131/390 [01:40<03:33,  1.21it/s]

[Epoch 44] Step 16900: Loss=2.5621, Top1=50.98%, Top5=71.88%, LR=0.000295


 59%|█████▉    | 231/390 [02:56<02:14,  1.18it/s]

[Epoch 44] Step 17000: Loss=2.8602, Top1=46.48%, Top5=66.02%, LR=0.000295


 85%|████████▍ | 331/390 [04:13<00:50,  1.17it/s]

[Epoch 44] Step 17100: Loss=2.7238, Top1=48.24%, Top5=69.53%, LR=0.000295


100%|██████████| 390/390 [04:57<00:00,  1.31it/s]
 11%|█         | 41/390 [00:32<05:02,  1.15it/s]

[Epoch 45] Step 17200: Loss=2.7967, Top1=45.31%, Top5=66.60%, LR=0.000294


 36%|███▌      | 141/390 [01:47<03:32,  1.17it/s]

[Epoch 45] Step 17300: Loss=2.7748, Top1=46.88%, Top5=67.97%, LR=0.000294


 62%|██████▏   | 241/390 [03:02<02:04,  1.20it/s]

[Epoch 45] Step 17400: Loss=2.8498, Top1=47.85%, Top5=68.55%, LR=0.000294


 87%|████████▋ | 341/390 [04:18<00:42,  1.16it/s]

[Epoch 45] Step 17500: Loss=3.0393, Top1=45.12%, Top5=63.48%, LR=0.000294


100%|██████████| 390/390 [04:55<00:00,  1.32it/s]
 13%|█▎        | 51/390 [00:40<05:06,  1.11it/s]

[Epoch 46] Step 17600: Loss=2.7752, Top1=46.88%, Top5=65.04%, LR=0.000294


 39%|███▊      | 151/390 [01:56<03:27,  1.15it/s]

[Epoch 46] Step 17700: Loss=2.7623, Top1=47.85%, Top5=65.43%, LR=0.000294


 64%|██████▍   | 251/390 [03:13<02:02,  1.13it/s]

[Epoch 46] Step 17800: Loss=2.7738, Top1=47.46%, Top5=67.97%, LR=0.000294


 90%|████████▉ | 350/390 [04:27<00:25,  1.57it/s]

[Epoch 46] Step 17900: Loss=2.6952, Top1=52.54%, Top5=69.73%, LR=0.000294


100%|██████████| 390/390 [04:57<00:00,  1.31it/s]
 16%|█▌        | 61/390 [00:48<04:47,  1.14it/s]

[Epoch 47] Step 18000: Loss=2.6971, Top1=51.76%, Top5=70.31%, LR=0.000294


 41%|████▏     | 161/390 [02:04<03:14,  1.18it/s]

[Epoch 47] Step 18100: Loss=2.8555, Top1=44.53%, Top5=65.43%, LR=0.000294


 67%|██████▋   | 261/390 [03:19<01:49,  1.18it/s]

[Epoch 47] Step 18200: Loss=2.9099, Top1=50.59%, Top5=64.84%, LR=0.000294


 92%|█████████▏| 360/390 [04:33<00:19,  1.52it/s]

[Epoch 47] Step 18300: Loss=2.6588, Top1=53.91%, Top5=69.34%, LR=0.000294


100%|██████████| 390/390 [04:56<00:00,  1.31it/s]
 18%|█▊        | 71/390 [00:55<04:27,  1.19it/s]

[Epoch 48] Step 18400: Loss=2.7388, Top1=47.46%, Top5=69.14%, LR=0.000293


 44%|████▍     | 171/390 [02:10<03:06,  1.17it/s]

[Epoch 48] Step 18500: Loss=2.5792, Top1=52.34%, Top5=70.31%, LR=0.000293


 69%|██████▉   | 271/390 [03:25<01:37,  1.21it/s]

[Epoch 48] Step 18600: Loss=2.6957, Top1=53.52%, Top5=68.95%, LR=0.000293


 95%|█████████▌| 371/390 [04:40<00:16,  1.17it/s]

[Epoch 48] Step 18700: Loss=2.6814, Top1=50.78%, Top5=68.55%, LR=0.000293


100%|██████████| 390/390 [04:54<00:00,  1.33it/s]
 21%|██        | 81/390 [01:02<04:13,  1.22it/s]

[Epoch 49] Step 18800: Loss=2.9131, Top1=47.85%, Top5=65.82%, LR=0.000293


 46%|████▋     | 181/390 [02:21<03:05,  1.13it/s]

[Epoch 49] Step 18900: Loss=2.6417, Top1=52.93%, Top5=69.53%, LR=0.000293


 72%|███████▏  | 281/390 [03:39<01:36,  1.14it/s]

[Epoch 49] Step 19000: Loss=2.8591, Top1=47.66%, Top5=66.99%, LR=0.000293


 98%|█████████▊| 381/390 [04:55<00:07,  1.16it/s]

[Epoch 49] Step 19100: Loss=2.7573, Top1=47.85%, Top5=68.55%, LR=0.000293


100%|██████████| 390/390 [05:01<00:00,  1.29it/s]
 23%|██▎       | 91/390 [01:11<04:18,  1.16it/s]

[Epoch 50] Step 19200: Loss=2.7872, Top1=50.20%, Top5=63.87%, LR=0.000293


 49%|████▉     | 191/390 [02:29<02:57,  1.12it/s]

[Epoch 50] Step 19300: Loss=2.5848, Top1=53.12%, Top5=72.27%, LR=0.000293


 75%|███████▍  | 291/390 [03:45<01:23,  1.19it/s]

[Epoch 50] Step 19400: Loss=2.7477, Top1=48.83%, Top5=70.70%, LR=0.000293


100%|██████████| 390/390 [05:01<00:00,  1.29it/s]


In [17]:
import pandas as pd
import matplotlib.pyplot as plt

# Ganti path ini kalau perlu
csv_path = '/kaggle/working/logs/simclr/metrics.csv'

# Baca file CSV
df = pd.read_csv(csv_path)

# Plot diagram garis untuk kolom 'loss' terhadap 'epoch'
plt.plot(df['epoch'], df['loss'], marker='o', linestyle='-')

plt.title('Diagram Garis: Loss per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.tight_layout()
plt.show()


KeyError: 'epoch'