# ArcFace + Swin
- dataset, label, arcface parameter, modify
- train, val, test, dfinitely split
- W&B logging
- pretrain = False, learning rates
- bounding
- ArcMarginProduct learned
- Search Result
- Aug

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts
# import torchvision
import timm
from torchvision import datasets, transforms
import time
from utils.utils import GaussianBlur , GeM
import math
import random
import numpy as np
import wandb

# from models.swintrans import PatentNet
# from models.swin_transformer_v2 import swinv2_base_window12to24_192to384

from pytorch_metric_learning import losses, testers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator

seed = 123
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True


[1712723662.642781] [w1qeblctr1712723101201-fvttg:125  :f]        vfs_fuse.c:281  UCX  ERROR inotify_add_watch(/tmp) failed: No space left on device


In [2]:
import os
# os.environ["http_proxy"] = "http://proxy.uec.ac.jp:8080/"
# os.environ["https_proxy"] = "http://proxy.uec.ac.jp:8080/"
# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
os.environ["WANDB_NOTEBOOK_NAME"] = "ArcFace-DPat-SwinV2"

In [3]:

wandb.login()
wandb.init(project="ArcFace-Swin",
           notes="EffNetb5預訓練模型繼續訓練在新資料集",
           name="EffNetb5_Arc_0410_pretrainmodel_on_dbscan_crop"
           )
# wandb.finish()240410_Swinv2_Arc_dbscan_train

[34m[1mwandb[0m: Currently logged in as: [33mmin1000[0m ([33mmin1k[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:

gem_pool = GeM()

class PatentNet(nn.Module):
    def __init__(self, model_name: str, pretrained: bool, embedding_size: int, in_features):
        super().__init__()
        
#         self.backbone = timm.create_model(model_name, pretrained=pretrained, num_classes=0, in_chans=1, global_pool="")
        self.backbone = timm.create_model(model_name, pretrained=pretrained, num_classes=0, in_chans=1)
        # in_features = 1280 # efficentnet_b0
        # in_features = in_features
        
#         in_features = 1280 # efnet
#         in_features = 2048 # resnet50
#         in_features = 768 # vit_b_16
        
        # in_features = 1024 # swin_base
        self.pool = gem_pool
        self.head = nn.Sequential(
        #     nn.Linear(in_features=1024, out_features=1000, bias=True),
            nn.Flatten(),
        #     nn.BatchNorm1d(in_features),
        #     nn.Dropout(),
            nn.Linear(in_features, embedding_size),
            nn.BatchNorm1d(embedding_size),
        #     nn.PReLU(),
        )
#         self.arcface = ArcMarginProduct(embedding_size,class_num)
    def forward(self, x):
        x = self.backbone(x)
#         x = self.pool(x) # 2D
        x = self.head(x)

        return x

In [5]:
app_count = 33364 # 出力クラス数

device = torch.device("cuda")

# model = PatentNet(model_name="swinv2_base_window12to24_192to384.ms_in22k_ft_in1k", pretrained="True", embedding_size=512, in_features = 1024).to(device) # 実験でfalse変更
# model = PatentNet(model_name='efficientnet_b4', pretrained="True", embedding_size=512 , in_features = 1792).to(device)
model = PatentNet(model_name='efficientnet_b5.sw_in12k_ft_in1k', pretrained="True", embedding_size=512 , in_features = 2048).to(device)
# model = PatentNet(model_name="resnet50", pretrained="True", embedding_size=1000).to(device)
tmp = torch.load("/work/v24684491/Saved_models/240326_EffNetb5_Arc_800768/Swinv2_Arc7.pth")
model.load_state_dict(tmp['state_dict'])
model = nn.DataParallel(model)

In [6]:
# print(model)

In [7]:
batch_size = 128
# batch_size = 8
log_interval = 5

num_epochs = 15

In [8]:
### MNIST code originally from https://github.com/pytorch/examples/blob/master/mnist/main.py ###
scaler = GradScaler()
def train(model, metric, loss_func, device, train_loader, optimizer, metric_optimizer, epoch, scheduler = None, metric_scheduler = None):
    print(f"Epoch {epoch} training start")
    model.train()
    total_loss = 0
    for batch_idx, (data, labels) in enumerate(train_loader):
        data, labels = data.to(device), labels.to(device) # labels
        optimizer.zero_grad()
        metric_optimizer.zero_grad()
        # output = model(data)
        # output = metric(output,labels)
        # loss = loss_func(output, labels)
        # loss.backward()

        # optimizer.step()
        # metric_optimizer.step()
        
        with autocast():
            output = model(data)
            output = metric(output,labels)
            
            loss = loss_func(output, labels)
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.step(metric_optimizer)
        scaler.update()

        total_loss += loss

        if batch_idx % 330 == 0:
            print("Epoch {} Iteration {}: Train Loss = {}".format(epoch, batch_idx, loss))
        if batch_idx % log_interval == 0:
            wandb.log({'epoch': epoch, 'train-iter': batch_idx, 'train-loss': loss})
        
        wandb.log({'lr': optimizer.param_groups[0]['lr'], 'metric_lr': metric_optimizer.param_groups[0]['lr']})
        
    if scheduler:
        scheduler.step()
        metric_scheduler.step()

    total_loss = total_loss/len(train_loader)
    wandb.log({'epoch': epoch, 'train-epoch-loss':total_loss})
    print("End of epoch {}".format(epoch))

In [9]:
### convenient function from pytorch-metric-learning ###
def get_all_embeddings(dataset, model):
    tester = testers.BaseTester()
    return tester.get_all_embeddings(dataset, model)

In [10]:
### compute accuracy using AccuracyCalculator from pytorch-metric-learning ###
def test(train_set, test_set, model, accuracy_calculator):
    torch.cuda.empty_cache()
    model.eval()
    with torch.no_grad():
        train_embeddings, train_labels = get_all_embeddings(train_set, model)
        test_embeddings, test_labels = get_all_embeddings(test_set, model)
        train_labels = train_labels.squeeze(1)
        # print(train_labels)
        test_labels = test_labels.squeeze(1)
        print("Computing accuracy")
        accuracies = accuracy_calculator.get_accuracy(
            test_embeddings, test_labels, train_embeddings,  train_labels, False
        )
        prec_1 = accuracies["precision_at_1"]
        map = accuracies["mean_average_precision"]
        print(f"Test set accuracy (Precision@1) = {prec_1}, (mAP) = {map}")
        wandb.log({'Prec@1': accuracies["precision_at_1"], 'mAP': accuracies["mean_average_precision"]})


In [11]:
# 計算測試損失
def test_calc(model, metric, loss_func, device, test_query_loader, epoch):
    model.eval()
    with torch.no_grad():
        total_loss = 0
        for batch_idx, (data, labels) in enumerate(test_query_loader):
            data, labels = data.to(device), labels.to(device) # labels
            embeddings = model(data)
            output = metric(embeddings,labels)
            loss = loss_func(output, labels)
            total_loss += loss
            
            if batch_idx % 50 == 0:
                print("Epoch {} Iteration {}: Test Loss = {}".format(epoch, batch_idx, loss))
            if batch_idx % log_interval == 0:
                wandb.log({'epoch': epoch, 'test-iter': batch_idx, 'test-loss': loss})
    
        total_loss = total_loss/len(test_query_loader)
        wandb.log({'epoch': epoch, 'test-epoch-loss':total_loss})
    

In [12]:
### compute accuracy using AccuracyCalculator from pytorch-metric-learning ###
def valuation(val_db_set, val_query_set, model, accuracy_calculator):
    torch.cuda.empty_cache()
    model.eval()
    with torch.no_grad():
        val_db_embeddings, val_db_labels = get_all_embeddings(val_db_set, model)
        val_query_embeddings, val_query_labels = get_all_embeddings(val_query_set, model)
        val_db_labels = val_db_labels.squeeze(1)
        val_query_labels = val_query_labels.squeeze(1)
        print("Val accuracy")
        accuracies = accuracy_calculator.get_accuracy(
            val_query_embeddings, val_query_labels,val_db_embeddings, val_db_labels, False
        )
        prec_1 = accuracies["precision_at_1"]
        map = accuracies["mean_average_precision"]
        print(f"Val set accuracy (Precision@1) = {prec_1}, (mAP) = {map}")
        wandb.log({'Val_Prec@1': accuracies["precision_at_1"], 'Val_mAP': accuracies["mean_average_precision"]})

In [13]:
# val損失の計算
def valuation_calc(model, metric, loss_func, device, val_query_loader, epoch):
    model.eval()
    with torch.no_grad():
        total_loss = 0
        for batch_idx, (data, labels) in enumerate(val_query_loader):
            data, labels = data.to(device), labels.to(device) # labels
            embeddings = model(data)
            output = metric(embeddings,labels)
            loss = loss_func(output, labels)
            total_loss += loss
            
            if batch_idx % 50 == 0:
                print("Epoch {} Iteration {}: Test Loss = {}".format(epoch, batch_idx, loss))
            if batch_idx % log_interval == 0:
                wandb.log({'epoch': epoch, 'val-iter': batch_idx, 'validation-loss': loss})
        
        total_loss = total_loss/len(val_query_loader)
        wandb.log({'epoch': epoch, 'validation-epoch-loss':total_loss})

## transform, dataset, dataloader

In [14]:
data_transform = {
    'train': transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((768,768)), # 短辺384
        # transforms.RandomCrop((384,384), pad_if_needed=True, fill=255),
        # transforms.CenterCrop((768,768)),
        transforms.RandomHorizontalFlip(p=0.5),
        # transforms.RandomApply([GaussianBlur()], p=0.5),
        transforms.ToTensor(),
        # transforms.RandomErasing(p=0.5, scale=(0.22, 0.33), ratio=(0.3, 3.3), value=1, inplace=False), # ToTensorの後
        # transforms.Normalize((0.5), (0.5)),
    ]),
    'val': transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize(size=(768,768)),
        # transforms.CenterCrop((768,768)),
        transforms.ToTensor(),
        # transforms.Normalize((0.5), (0.5)),
    ])
}

## Datasetを作成

In [15]:
import os.path as osp
import glob
def make_datapath_list(phase):
    """
    phase: train or val
    path_list: データパスのリスト
    """
    
    rootpath = "/work/v24684491/DeepPatent/patent_data/"
#     rootpath = "/host/space0/higuchi-k/dpatent_dataset/patent_data/"

    if phase == "train":
        with open('./patlist/17_train_patent_trn_rebuild_2percentageup_sorted.txt') as f:
            lines = f.readlines()
    elif phase == "test_query":
        with open('./patlist/test_query_patent.txt') as f:
            lines = f.readlines()
    elif phase == "test_db":
        with open('./patlist/test_db_patent.txt') as f:
            lines = f.readlines()
    elif phase == "val_query":
        with open('./patlist/val_query_patent.txt') as f:
            lines = f.readlines()
    elif phase == "val_db":
        with open('./patlist/val_db_patent.txt') as f:
            lines = f.readlines()
            
    path_list = [] # ここに格納
    for path in lines:
        path = path.split()[0] # 0:path only, 1: label
        path_list.append(rootpath+path)
        
    return path_list

train_list = make_datapath_list(phase="train")

test_query_list = make_datapath_list(phase="test_query")
test_db_list = make_datapath_list(phase="test_db")

val_query_list = make_datapath_list(phase="val_query")
val_db_list = make_datapath_list(phase="val_db")

# print(train_list[0:3] , "\n")
# print("train_list:", len(train_list), "\n")
# print("test_query_list:", len(test_query_list))
# print("test_db_list:", len(test_db_list), "\n")
# print("val_query_list:", len(val_query_list))
# print("val_db_list", len(val_db_list))

In [16]:
def make_label_list(phase):
    """
    phase: train or val
    label_list: データラベルのリスト
    """
    
    rootpath = "/home/hmc/DeepPatent/patent_data/"
#     rootpath = "/host/space0/higuchi-k/dpatent_dataset/patent_data/"

    if phase == "train":
        with open('./patlist/17_train_patent_trn_rebuild_2percentageup_sorted.txt') as f:
            lines = f.readlines()
    elif phase == "test_query":
        with open('./patlist/test_query_patent.txt') as f:
            lines = f.readlines()
    elif phase == "test_db":
        with open('./patlist/test_db_patent.txt') as f:
            lines = f.readlines()
    elif phase == "val_query":
        with open('./patlist/val_query_patent.txt') as f:
            lines = f.readlines()
    elif phase == "val_db":
        with open('./patlist/val_db_patent.txt') as f:
            lines = f.readlines()
        
    label_list = [] # ここに格納
    for label in lines:
        label = label.split()[1] # 0:path only, 1: label
        label = int(label)
        label_list.append(label)
        
    return label_list

train_label_list = make_label_list(phase="train")

test_query_label_list = make_label_list(phase="test_query")
test_db_label_list = make_label_list(phase="test_db")

val_query_label_list = make_label_list(phase="val_query")
val_db_label_list = make_label_list(phase="val_db")

# print(train_label_list[0:3])
# print(type(train_label_list[0]))

In [17]:
import copy
import torch.utils.data as data
from PIL import Image
import cv2

def cv2pil(image):
    ''' OpenCV型 -> PIL型 '''
    new_image = image.copy()
    if new_image.ndim == 2:  # 單色
        pass
    elif new_image.shape[2] == 3:  # 彩色
        new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB)
    elif new_image.shape[2] == 4:  # 透過
        new_image = cv2.cvtColor(new_image, cv2.COLOR_BGRA2RGBA)
    new_image = Image.fromarray(new_image)
    return new_image

class PatentDataset(data.Dataset):
    def __init__(self, file_list, label_list, transform):
        self.file_list = file_list
        self.label_list = label_list
        self.transform = transform
        
    def __len__(self):
        return len(self.file_list)
    
    def __getitem__(self, index):
        img_path = self.file_list[index]
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # open
        _, img = cv2.threshold(img, 120, 255, cv2.THRESH_BINARY)  # 二値化(白底為1，黑線為0)

        contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # 取得輪廓
        try:
            cnt = contours[0]
        except: 
            print(f"Error from {img_path}")

        x, y, w, h = cv2.boundingRect(cnt)  # 取得外接矩形
        img = img[y:y + h, x:x + w]  # 剪出外接矩形

        # 计算 padding 大小
        # max_side = max(w, h)
        # pad_x = (max_side - w) // 2
        # pad_y = (max_side - h) // 2

        # 进行 padding
        # img = cv2.copyMakeBorder(img, pad_y, pad_y, pad_x, pad_x, cv2.BORDER_CONSTANT, value=255)

        # _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)  # 反二値化(白底為0，黑線為1)

        img = cv2pil(img)
        img = self.transform(img)

        label = self.label_list[index]
        label = label - 1  # 0-indexed [0, nclass-1]轉換為標籤

        return img, label


In [18]:
import os
# num_workers = os.cpu_count() - 1
num_workers = 16
print(num_workers)

16


In [19]:
train_data = PatentDataset(file_list=train_list, label_list=train_label_list, transform=data_transform['train'])

test_query_data = PatentDataset(file_list=test_query_list, label_list=test_query_label_list, transform=data_transform['val'])
test_db_data = PatentDataset(file_list=test_db_list, label_list=test_db_label_list, transform=data_transform['val'])

val_query_data = PatentDataset(file_list=val_query_list, label_list=val_query_label_list, transform=data_transform['val'])
val_db_data = PatentDataset(file_list=val_db_list, label_list=val_db_label_list, transform=data_transform['val'])

In [20]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True)

test_query_loader = torch.utils.data.DataLoader(test_query_data, batch_size=batch_size, num_workers=num_workers, drop_last=True)
test_db_loader = torch.utils.data.DataLoader(test_db_data, batch_size=batch_size, num_workers=num_workers, drop_last=True)

val_query_loader = torch.utils.data.DataLoader(val_query_data, batch_size=batch_size, num_workers=num_workers, drop_last=True)
val_db_loader = torch.utils.data.DataLoader(val_db_data, batch_size=batch_size, num_workers=num_workers, drop_last=True)

## 設定

In [21]:
from pytorch_metric_learning import losses, distances, regularizers

# distance = distances.CosineSimilarity()
# regularizer = regularizers.RegularFaceRegularizer()
# sampler = None

## ArcFace

In [22]:
class ArcMarginProduct(nn.Module):
    r"""Implement of large margin arc distance: :
        Args:
            in_features: size of each input sample
            out_features: size of each output sample
            s: norm of input feature
            m: margin
            cos(theta + m)
        """
    def __init__(self, in_features, out_features, s=64.0, m=0.50, easy_margin=False):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
        one_hot = torch.zeros(cosine.size(), device='cuda')
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
        output *= self.s
        # print(output)

        return output

## loss, optimizer, accuracy設定

In [23]:
# model optimizer
# optimizer = optim.Adam(model.parameters(), lr=scheduler_params['lr_start'])
optimizer = optim.Adam(model.parameters(), lr=1e-4)
metric = ArcMarginProduct(512, app_count, s=20.0, m=0.50, easy_margin=True).to(device)
metric_optimizer = optim.Adam(metric.parameters(), lr=5e-3)

# optimizer = optim.SGD(model.parameters(), lr=1e-4)
# metric_optimizer = optim.SGD(metric.parameters(), lr=5e-3)

# Defining LR SCheduler
# scheduler = JPOScheduler(optimizer, **scheduler_params)
scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs )
metric_scheduler = CosineAnnealingLR(metric_optimizer, T_max=num_epochs )

# scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=1, T_mult=2)
# metric_scheduler = CosineAnnealingWarmRestarts(metric_optimizer, T_0=1, T_mult=2)

In [24]:
# gamma = 2.0 変更
class FocalLoss(nn.Module):

    def __init__(self, gamma=1.5, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.eps = eps
        self.ce = torch.nn.CrossEntropyLoss()

    def forward(self, input, target):
        logp = self.ce(input, target)
        p = torch.exp(-logp)
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()

In [25]:
### pytorch-metric-learning stuff ###

# loss_func = losses.ArcFaceLoss(num_classes=app_count, embedding_size=1000, margin=28.6, scale=64, weight_regularizer=regularizer, distance=distance).to(device)
# loss_func = losses.ArcFaceLoss(num_classes=app_count, embedding_size=1000, margin=0.1, scale=8, weight_regularizer=regularizer, distance=distance).to(device)
# loss_func = losses.SubCenterArcFaceLoss(num_classes=app_count, embedding_size=1000, margin=28.6, scale=64, sub_centers=3, weight_regularizer=regularizer, distance=distance).to(device)
# loss_func = nn.CrossEntropyLoss()
loss_func = FocalLoss()

# loss_optimizer = optim.Adam(loss_func.parameters(), lr=1e-5) # from 1e-4


accuracy_calculator = AccuracyCalculator(include=("precision_at_1", "mean_average_precision"), k="max_bin_count")
### pytorch-metric-learning stuff ###

In [26]:
import os
import torch

file_name = "/work/v24684491/Saved_models/240410_EffNetb5_Arc_dbscan_train/"
if not os.path.exists(file_name):
    os.makedirs(file_name)
# print(file_name + 'eff-arcface-test.pth')
# torch.save(model.state_dict(), file_name +'Swinv2_Arc.pth')

## Train the model

In [None]:
# tmp = torch.load("/work/v24684491/Saved_models/Swinv2_Arc_1217_only_400384/Swinv2_Arc8.pth")
# model.load_state_dict(tmp['state_dict'])
# optimizer.load_state_dict(tmp['optimizer_state_dict'])
# metric.load_state_dict(tmp['metric_dict'])
# metric_optimizer.load_state_dict(tmp['metric_optimizer_dict'])
# optimizer.param_groups[0]['lr'] = lr=1e-4
# metric_optimizer.param_groups[0]['lr'] = 5e-3
# model = nn.DataParallel(model)
# scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs )
# metric_scheduler = CosineAnnealingLR(metric_optimizer, T_max=num_epochs )
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer , step_size = 5,gamma= 0.1, last_epoch= -1)
# metric_scheduler = torch.optim.lr_scheduler.StepLR(metric_optimizer , step_size = 5,gamma= 0.1, last_epoch= -1)

for epoch in range(1, num_epochs + 1):
    
    time_start = time.time()
#     train(model, loss_func, device, train_loader, optimizer, loss_optimizer, epoch)
    train(model, metric, loss_func, device, train_loader, optimizer, metric_optimizer, epoch, scheduler, metric_scheduler)
#     train(model, metric, loss_func, device, train_loader, optimizer, metric_optimizer, epoch)
    
    # 古くて低い性能のモデルは，こまめに削除してディスク容量対策する
    torch.save({'state_dict': model.module.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            "metric_optimizer_dict" : metric_optimizer.state_dict(),
            "metric_dict" : metric.state_dict(),
            "lr_scheduler_dict" : scheduler.state_dict(),
            "metric_lr_scheduler_dict" : metric_scheduler.state_dict(),
            'epochs' : epoch }, file_name +'Swinv2_Arc{}.pth'.format(epoch))
    
    # torch.save({"state_dict":model.state_dict(),"epoch":epoch}, file_name +'EffArc_Gem-test_epoch{}.pth'.format(epoch))
    
    time_end = time.time()    #結束計時
    time_c= time_end - time_start   #執行所花時間
    print(f'Epoch {epoch} training time cost {round(time_c/60,2)} mins') 
    
    test(test_db_data, test_query_data, model, accuracy_calculator)

    val_time_end = time.time()    #結束計時
    time_c= val_time_end - time_end   #執行所花時間
    print(f'Epoch {epoch} test time cost {round(time_c/60,2)} mins')

wandb.finish()

Epoch 1 training start
Epoch 1 Iteration 0: Train Loss = 16.013107299804688
Epoch 1 Iteration 330: Train Loss = 14.401381492614746
Epoch 1 Iteration 660: Train Loss = 14.822908401489258
Epoch 1 Iteration 990: Train Loss = 14.024886131286621
Epoch 1 Iteration 1320: Train Loss = 12.64328384399414
Epoch 1 Iteration 1650: Train Loss = 12.921857833862305
Epoch 1 Iteration 1980: Train Loss = 12.911544799804688
Epoch 1 Iteration 2310: Train Loss = 12.65061092376709
Epoch 1 Iteration 2640: Train Loss = 11.84924030303955
Epoch 1 Iteration 2970: Train Loss = 11.738849639892578
Epoch 1 Iteration 3300: Train Loss = 12.773321151733398
Epoch 1 Iteration 3630: Train Loss = 11.384119033813477
Epoch 1 Iteration 3960: Train Loss = 12.204998970031738
Epoch 1 Iteration 4290: Train Loss = 11.027189254760742
End of epoch 1
Epoch 1 training time cost 71.77 mins


100%|██████████| 1214/1214 [09:44<00:00,  2.08it/s]
100%|██████████| 411/411 [03:02<00:00,  2.25it/s]


Computing accuracy


  x.storage().data_ptr() + x.storage_offset() * 4)


Test set accuracy (Precision@1) = 0.9475367395111551, (mAP) = 0.8640100383986594
Epoch 1 test time cost 12.9 mins
Epoch 2 training start
Epoch 2 Iteration 0: Train Loss = 11.843191146850586
Epoch 2 Iteration 330: Train Loss = 11.024640083312988
Epoch 2 Iteration 660: Train Loss = 9.989233016967773
Epoch 2 Iteration 990: Train Loss = 11.562463760375977
Epoch 2 Iteration 1320: Train Loss = 10.903096199035645
Epoch 2 Iteration 1650: Train Loss = 10.840314865112305
Epoch 2 Iteration 1980: Train Loss = 9.962678909301758
Epoch 2 Iteration 2310: Train Loss = 10.130949974060059
Epoch 2 Iteration 2640: Train Loss = 10.49455738067627
Epoch 2 Iteration 2970: Train Loss = 9.891399383544922
Epoch 2 Iteration 3300: Train Loss = 10.073354721069336
Epoch 2 Iteration 3630: Train Loss = 10.768989562988281
Epoch 2 Iteration 3960: Train Loss = 9.64596176147461
Epoch 2 Iteration 4290: Train Loss = 9.480974197387695
End of epoch 2
Epoch 2 training time cost 72.06 mins


100%|██████████| 1214/1214 [09:13<00:00,  2.19it/s]
100%|██████████| 411/411 [03:13<00:00,  2.12it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.9526383918373562, (mAP) = 0.8774715451917586
Epoch 2 test time cost 12.54 mins
Epoch 3 training start
Epoch 3 Iteration 0: Train Loss = 9.047769546508789
Epoch 3 Iteration 330: Train Loss = 9.231979370117188
Epoch 3 Iteration 660: Train Loss = 9.462691307067871
Epoch 3 Iteration 990: Train Loss = 9.642138481140137
Epoch 3 Iteration 1320: Train Loss = 9.108382225036621
Epoch 3 Iteration 1650: Train Loss = 9.886247634887695
Epoch 3 Iteration 1980: Train Loss = 9.715089797973633
Epoch 3 Iteration 2310: Train Loss = 8.8630952835083
Epoch 3 Iteration 2640: Train Loss = 9.286005973815918


wandb: Network error (ReadTimeout), entering retry loop.


Epoch 3 Iteration 2970: Train Loss = 8.868804931640625
Epoch 3 Iteration 3300: Train Loss = 9.285984992980957
Epoch 3 Iteration 3630: Train Loss = 8.40355110168457
Epoch 3 Iteration 3960: Train Loss = 8.600994110107422
Epoch 3 Iteration 4290: Train Loss = 9.851364135742188
End of epoch 3
Epoch 3 training time cost 72.61 mins


100%|██████████| 1214/1214 [09:47<00:00,  2.07it/s]
100%|██████████| 411/411 [02:59<00:00,  2.30it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.9540089849996193, (mAP) = 0.8784491147079492
Epoch 3 test time cost 12.86 mins
Epoch 4 training start
Epoch 4 Iteration 0: Train Loss = 9.389657974243164
Epoch 4 Iteration 330: Train Loss = 8.162039756774902
Epoch 4 Iteration 660: Train Loss = 8.07362174987793
Epoch 4 Iteration 990: Train Loss = 9.08753490447998
Epoch 4 Iteration 1320: Train Loss = 9.282567024230957
Epoch 4 Iteration 1650: Train Loss = 9.868605613708496
Epoch 4 Iteration 1980: Train Loss = 8.436516761779785
Epoch 4 Iteration 2310: Train Loss = 9.031244277954102
Epoch 4 Iteration 2640: Train Loss = 7.921779632568359
Epoch 4 Iteration 2970: Train Loss = 8.143840789794922
Epoch 4 Iteration 3300: Train Loss = 8.60838794708252
Epoch 4 Iteration 3630: Train Loss = 8.671504974365234
Epoch 4 Iteration 3960: Train Loss = 8.186119079589844
Epoch 4 Iteration 4290: Train Loss = 8.729968070983887
End of epoch 4
Epoch 4 training time cost 72.69 mins


100%|██████████| 1214/1214 [10:05<00:00,  2.00it/s]
100%|██████████| 411/411 [03:03<00:00,  2.24it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.9557602984847331, (mAP) = 0.8822234460723638
Epoch 4 test time cost 13.23 mins
Epoch 5 training start
Epoch 5 Iteration 0: Train Loss = 8.118392944335938
Epoch 5 Iteration 330: Train Loss = 7.804272174835205
Epoch 5 Iteration 660: Train Loss = 9.203898429870605
Epoch 5 Iteration 990: Train Loss = 7.9780144691467285
Epoch 5 Iteration 1320: Train Loss = 8.133490562438965
Epoch 5 Iteration 1650: Train Loss = 7.792478084564209
Epoch 5 Iteration 1980: Train Loss = 6.937102794647217
Epoch 5 Iteration 2310: Train Loss = 7.241811752319336
Epoch 5 Iteration 2640: Train Loss = 7.638871669769287
Epoch 5 Iteration 2970: Train Loss = 8.572898864746094
Epoch 5 Iteration 3300: Train Loss = 8.11917781829834
Epoch 5 Iteration 3630: Train Loss = 8.459342002868652
Epoch 5 Iteration 3960: Train Loss = 7.769558906555176
Epoch 5 Iteration 4290: Train Loss = 7.560789585113525
End of epoch 5
Epoch 5 training time cost 72.97 mins


100%|██████████| 1214/1214 [08:55<00:00,  2.27it/s]
100%|██████████| 411/411 [03:07<00:00,  2.20it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.9549227137744613, (mAP) = 0.8780393173556224
Epoch 5 test time cost 12.13 mins
Epoch 6 training start
Epoch 6 Iteration 0: Train Loss = 7.5110297203063965
Epoch 6 Iteration 330: Train Loss = 7.689380168914795
Epoch 6 Iteration 660: Train Loss = 6.572943687438965
Epoch 6 Iteration 990: Train Loss = 7.5818376541137695
Epoch 6 Iteration 1320: Train Loss = 7.613896369934082
Epoch 6 Iteration 1650: Train Loss = 6.724954128265381
Epoch 6 Iteration 1980: Train Loss = 7.959171772003174
Epoch 6 Iteration 2310: Train Loss = 7.767221927642822
Epoch 6 Iteration 2640: Train Loss = 7.4252028465271
Epoch 6 Iteration 2970: Train Loss = 8.406665802001953
Epoch 6 Iteration 3300: Train Loss = 7.206853866577148
Epoch 6 Iteration 3630: Train Loss = 7.717713356018066
Epoch 6 Iteration 3960: Train Loss = 7.570240020751953
Epoch 6 Iteration 4290: Train Loss = 6.5369744300842285
End of epoch 6
Epoch 6 training time cost 72.87 mins


100%|██████████| 1214/1214 [09:03<00:00,  2.23it/s]
100%|██████████| 411/411 [03:28<00:00,  1.98it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.9573593238407065, (mAP) = 0.8857916262763188
Epoch 6 test time cost 12.6 mins
Epoch 7 training start
Epoch 7 Iteration 0: Train Loss = 7.051388740539551
Epoch 7 Iteration 330: Train Loss = 6.490564823150635
Epoch 7 Iteration 660: Train Loss = 6.88668966293335
Epoch 7 Iteration 990: Train Loss = 8.113783836364746
Epoch 7 Iteration 1320: Train Loss = 6.950926780700684
Epoch 7 Iteration 1650: Train Loss = 6.798108100891113
Epoch 7 Iteration 1980: Train Loss = 7.163785934448242
Epoch 7 Iteration 2310: Train Loss = 7.413910865783691
Epoch 7 Iteration 2640: Train Loss = 6.739411354064941
Epoch 7 Iteration 2970: Train Loss = 6.486433029174805
Epoch 7 Iteration 3300: Train Loss = 7.657275199890137
Epoch 7 Iteration 3630: Train Loss = 8.076608657836914
Epoch 7 Iteration 3960: Train Loss = 6.154167175292969
Epoch 7 Iteration 4290: Train Loss = 7.914617538452148
End of epoch 7
Epoch 7 training time cost 72.97 mins


100%|██████████| 1214/1214 [08:54<00:00,  2.27it/s]
100%|██████████| 411/411 [03:16<00:00,  2.10it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.9562171628721541, (mAP) = 0.8799269997997387
Epoch 7 test time cost 12.27 mins
Epoch 8 training start
Epoch 8 Iteration 0: Train Loss = 7.182656764984131
Epoch 8 Iteration 330: Train Loss = 6.332995891571045
Epoch 8 Iteration 660: Train Loss = 6.446353435516357
Epoch 8 Iteration 990: Train Loss = 6.645884990692139
Epoch 8 Iteration 1320: Train Loss = 6.956620216369629
Epoch 8 Iteration 1650: Train Loss = 7.629325866699219
Epoch 8 Iteration 1980: Train Loss = 6.086648464202881
Epoch 8 Iteration 2310: Train Loss = 7.07792854309082
Epoch 8 Iteration 2640: Train Loss = 6.672582149505615
Epoch 8 Iteration 2970: Train Loss = 6.807537078857422
Epoch 8 Iteration 3300: Train Loss = 7.160943031311035
Epoch 8 Iteration 3630: Train Loss = 6.568235397338867
Epoch 8 Iteration 3960: Train Loss = 6.731245994567871
Epoch 8 Iteration 4290: Train Loss = 6.301626682281494
End of epoch 8
Epoch 8 training time cost 72.88 mins


100%|██████████| 1214/1214 [08:52<00:00,  2.28it/s]
100%|██████████| 411/411 [03:06<00:00,  2.20it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.9549227137744613, (mAP) = 0.8748594554287081
Epoch 8 test time cost 12.07 mins
Epoch 9 training start
Epoch 9 Iteration 0: Train Loss = 5.649981498718262
Epoch 9 Iteration 330: Train Loss = 6.37014627456665
Epoch 9 Iteration 660: Train Loss = 6.64243221282959
Epoch 9 Iteration 990: Train Loss = 6.462119102478027
Epoch 9 Iteration 1320: Train Loss = 6.171957969665527
Epoch 9 Iteration 1650: Train Loss = 7.107519149780273
Epoch 9 Iteration 1980: Train Loss = 6.850185871124268
Epoch 9 Iteration 2310: Train Loss = 6.693580627441406
Epoch 9 Iteration 2640: Train Loss = 6.2504167556762695
Epoch 9 Iteration 2970: Train Loss = 6.7143635749816895
Epoch 9 Iteration 3300: Train Loss = 6.30639123916626
Epoch 9 Iteration 3630: Train Loss = 6.727115631103516
Epoch 9 Iteration 3960: Train Loss = 8.225317001342773
Epoch 9 Iteration 4290: Train Loss = 6.892694473266602
End of epoch 9
Epoch 9 training time cost 72.78 mins


100%|██████████| 1214/1214 [11:29<00:00,  1.76it/s]
 12%|█▏        | 50/411 [00:30<03:19,  1.81it/s]