In [1]:
import os
import sys
import torch
import torch.optim as optim
import logging
import numpy as np
import argparse
from tqdm import tqdm
sys.path.append('..')
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Bernoulli
from torch.utils.data import Sampler
import torchvision.datasets as datasets
from functools import partial
from copy import deepcopy
from PIL import Image
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

## Path Manager

In [2]:
def Path_Manager(fewshot_path):
    train = os.path.join(fewshot_path,'train')
    val = os.path.join(fewshot_path,'valid')
    return train,val

In [3]:


def get_transform(is_training=None):

    mean=[0.485,0.456,0.406]
    std=[0.229,0.224,0.225]

    normalize = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize(mean=mean,std=std)
                                    ])

    if is_training:
        size_transform = transforms.Resize([84,84])
        train_transform = transforms.Compose([size_transform,
                                            transforms.ColorJitter(brightness=0.4,contrast=0.4,saturation=0.4),
                                            transforms.RandomHorizontalFlip(),
                                            normalize
                                            ])
        return train_transform
    
    else:  
        size_transform = transforms.Resize([84,84])
        eval_transform = transforms.Compose([size_transform,normalize])
        
        return eval_transform
    
    
def image_loader(path,is_training):

    p = Image.open(path)
    p = p.convert('RGB')

    final_transform = get_transform(is_training=is_training)

    p = final_transform(p)

    return p

def get_dataset(data_path,is_training):

    dataset = datasets.ImageFolder(
        data_path,
        loader = lambda x: image_loader(path=x,is_training=is_training))

    return dataset

# sampler used for meta-training
class meta_batchsampler(Sampler):
    
    def __init__(self,data_source,way,shots):

        self.way = way
        self.shots = shots

        class2id = {}

        for i,(image_path,class_id) in enumerate(data_source.imgs):
            if class_id not in class2id:
                class2id[class_id]=[]
            class2id[class_id].append(i)

        self.class2id = class2id


    def __iter__(self):

        temp_class2id = deepcopy(self.class2id)
        for class_id in temp_class2id:
            np.random.shuffle(temp_class2id[class_id])       

        while len(temp_class2id) >= self.way:

            id_list = []

            list_class_id = list(temp_class2id.keys())

            pcount = np.array([len(temp_class2id[class_id]) for class_id in list_class_id])

            batch_class_id = np.random.choice(list_class_id,size=self.way,replace=False,p=pcount/sum(pcount))

            for shot in self.shots:
                for class_id in batch_class_id:
                    for _ in range(shot):
                        id_list.append(temp_class2id[class_id].pop())

            for class_id in batch_class_id:
                if len(temp_class2id[class_id])<sum(self.shots):
                    temp_class2id.pop(class_id)

            yield id_list
            
def meta_train_dataloader(data_path,way,shots):

    dataset = get_dataset(data_path=data_path,is_training=True)
#     print(dataset)

    loader = torch.utils.data.DataLoader(
        dataset,
        batch_sampler = meta_batchsampler(data_source=dataset,way=way,shots=shots),
        num_workers = 3,
        pin_memory = False)

    return loader
        

### Model

In [4]:
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class DropBlock(nn.Module):
    def __init__(self, block_size):
        super(DropBlock, self).__init__()

        self.block_size = block_size


    def forward(self, x, gamma):
        # shape: (bsize, channels, height, width)

        if self.training:
            batch_size, channels, height, width = x.shape
            
            bernoulli = Bernoulli(gamma)
            mask = bernoulli.sample((batch_size, channels, height - (self.block_size - 1), width - (self.block_size - 1))).cuda()
            block_mask = self._compute_block_mask(mask)
            countM = block_mask.size()[0] * block_mask.size()[1] * block_mask.size()[2] * block_mask.size()[3]
            count_ones = block_mask.sum()

            return block_mask * x * (countM / count_ones)
        else:
            return x

    def _compute_block_mask(self, mask):
        left_padding = int((self.block_size-1) / 2)
        right_padding = int(self.block_size / 2)
        
        batch_size, channels, height, width = mask.shape
        non_zero_idxs = mask.nonzero()
        nr_blocks = non_zero_idxs.shape[0]

        offsets = torch.stack(
            [
                torch.arange(self.block_size).view(-1, 1).expand(self.block_size, self.block_size).reshape(-1), # - left_padding,
                torch.arange(self.block_size).repeat(self.block_size), #- left_padding
            ]
        ).t().cuda()
        offsets = torch.cat((torch.zeros(self.block_size**2, 2).cuda().long(), offsets.long()), 1)
        
        if nr_blocks > 0:
            non_zero_idxs = non_zero_idxs.repeat(self.block_size ** 2, 1)
            offsets = offsets.repeat(nr_blocks, 1).view(-1, 4)
            offsets = offsets.long()

            block_idxs = non_zero_idxs + offsets
            padded_mask = F.pad(mask, (left_padding, right_padding, left_padding, right_padding))
            padded_mask[block_idxs[:, 0], block_idxs[:, 1], block_idxs[:, 2], block_idxs[:, 3]] = 1.
        else:
            padded_mask = F.pad(mask, (left_padding, right_padding, left_padding, right_padding))
            
        block_mask = 1 - padded_mask#[:height, :width]
        return block_mask
    

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, drop_rate=0.0, drop_block=False,
                 block_size=1,max_pool=True):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.LeakyReLU(0.1)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv3x3(planes, planes)
        self.bn3 = nn.BatchNorm2d(planes)
        self.maxpool = nn.MaxPool2d(stride)
        self.downsample = downsample
        self.stride = stride
        self.drop_rate = drop_rate
        self.num_batches_tracked = 0
        self.drop_block = drop_block
        self.block_size = block_size
        self.DropBlock = DropBlock(block_size=self.block_size)
        self.max_pool = max_pool

    def forward(self, x):
        self.num_batches_tracked += 1

        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        
        if self.max_pool:
            out = self.maxpool(out)

        if self.drop_rate > 0:
            if self.drop_block == True:
                feat_size = out.size()[2]
                keep_rate = max(1.0 - self.drop_rate / (20*2000) * (self.num_batches_tracked), 1.0 - self.drop_rate)
                gamma = (1 - keep_rate) / self.block_size**2 * feat_size**2 / (feat_size - self.block_size + 1)**2
                out = self.DropBlock(out, gamma=gamma)
            else:
                out = F.dropout(out, p=self.drop_rate, training=self.training, inplace=True)

        return out
    
class ResNet(nn.Module):

    def __init__(self, block, n_blocks, drop_rate=0.0, dropblock_size=5, max_pool=True):
        super(ResNet, self).__init__()

        self.inplanes = 3
        self.layer1 = self._make_layer(block, n_blocks[0], 64,
                                       stride=2, drop_rate=drop_rate)
        self.layer2 = self._make_layer(block, n_blocks[1], 160,
                                       stride=2, drop_rate=drop_rate)
        self.layer3 = self._make_layer(block, n_blocks[2], 320,
                                       stride=2, drop_rate=drop_rate, drop_block=True, block_size=dropblock_size)
        self.layer4 = self._make_layer(block, n_blocks[3], 640,
                                       stride=2, drop_rate=drop_rate, drop_block=True, block_size=dropblock_size,max_pool=max_pool)

        self.drop_rate = drop_rate
        
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)


    def _make_layer(self, block, n_block, planes, stride=1, drop_rate=0.0, drop_block=False, block_size=1,max_pool=True):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        if n_block == 1:
            layer = block(self.inplanes, planes, stride, downsample, drop_rate, drop_block, block_size,max_pool=max_pool)
        else:
            layer = block(self.inplanes, planes, stride, downsample, drop_rate)
        layers.append(layer)
        self.inplanes = planes * block.expansion

        for i in range(1, n_block):
            if i == n_block - 1:
                layer = block(self.inplanes, planes, drop_rate=drop_rate, drop_block=drop_block,
                              block_size=block_size)
            else:
                layer = block(self.inplanes, planes, drop_rate=drop_rate)
            layers.append(layer)

        return nn.Sequential(*layers)

    def forward(self, x, is_feat=False):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        return x
    
    def resnet12(drop_rate=0.0, max_pool=True, **kwargs):
        """Constructs a ResNet-12 model.
        """
        model = ResNet(BasicBlock, [1, 1, 1, 1], drop_rate=drop_rate, max_pool=max_pool, **kwargs)
        return model




In [5]:
class FRN(nn.Module):
    
    def __init__(self,way=None,shots=None,is_pretraining=False,num_cat=None):
        
        super().__init__()

        num_channel = 640
        self.feature_extractor = ResNet.resnet12()

        self.shots = shots
        self.way = way

        # number of channels for the feature map, correspond to d in the paper
        self.d = num_channel
        
        # temperature scaling, correspond to gamma in the paper
        self.scale = nn.Parameter(torch.FloatTensor([1.0]),requires_grad=True)
        
        # H*W=5*5=25, resolution of feature map, correspond to r in the paper
        self.resolution = 25

        # correpond to [alpha, beta] in the paper
        # if is during pre-training, we fix them to 0
        self.r = nn.Parameter(torch.zeros(2),requires_grad=not is_pretraining)  
    

    def get_feature_map(self,inp):

        batch_size = inp.size(0)
        feature_map = self.feature_extractor(inp)
        
        feature_map = feature_map/np.sqrt(640)
        
        return feature_map.view(batch_size,self.d,-1).permute(0,2,1).contiguous() # N,HW,C
    

    def get_recon_dist(self,query,support,alpha,beta,Woodbury=True):
    # query: way*query_shot*resolution, d
    # support: way, shot*resolution , d
    # Woodbury: whether to use the Woodbury Identity as the implementation or not

        # correspond to kr/d in the paper
        reg = support.size(1)/support.size(2)
        
        # correspond to lambda in the paper
        lam = reg*alpha.exp()+1e-6

        # correspond to gamma in the paper
        rho = beta.exp()

        st = support.permute(0,2,1) # way, d, shot*resolution

        if Woodbury:
            # correspond to Equation 10 in the paper
            
            sts = st.matmul(support) # way, d, d
            m_inv = (sts+torch.eye(sts.size(-1)).to(sts.device).unsqueeze(0).mul(lam)).inverse() # way, d, d
            hat = m_inv.matmul(sts) # way, d, d
        
        else:
            # correspond to Equation 8 in the paper
            
            sst = support.matmul(st) # way, shot*resolution, shot*resolution
            m_inv = (sst+torch.eye(sst.size(-1)).to(sst.device).unsqueeze(0).mul(lam)).inverse() # way, shot*resolution, shot*resolutionsf 
            hat = st.matmul(m_inv).matmul(support) # way, d, d

        Q_bar = query.matmul(hat).mul(rho) # way, way*query_shot*resolution, d

        dist = (Q_bar-query.unsqueeze(0)).pow(2).sum(2).permute(1,0) # way*query_shot*resolution, way
        
        return dist

    
    def get_neg_l2_dist(self,inp,way,shot,query_shot,return_support=False):
        
        resolution = self.resolution
        d = self.d
        alpha = self.r[0]
        beta = self.r[1]
        
        feature_map = self.get_feature_map(inp)
        support = feature_map[:way*shot].view(way, shot*resolution , d)
        query = feature_map[way*shot:].view(way*query_shot*resolution, d)
        recon_dist = self.get_recon_dist(query=query,support=support,alpha=alpha,beta=beta) # way*query_shot*resolution, way
        neg_l2_dist = recon_dist.neg().view(way*query_shot,resolution,way).mean(1) # way*query_shot, way
        if return_support:
            return neg_l2_dist, support
        else:
            return neg_l2_dist


    def meta_test(self,inp,way,shot,query_shot):
        neg_l2_dist = self.get_neg_l2_dist(inp=inp,
                                        way=way,
                                        shot=shot,
                                        query_shot=query_shot)

        _,max_index = torch.max(neg_l2_dist,1)

        return max_index


    def forward(self,inp):

        neg_l2_dist, support = self.get_neg_l2_dist(inp=inp,
                                                    way=self.way,
                                                    shot=self.shots[0],
                                                    query_shot=self.shots[1],
                                                    return_support=True)
            
        logits = neg_l2_dist*self.scale
        log_prediction = F.log_softmax(logits,dim=1)

        return log_prediction, support

In [6]:
def auxrank(support):
        way = support.size(0)
        shot = support.size(1)
        support = support/support.norm(2).unsqueeze(-1)
        L1 = torch.zeros((way**2-way)//2).long().cuda()
        L2 = torch.zeros((way**2-way)//2).long().cuda()
        counter = 0
        for i in range(way):
            for j in range(i):
                L1[counter] = i
                L2[counter] = j
                counter += 1
        s1 = support.index_select(0, L1) # (s^2-s)/2, s, d
        s2 = support.index_select(0, L2) # (s^2-s)/2, s, d
        dists = s1.matmul(s2.permute(0,2,1)) # (s^2-s)/2, s, s
        assert dists.size(-1)==shot
        frobs = dists.pow(2).sum(-1).sum(-1)
        return frobs.sum().mul(.03)
    
class FRN_Train:
    
    def default_train(train_loader,model,optimizer,iter_counter):

        way = model.way
        query_shot = model.shots[-1]
        target = torch.LongTensor([i//query_shot for i in range(query_shot*way)]).cuda()
        criterion = nn.NLLLoss().cuda()
        lr = optimizer.param_groups[0]['lr']


        avg_frn_loss = 0
        avg_aux_loss = 0
        avg_loss = 0
        avg_acc = 0

        for i, (inp,_) in enumerate(train_loader):
            
            iter_counter += 1
            inp = inp.cuda()
            log_prediction, s = model(inp)
            frn_loss = criterion(log_prediction,target)
            aux_loss = auxrank(s)
            loss = frn_loss + aux_loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            _,max_index = torch.max(log_prediction,1)
            acc = 100*torch.sum(torch.eq(max_index,target)).item()/query_shot/way
            avg_acc += acc
            avg_frn_loss += frn_loss.item()
            avg_aux_loss += aux_loss.item()
            avg_loss += loss.item()

        avg_acc = avg_acc/(i+1)
        avg_loss = avg_loss/(i+1)
        avg_aux_loss = avg_aux_loss/(i+1)
        avg_frn_loss = avg_frn_loss/(i+1)

        return iter_counter,avg_acc

In [7]:
# sampler used for meta-testing
class random_sampler(Sampler):

    def __init__(self,data_source,way,shot,query_shot=16,trial=1000):

        class2id = {}

        for i,(image_path,class_id) in enumerate(data_source.imgs):
            if class_id not in class2id:
                class2id[class_id]=[]
            class2id[class_id].append(i)

        self.class2id = class2id
        self.way = way
        self.shot = shot
        self.trial = trial
        self.query_shot = query_shot

    def __iter__(self):

        way = self.way
        shot = self.shot
        trial = self.trial
        query_shot = self.query_shot
        
        class2id = deepcopy(self.class2id)        
        list_class_id = list(class2id.keys())

        for i in range(trial):

            id_list = []
            np.random.seed(42)
            np.random.shuffle(list_class_id)
            picked_class = list_class_id[:way]

            for cat in picked_class:
                np.random.shuffle(class2id[cat])
                
            for cat in picked_class:
                id_list.extend(class2id[cat][:shot])
            for cat in picked_class:
                id_list.extend(class2id[cat][shot:(shot+query_shot)])

            yield id_list
            
def get_score(acc_list):

    mean = np.mean(acc_list)
    interval = 1.96*np.sqrt(np.var(acc_list)/len(acc_list))

    return mean,interval

def meta_test_dataloader(data_path,way,shot,query_shot=16,trial=1000):

    dataset = get_dataset(data_path=data_path,is_training=False)

    loader = torch.utils.data.DataLoader(
        dataset,
        batch_sampler = random_sampler(data_source=dataset,way=way,shot=shot,query_shot=query_shot,trial=trial),
        num_workers = 3,
        pin_memory = False)

    return loader

def meta_test(data_path,model,way,shot,query_shot=16,trial=10000,return_list=False):

    eval_loader = meta_test_dataloader(data_path=data_path,
                                                way=way,
                                                shot=shot,
                                                query_shot=query_shot,
                                                trial=trial)
    
    target = torch.LongTensor([i//query_shot for i in range(query_shot*way)]).cuda()

    acc_list = []

    for i, (inp,_) in tqdm(enumerate(eval_loader)):

        inp = inp.cuda()
        max_index = model.meta_test(inp,way=way,shot=shot,query_shot=query_shot)
        acc = 100*torch.sum(torch.eq(max_index,target)).item()/query_shot/way
        acc_list.append(acc)

    if return_list:
        return np.array(acc_list)
    else:
        mean,interval = get_score(acc_list)
        return mean,interval

In [8]:
def get_opt(model, lr, weight_decay,epoch,gamma):

    
    optimizer = optim.SGD(model.parameters(),lr= lr,momentum=0.9,weight_decay=weight_decay,nesterov=True)

    scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=epoch,gamma=gamma)

    return optimizer,scheduler


class Train_Manager:

    def __init__(self,path_manager,train_func):

        seed = 42
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        np.random.seed(seed)
        torch.cuda.set_device(0)

        self.name = 'ResNet-12'
        self.opt = 'sgd'
        self.lr = 1e-1
        self.gamma = 1e-1
        self.epoch = 100
        self.stage = 2
        self.weight_decay = 5e-4
        self.train_way = 10
        
        suffix = '%s-lr_%.0e-gamma_%.0e-epoch_%d-stage_%d-decay_%.0e-way_%d' % (self.opt,
                self.lr,self.gamma,self.epoch,self.stage,self.weight_decay,self.train_way)

        name = "%s-%s"%(self.name,suffix)

        self.save_path = 'model_%s.pth' % (name)

        self.train_func = train_func
        self.pm = path_manager

    def train(self,model):

        
        train_func = self.train_func
        save_path = self.save_path

        optimizer,scheduler = get_opt(model, self.lr, self.weight_decay,self.epoch, self.gamma)

        val_shot = 1
        test_way = 194

        best_val_acc = 0
        best_epoch = 0

        model.train()
        model.cuda()

        iter_counter = 0

        total_epoch = self.epoch*self.stage

        print("start training!")

        for e in tqdm(range(total_epoch)):

            iter_counter,train_acc = train_func(model=model,
                                                optimizer=optimizer,
                                                iter_counter=iter_counter)

            if (e+1)%10==0:

                print("")
                print("epoch %d/%d, iter %d:" % (e+1,total_epoch,iter_counter))
                ("train_acc: %.3f" % (train_acc))

                model.eval()
                with torch.no_grad():
                    val_acc,val_interval = meta_test(data_path=self.pm[1],
                                                            model=model,
                                                            way=test_way,
                                                            shot=val_shot,
                                                            query_shot=1,
                                                            trial=10)

                print('val_%d-way-%d-shot_acc: %.3f\t%.3f'%(test_way,val_shot,val_acc,val_interval))

                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    best_epoch = e+1
#                     if not args.no_val:
                    torch.save(model.state_dict(),'best_'+save_path)
                    print('BEST!')

                model.train()

            scheduler.step()

        print('training finished!')
#         if args.no_val:
        torch.save(model.state_dict(),save_path)

        print('------------------------')
        print(('the best epoch is %d/%d') % (best_epoch,total_epoch))
        print(('the best %d-way %d-shot val acc is %.3f') % (test_way,val_shot,best_val_acc))


    def evaluate(self,model):


        print('------------------------')
        print('evaluating on test set:')

        with torch.no_grad():

            model.load_state_dict(torch.load(self.save_path))
            model.eval()

            for shot in [1]:

                mean,interval = meta_test(data_path=self.pm[1],
                                        model=model,
                                        way=194,
                                        shot=1,
                                        query_shot=1,
                                        trial=10)

                print('%d-way-%d-shot acc: %.2f\t%.2f'%(194,shot,mean,interval))

In [9]:
data_path = os.path.abspath('../input/snap-retail-data/')
fewshot_path = os.path.join(data_path,'final_data')

pm = Path_Manager(fewshot_path=fewshot_path)
train_way = 10
shots = [2, 1]
train_loader = meta_train_dataloader(data_path=pm[0],
                                                way=train_way,
                                                shots=shots)
model = FRN(way=train_way,
            shots=[2,1])
train_func = partial(FRN_Train.default_train,train_loader=train_loader)
tm = Train_Manager(path_manager=pm,train_func=train_func)

In [10]:
tm.train(model)

start training!


  4%|▍         | 9/200 [00:29<10:05,  3.17s/it]


epoch 10/200, iter 190:



0it [00:00, ?it/s][A
1it [00:03,  3.75s/it][A
2it [00:04,  2.06s/it][A
3it [00:05,  1.56s/it][A
4it [00:07,  1.57s/it][A
5it [00:08,  1.31s/it][A
6it [00:08,  1.15s/it][A
7it [00:10,  1.16s/it][A
8it [00:10,  1.05s/it][A
9it [00:11,  1.02it/s][A
10it [00:12,  1.26s/it]
  5%|▌         | 10/200 [00:45<23:08,  7.31s/it]

val_194-way-1-shot_acc: 55.309	0.870
BEST!


 10%|▉         | 19/200 [01:13<09:42,  3.22s/it]


epoch 20/200, iter 380:



0it [00:00, ?it/s][A
1it [00:03,  3.32s/it][A
2it [00:04,  1.86s/it][A
3it [00:05,  1.40s/it][A
4it [00:05,  1.18s/it][A
5it [00:06,  1.06s/it][A
6it [00:07,  1.02it/s][A
7it [00:08,  1.07it/s][A
8it [00:09,  1.12it/s][A
9it [00:09,  1.15it/s][A
10it [00:10,  1.09s/it]
 10%|█         | 20/200 [01:27<19:36,  6.53s/it]

val_194-way-1-shot_acc: 56.804	0.977
BEST!


 14%|█▍        | 29/200 [01:55<09:06,  3.20s/it]


epoch 30/200, iter 570:



0it [00:00, ?it/s][A
1it [00:02,  2.66s/it][A
2it [00:03,  1.58s/it][A
3it [00:04,  1.26s/it][A
4it [00:05,  1.16s/it][A
5it [00:06,  1.05s/it][A
6it [00:07,  1.03it/s][A
7it [00:07,  1.08it/s][A
8it [00:08,  1.12it/s][A
9it [00:09,  1.15it/s][A
10it [00:10,  1.04s/it]
 15%|█▌        | 30/200 [02:09<18:00,  6.36s/it]

val_194-way-1-shot_acc: 57.887	1.238
BEST!


 20%|█▉        | 39/200 [02:36<08:33,  3.19s/it]


epoch 40/200, iter 760:



0it [00:00, ?it/s][A
1it [00:02,  2.67s/it][A
2it [00:03,  1.59s/it][A
3it [00:04,  1.24s/it][A
4it [00:05,  1.13s/it][A
5it [00:06,  1.02s/it][A
6it [00:06,  1.05it/s][A
7it [00:08,  1.09s/it][A
8it [00:09,  1.01s/it][A
9it [00:09,  1.05it/s][A
10it [00:10,  1.09s/it]
 20%|██        | 40/200 [02:50<17:22,  6.51s/it]

val_194-way-1-shot_acc: 60.361	1.336
BEST!


 24%|██▍       | 49/200 [03:18<08:01,  3.19s/it]


epoch 50/200, iter 950:



0it [00:00, ?it/s][A
1it [00:03,  3.16s/it][A
2it [00:04,  1.80s/it][A
3it [00:04,  1.37s/it][A
4it [00:05,  1.23s/it][A
5it [00:06,  1.10s/it][A
6it [00:07,  1.01s/it][A
7it [00:08,  1.05s/it][A
8it [00:09,  1.02it/s][A
9it [00:10,  1.07it/s][A
10it [00:11,  1.12s/it]
 25%|██▌       | 50/200 [03:33<16:46,  6.71s/it]

val_194-way-1-shot_acc: 65.000	1.632
BEST!


 30%|██▉       | 59/200 [04:01<07:44,  3.29s/it]


epoch 60/200, iter 1140:



0it [00:00, ?it/s][A
1it [00:02,  2.91s/it][A
2it [00:03,  1.71s/it][A
3it [00:04,  1.32s/it][A
4it [00:05,  1.32s/it][A
5it [00:06,  1.15s/it][A
6it [00:07,  1.04s/it][A
7it [00:08,  1.05s/it][A
8it [00:09,  1.02it/s][A
9it [00:10,  1.07it/s][A
10it [00:11,  1.12s/it]
 30%|███       | 60/200 [04:16<15:44,  6.75s/it]

val_194-way-1-shot_acc: 62.680	1.327


 34%|███▍      | 69/200 [04:45<07:20,  3.36s/it]


epoch 70/200, iter 1330:



0it [00:00, ?it/s][A
1it [00:02,  2.90s/it][A
2it [00:03,  1.69s/it][A
3it [00:04,  1.32s/it][A
4it [00:05,  1.14s/it][A
5it [00:06,  1.04s/it][A
6it [00:07,  1.03it/s][A
7it [00:08,  1.00it/s][A
8it [00:09,  1.06it/s][A
9it [00:09,  1.10it/s][A
10it [00:10,  1.08s/it]
 35%|███▌      | 70/200 [04:59<14:22,  6.63s/it]

val_194-way-1-shot_acc: 61.186	0.686


 40%|███▉      | 79/200 [05:29<06:45,  3.35s/it]


epoch 80/200, iter 1520:



0it [00:00, ?it/s][A
1it [00:03,  3.74s/it][A
2it [00:04,  2.03s/it][A
3it [00:05,  1.48s/it][A
4it [00:06,  1.33s/it][A
5it [00:07,  1.16s/it][A
6it [00:08,  1.05s/it][A
7it [00:09,  1.04s/it][A
8it [00:10,  1.03it/s][A
9it [00:10,  1.08it/s][A
10it [00:11,  1.18s/it]
 40%|████      | 80/200 [05:44<13:49,  6.91s/it]

val_194-way-1-shot_acc: 64.278	1.246


 44%|████▍     | 89/200 [06:13<06:12,  3.36s/it]


epoch 90/200, iter 1710:



0it [00:00, ?it/s][A
1it [00:03,  3.07s/it][A
2it [00:03,  1.76s/it][A
3it [00:04,  1.34s/it][A
4it [00:05,  1.26s/it][A
5it [00:06,  1.12s/it][A
6it [00:07,  1.02s/it][A
7it [00:08,  1.01it/s][A
8it [00:09,  1.07it/s][A
9it [00:10,  1.10it/s][A
10it [00:11,  1.11s/it]


val_194-way-1-shot_acc: 66.753	1.342
BEST!

 45%|████▌     | 90/200 [06:28<12:28,  6.80s/it]




 50%|████▉     | 99/200 [06:55<05:24,  3.21s/it]


epoch 100/200, iter 1900:



0it [00:00, ?it/s][A
1it [00:02,  2.83s/it][A
2it [00:03,  1.66s/it][A
3it [00:04,  1.30s/it][A
4it [00:05,  1.13s/it][A
5it [00:06,  1.03s/it][A
6it [00:07,  1.03it/s][A
7it [00:08,  1.03s/it][A
8it [00:09,  1.04it/s][A
9it [00:09,  1.09it/s][A
10it [00:10,  1.08s/it]
 50%|█████     | 100/200 [07:10<10:49,  6.50s/it]

val_194-way-1-shot_acc: 65.000	1.638


 55%|█████▍    | 109/200 [07:38<04:57,  3.27s/it]


epoch 110/200, iter 2090:



0it [00:00, ?it/s][A
1it [00:03,  3.34s/it][A
2it [00:04,  1.86s/it][A
3it [00:05,  1.43s/it][A
4it [00:06,  1.47s/it][A
5it [00:07,  1.25s/it][A
6it [00:08,  1.11s/it][A
7it [00:09,  1.05s/it][A
8it [00:10,  1.03it/s][A
9it [00:10,  1.08it/s][A
10it [00:11,  1.18s/it]


val_194-way-1-shot_acc: 66.856	1.011


 55%|█████▌    | 110/200 [07:54<10:19,  6.88s/it]

BEST!


 60%|█████▉    | 119/200 [08:21<04:27,  3.30s/it]


epoch 120/200, iter 2280:



0it [00:00, ?it/s][A
1it [00:02,  2.60s/it][A
2it [00:03,  1.57s/it][A
3it [00:04,  1.25s/it][A
4it [00:05,  1.12s/it][A
5it [00:06,  1.02s/it][A
6it [00:06,  1.05it/s][A
7it [00:07,  1.08it/s][A
8it [00:08,  1.13it/s][A
9it [00:09,  1.15it/s][A
10it [00:10,  1.03s/it]
 60%|██████    | 120/200 [08:35<08:29,  6.37s/it]

val_194-way-1-shot_acc: 67.062	0.851
BEST!


 64%|██████▍   | 129/200 [09:02<03:48,  3.22s/it]


epoch 130/200, iter 2470:



0it [00:00, ?it/s][A
1it [00:02,  2.69s/it][A
2it [00:03,  1.60s/it][A
3it [00:04,  1.25s/it][A
4it [00:05,  1.09s/it][A
5it [00:06,  1.01s/it][A
6it [00:06,  1.06it/s][A
7it [00:07,  1.10it/s][A
8it [00:08,  1.14it/s][A
9it [00:09,  1.17it/s][A
10it [00:10,  1.02s/it]
 65%|██████▌   | 130/200 [09:16<07:21,  6.30s/it]

val_194-way-1-shot_acc: 67.680	0.715
BEST!


 70%|██████▉   | 139/200 [09:43<03:10,  3.12s/it]


epoch 140/200, iter 2660:



0it [00:00, ?it/s][A
1it [00:02,  2.74s/it][A
2it [00:03,  1.61s/it][A
3it [00:04,  1.27s/it][A
4it [00:05,  1.10s/it][A
5it [00:06,  1.01s/it][A
6it [00:06,  1.06it/s][A
7it [00:07,  1.09it/s][A
8it [00:08,  1.13it/s][A
9it [00:09,  1.16it/s][A
10it [00:10,  1.03s/it]
 70%|███████   | 140/200 [09:56<06:15,  6.25s/it]

val_194-way-1-shot_acc: 66.804	0.673


 74%|███████▍  | 149/200 [10:24<02:44,  3.23s/it]


epoch 150/200, iter 2850:



0it [00:00, ?it/s][A
1it [00:03,  3.57s/it][A
2it [00:04,  1.96s/it][A
3it [00:05,  1.46s/it][A
4it [00:06,  1.46s/it][A
5it [00:07,  1.24s/it][A
6it [00:08,  1.10s/it][A
7it [00:09,  1.07s/it][A
8it [00:10,  1.01it/s][A
9it [00:11,  1.06it/s][A
10it [00:11,  1.19s/it]


val_194-way-1-shot_acc: 68.660	0.586


 75%|███████▌  | 150/200 [10:40<05:47,  6.95s/it]

BEST!


 80%|███████▉  | 159/200 [11:09<02:20,  3.44s/it]


epoch 160/200, iter 3040:



0it [00:00, ?it/s][A
1it [00:02,  2.95s/it][A
2it [00:03,  1.71s/it][A
3it [00:04,  1.32s/it][A
4it [00:05,  1.13s/it][A
5it [00:06,  1.03s/it][A
6it [00:07,  1.04it/s][A
7it [00:08,  1.04s/it][A
8it [00:09,  1.03it/s][A
9it [00:10,  1.08it/s][A
10it [00:10,  1.09s/it]
 80%|████████  | 160/200 [11:23<04:28,  6.72s/it]

val_194-way-1-shot_acc: 68.969	0.636
BEST!


 84%|████████▍ | 169/200 [11:52<01:43,  3.33s/it]


epoch 170/200, iter 3230:



0it [00:00, ?it/s][A
1it [00:03,  3.18s/it][A
2it [00:04,  1.80s/it][A
3it [00:04,  1.37s/it][A
4it [00:06,  1.41s/it][A
5it [00:07,  1.20s/it][A
6it [00:07,  1.07s/it][A
7it [00:09,  1.11s/it][A
8it [00:09,  1.01s/it][A
9it [00:10,  1.04it/s][A
10it [00:11,  1.17s/it]


val_194-way-1-shot_acc: 69.021	0.524
BEST!

 85%|████████▌ | 170/200 [12:08<03:29,  6.97s/it]




 90%|████████▉ | 179/200 [12:36<01:09,  3.31s/it]


epoch 180/200, iter 3420:



0it [00:00, ?it/s][A
1it [00:03,  3.01s/it][A
2it [00:03,  1.75s/it][A
3it [00:04,  1.34s/it][A
4it [00:06,  1.36s/it][A
5it [00:06,  1.17s/it][A
6it [00:07,  1.05s/it][A
7it [00:08,  1.01s/it][A
8it [00:09,  1.06it/s][A
9it [00:10,  1.10it/s][A
10it [00:11,  1.12s/it]
 90%|█████████ | 180/200 [12:51<02:14,  6.73s/it]

val_194-way-1-shot_acc: 69.381	0.703
BEST!


 94%|█████████▍| 189/200 [13:19<00:36,  3.32s/it]


epoch 190/200, iter 3610:



0it [00:00, ?it/s][A
1it [00:03,  3.07s/it][A
2it [00:03,  1.75s/it][A
3it [00:04,  1.35s/it][A
4it [00:05,  1.20s/it][A
5it [00:06,  1.07s/it][A
6it [00:07,  1.01it/s][A
7it [00:08,  1.01s/it][A
8it [00:09,  1.05it/s][A
9it [00:10,  1.10it/s][A
10it [00:10,  1.10s/it]
 95%|█████████▌| 190/200 [13:33<01:05,  6.58s/it]

val_194-way-1-shot_acc: 69.330	0.704


100%|█████████▉| 199/200 [14:02<00:03,  3.27s/it]


epoch 200/200, iter 3800:



0it [00:00, ?it/s][A
1it [00:03,  3.11s/it][A
2it [00:03,  1.77s/it][A
3it [00:04,  1.34s/it][A
4it [00:05,  1.20s/it][A
5it [00:06,  1.09s/it][A
6it [00:07,  1.00it/s][A
7it [00:08,  1.06it/s][A
8it [00:09,  1.11it/s][A
9it [00:09,  1.14it/s][A
10it [00:10,  1.08s/it]
100%|██████████| 200/200 [14:16<00:00,  4.28s/it]


val_194-way-1-shot_acc: 69.742	0.607
BEST!
training finished!
------------------------
the best epoch is 200/200
the best 194-way 1-shot val acc is 69.742
