In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from fastai.vision.all import *
from self_supervised.layers import *
import sklearn

In [3]:
from utils.custom_vit import *
from utils.attention import *
from utils.object_crops import *
from utils.part_crops import *
from utils.multi_crop_model import *

In [4]:
from fastai.callback.wandb import WandbCallback
import wandb

In [5]:
# Root folder of the Stanford Dogs dataset (relative path); the CSV index files
# and the `images/Images` folder are read from underneath it in later cells.
datapath = Path("../data/stanford-dogs-dataset/")

In [6]:
# Read the three CSV index files in one pass; the order of the file names
# matches the order of the unpacking targets.
train_df, test_df, sample_df = (
    pd.read_csv(datapath/csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_train.csv')
)

In [7]:
# (rows, columns) of the train, test and sample frames, in that order.
tuple(frame.shape for frame in (train_df, test_df, sample_df))

((12000, 2), (8580, 2), (6000, 3))

### Dataset

In [8]:
def read_image(filename):
    "Open `filename` from the dataset's `images/Images` folder as a `PILImage`."
    return PILImage.create(datapath/'images/Images'/filename)

def read_image_size(filename):
    "Return the `.shape` of the image stored under `filename`."
    return read_image(filename).shape

In [9]:
# Quick-experiment switch: when True the filenames/labels come from
# sample_train.csv (`sample_df`), otherwise from train.csv (`train_df`).
FAST = False

In [10]:
# Pick the filename/label columns from the sample frame (FAST) or the full
# training frame, then build the filename -> label lookup once.
# NOTE(review): the sample frame is indexed with singular column names
# ('filename'/'label') while the full frame uses plural ones
# ('filenames'/'labels') — confirm against the CSV headers.
filenames, labels = (
    (sample_df['filename'].values, sample_df['label'].values)
    if FAST else
    (train_df['filenames'].values, train_df['labels'].values)
)
fn2label = dict(zip(filenames, labels))

In [11]:
def read_label(filename):
    "Look up the label of `filename` in `fn2label` (raises `KeyError` if it is missing)."
    return fn2label[filename]

In [12]:
# Filenames whose `split` column in the sample frame equals 'valid'.
_valid_rows = sample_df.query("split == 'valid'")
valid_filenames = _valid_rows['filename'].values

In [13]:
# Crop size passed to RandomResizedCrop, and the batch size of the dataloaders.
size, bs = 448, 16

# Item pipelines: filename -> image -> tensor -> random resized crop,
# and filename -> label -> category.
tfms = [[read_image, ToTensor, RandomResizedCrop(size, min_scale=.75)],
        [read_label, Categorize()]]

# Hash the validation filenames once: `o in <numpy array>` scans the whole
# array for every filename, whereas a set lookup is constant time.
_valid_filename_set = set(valid_filenames)

def valid_splitter(o):
    "Return True when filename `o` belongs to the validation split."
    return o in _valid_filename_set

dsets = Datasets(filenames, tfms=tfms, splits=FuncSplitter(valid_splitter)(filenames))

# Batch-level pipeline: int -> float conversion, the default `aug_transforms()`
# augmentations, then normalisation with `imagenet_stats`.
batch_augs = aug_transforms()
stats = imagenet_stats
batch_tfms = [IntToFloatTensor] + batch_augs + [Normalize.from_stats(*stats)]
dls = dsets.dataloaders(bs=bs, after_batch=batch_tfms)

### Training

In [14]:
def model_splitter(m):
    "Split `m` into three parameter groups: image encoder, norm and classifier."
    groups = L(m.image_encoder, m.norm, m.classifier)
    return groups.map(params)

In [15]:
# Pretrained timm ViT, used only as the source of weights.
arch = "vit_base_patch16_384"
_encoder = create_encoder(arch, pretrained=True, n_in=3)

# Custom ViT built with a matching configuration; its head is replaced by
# Identity before the timm weights are copied in.
_vit_cfg = dict(img_size=384, patch_size=16, embed_dim=768, depth=12, num_heads=12)
encoder = VisionTransformer(**_vit_cfg)
encoder.head = Identity()
_timm_weights = _encoder.state_dict()
encoder.load_state_dict(_timm_weights);

In [16]:
# Metric handed to the Learner.
metrics = accuracy
# Label-smoothing cross-entropy loss; the positional 0.1 is presumably the
# smoothing factor (eps) — confirm against the fastai signature.
loss_func = LabelSmoothingCrossEntropyFlat(0.1)

In [17]:
# Hyperparameters forwarded to MultiCropViT as keyword arguments.
_mcvit_kwargs = dict(
    input_res=384,
    high_res=448,
    min_obj_area=64*64,
    crop_sz=128,
    p_attn_erasing=0.5,
    attn_erasing_thresh=0.7,
)
mcvit_model = MultiCropViT(encoder, **_mcvit_kwargs)

In [18]:
# Start a Weights & Biases run when WANDB is enabled.
# The numeric values repeat the keyword arguments passed to MultiCropViT when
# `mcvit_model` is built; keep the two in sync by hand.
WANDB = True
if WANDB:
    xtra_config = {
        "input_res": 384,
        "high_res": 448,
        "min_obj_area": 64*64,
        "crop_sz": 128,
        "p_attn_erasing": 0.5,
        "attn_erasing_thresh": 0.7,
        "Dataset": "Stanford Dogs",
    }
    wandb.init(project="fgvc-2021", config=xtra_config);

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkeremturgutlu[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.24 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [19]:
# No callbacks are registered here.
# NOTE(review): WandbCallback is imported and a wandb run is initialised in
# this notebook, but the callback is not added to `cbs` — confirm that is intentional.
cbs = []
learn = Learner(
    dls,
    mcvit_model,
    loss_func=loss_func,
    opt_func=ranger,
    splitter=model_splitter,
    metrics=metrics,
    cbs=cbs,
)
# Switch the learner to mixed-precision training.
learn.to_fp16();

In [None]:
epochs = 8
lr = 3e-3

# Stage 1: partially frozen training.
# freeze_to(1) presumably freezes only the first parameter group returned by
# `model_splitter` (the image encoder) — confirm against fastai.
learn.freeze_to(1)
learn.fit_one_cycle(epochs, lr_max=lr, pct_start=0.5)

# Stage 2: unfreeze everything and continue at a third of the base learning
# rate, with the first parameter group at a further tenth of that.
lr = lr / 3
learn.unfreeze()
# NOTE(review): epochs**2 is 64 epochs for epochs=8 — confirm that squaring
# (rather than e.g. epochs*2) is what was intended.
n_finetune_epochs = int(epochs**2)
group_lrs = [lr/10, lr, lr]
learn.fit_one_cycle(n_finetune_epochs, lr_max=group_lrs, pct_start=0.5)

epoch,train_loss,valid_loss,accuracy,time




In [None]:
# Display the first five groups of images stored on `learn.collect_images`,
# decoding each image through the batch transforms before plotting.
# NOTE(review): `learn.collect_images` presumably refers to a callback that is
# not present in `cbs` as defined in this notebook — confirm where it is attached.
for images in learn.collect_images.images[:5]:
    decoded = [dls.after_batch.decode(img[None, ...])[0] for img in images]
    show_images(decoded)