In [1]:
import argparse
import datetime
import json
import numpy as np
import os
import time
from pathlib import Path

import torch
import torch.backends.cudnn as cudnn
from torch.utils.tensorboard import SummaryWriter
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import timm

assert timm.__version__ == "0.3.2"  # version check
import timm.optim.optim_factory as optim_factory

In [3]:
# define the model
import sys, os
sys.path.append('/home/dongik/src/SSL-PE/ssl_pe/mae/util/')
sys.path.append('/home/dongik/src/SSL-PE/ssl_pe/mae/')

from ssl_pe.mae import models_mae

model = models_mae.__dict__['mae_vit_large_patch16'](norm_pix_loss=False)

In [4]:
assert np.__version__ == "1.19.5"

In [6]:
model.to('cuda')

MaskedAutoencoderViT(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
  )
  (blocks): ModuleList(
    (0): Block(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=1024, out_features=3072, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=1024, out_features=1024, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=1024, out_features=4096, bias=True)
        (act): GELU()
        (fc2): Linear(in_features=4096, out_features=1024, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=1024, o

In [12]:
ims = torch.randn(16, 3 ,224, 224)

out = model(ims)

In [13]:
type(out)

tuple

In [14]:
len(out)

3

In [16]:
out[0]

tensor(1.8464, grad_fn=<DivBackward0>)

In [20]:
out[1].size()

torch.Size([16, 196, 768])

In [19]:
out[2].size()

torch.Size([16, 196])

In [23]:
224 * 224 / (16 * 16)

196.0

In [25]:
print(str(model))

MaskedAutoencoderViT(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
  )
  (blocks): ModuleList(
    (0): Block(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=1024, out_features=3072, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=1024, out_features=1024, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=1024, out_features=4096, bias=True)
        (act): GELU()
        (fc2): Linear(in_features=4096, out_features=1024, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=1024, o

In [30]:
import util.misc as misc
from util.misc import NativeScalerWithGradNormCount as NativeScaler

import models_mae

from engine_pretrain import train_one_epoch

len(sys.path)

8

In [31]:
sys.path

['/home/dongik/src/SSL-PE',
 '/home/dongik/.conda/envs/mae/lib/python38.zip',
 '/home/dongik/.conda/envs/mae/lib/python3.8',
 '/home/dongik/.conda/envs/mae/lib/python3.8/lib-dynload',
 '',
 '/home/dongik/.conda/envs/mae/lib/python3.8/site-packages',
 '/home/dongik/src/SSL-PE/ssl_pe/mae/util/',
 '/home/dongik/src/SSL-PE/ssl_pe/mae/']

In [33]:
seed = args.seed + misc.get_rank()
torch.manual_seed(seed)
np.random.seed(seed)

NameError: name 'args' is not defined

In [34]:
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.2, 1.0), interpolation=3),  # 3 is bicubic
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

In [36]:
dataset_train = datasets.ImageFolder(
    os.path.join('/home/dongik/datasets/imagenet/ILSVRC/ILSVRC/Data/CLS-LOC/train'), transform=transform_train
)
print(dataset_train)

Dataset ImageFolder
    Number of datapoints: 1281167
    Root location: /home/dongik/datasets/imagenet/ILSVRC/ILSVRC/Data/CLS-LOC/train
    StandardTransform
Transform: Compose(
               RandomResizedCrop(size=(224, 224), scale=(0.2, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BICUBIC)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )


In [39]:
len(dataset_train)

1281167

In [40]:
num_tasks = misc.get_world_size()
global_rank = misc.get_rank()
sampler_train = torch.utils.data.DistributedSampler(
    dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True
)
print("Sampler_train = %s" % str(sampler_train))

Sampler_train = <torch.utils.data.distributed.DistributedSampler object at 0x7f2dc5c2d730>


In [41]:
data_loader_train = torch.utils.data.DataLoader(
    dataset_train, sampler=sampler_train,
    batch_size=64,
    num_workers=10,
    pin_memory=True,
    drop_last=True,
)

In [48]:
epoch = 400
metric_logger = misc.MetricLogger(delimiter="  ")
metric_logger.add_meter('lr', misc.SmoothedValue(window_size=1, fmt='{value:.6f}'))
header = 'Epoch: [{}]'.format(epoch)
print_freq = 20

for data_iter_step, (samples, _) in enumerate(metric_logger.log_every(data_loader_train, print_freq, header)):
    print(samples.size())
    break

torch.Size([64, 3, 224, 224])
