# If so, will such distinctively different cones remain if randomly initialized models are fully trained?


env: conda activate clip

https://github.com/SamsungLabs/pytorch-ensembles

In [24]:
import numpy as np
import torch
import pickle
import time
def version_tuple(version_string):
    """Return the leading numeric (major, minor, patch) part of a version string.

    Comparing ``version.split(".")`` lists compares *strings*, so "10" sorts
    before "7" and e.g. PyTorch 1.10 would be rejected. This helper converts
    the first three dot-separated components to integers instead.

    Args:
        version_string: e.g. "1.7.1", "1.10.0+cu113" or "2.1.0a0+gitabc".

    Returns:
        A 3-tuple of ints; missing or non-numeric components count as 0.
    """
    release = version_string.split("+")[0]  # drop a local build tag such as "+cu110"
    numbers = []
    for piece in release.split(".")[:3]:
        digits = ""
        for char in piece:
            if not char.isdigit():
                break  # stop at suffixes such as "0a0" or "1rc2"
            digits += char
        numbers.append(int(digits) if digits else 0)
    while len(numbers) < 3:
        numbers.append(0)  # pad "2.0" -> (2, 0, 0) so tuple comparison is well defined
    return tuple(numbers)


print("Torch version:", torch.__version__)

assert version_tuple(torch.__version__) >= (1, 7, 1), "PyTorch 1.7.1 or later is required"

import os
import matplotlib.pyplot as plt
from collections import OrderedDict
import torch

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

Torch version: 1.7.1


# Load CLIP

In [25]:
import clip

# Display the model names reported by the installed `clip` package.
clip.available_models()

['RN50', 'RN101', 'RN50x4', 'RN50x16', 'ViT-B/32', 'ViT-B/16']

In [26]:
# Architecture hyper-parameters taken from open_clip's ViT-B-32.json:
# https://github.com/mlfoundations/open_clip/blob/91f6cce16b7bee90b3b5d38ca305b5b3b67cc200/src/training/model_configs/ViT-B-32.json
# Within the cells shown here only `image_resolution` is read (to size the
# image preprocessing); the remaining entries are kept for reference.
model_info = dict(
    embed_dim=512,
    image_resolution=224,
    vision_layers=12,
    vision_width=768,
    vision_patch_size=32,
    context_length=77,
    vocab_size=49408,
    transformer_width=512,
    transformer_heads=8,
    transformer_layers=12,
)

In [27]:
from torchvision import transforms
# Side length (in pixels) of the square image fed to the network.
input_size = model_info['image_resolution']

# Individual steps of the image pipeline, applied in the order listed in
# `preprocess` below.
resize_step = transforms.Resize(input_size)
crop_step = transforms.CenterCrop(input_size)
to_tensor_step = transforms.ToTensor()
# Per-channel mean / std used for normalization (these values match the
# statistics commonly used with ImageNet-trained torchvision models).
normalize_step = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

preprocess = transforms.Compose([
    resize_step,
    crop_step,
    to_tensor_step,
    normalize_step,
])

In [28]:
# Sanity check: show the type of the preprocessing pipeline object.
type(preprocess)

torchvision.transforms.transforms.Compose

# Load Data

In [29]:
import torchvision
from torch.utils.data import DataLoader

def target_transform(caption_list):
    """Turn the captions of one sample into a single tokenized caption.

    Only the first entry of ``caption_list`` is used; any further captions
    are ignored. The caption is passed to ``clip.tokenize`` and the first
    element of its result is returned.

    Args:
        caption_list: indexable collection of captions for one image.

    Returns:
        ``clip.tokenize(first_caption)[0]``.
    """
    first_caption = caption_list[0]
    token_batch = clip.tokenize(first_caption)
    # tokenize returns an indexable batch; keep only the entry for our caption.
    return token_batch[0]

# coco_train_dataset = torchvision.datasets.CocoCaptions(
#                         root = '/home/ubuntu/data/coco/train2017',
#                         annFile = '/home/ubuntu/data/coco/annotations/captions_train2017.json',
#                         transform=preprocess,
#                         target_transform=target_transform,
#                         )

# Arguments for the COCO val2017 captions dataset: images are run through
# `preprocess`, caption lists through `target_transform`.
coco_val_kwargs = dict(
    root='/home/ubuntu/data/coco/val2017',
    annFile='/home/ubuntu/data/coco/annotations/captions_val2017.json',
    transform=preprocess,
    target_transform=target_transform,
)
coco_val_dataset = torchvision.datasets.CocoCaptions(**coco_val_kwargs)

loading annotations into memory...
Done (t=0.04s)
creating index...
index created!


In [30]:
# coco_train_dataloader = DataLoader(coco_train_dataset, batch_size=64, shuffle=False, num_workers=8, pin_memory=True)
# Batched loader over the validation set. shuffle=False keeps the dataset
# order, so rows of the extracted features follow the dataset indices.
coco_val_dataloader = DataLoader(
    coco_val_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
)

# ResNet

In [31]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.autograd import Variable

In [32]:

# Checkpoint file names of the ImageNet ResNet-50 ensemble members.
# Every name follows the pattern
#     ImageNet-ResNet50-<run id>--<numeric suffix>-1.pth.tar
# so only the two varying parts are listed and the names are generated.
_DEEPENS_RUN_PARTS = [
    ('052e7f78e4db', '1564492444'),
    ('1132c260ef75', '1564493784'),
    ('2f817072e8da', '1564493734'),
    ('3177c697fbf4', '1564495013'),
    ('628e11f9fd67', '1564481099'),
    ('743e10f26a38', '1564493675'),
    ('7ded66ec9900', '1564481097'),
    ('8fc5076a66c9', '1564481079'),
    ('a58ab8dd26fc', '1564492521'),
    ('a80e40d84db2', '1564492573'),
    ('be11903315ee', '1564481101'),
]

deepens_imagenet = [
    'ImageNet-ResNet50-{}--{}-1.pth.tar'.format(run_id, suffix)
    for run_id, suffix in _DEEPENS_RUN_PARTS
]

def load_model_states(model, filename):
    """
    Load previously saved weights from a checkpoint file into ``model``.

    Adapted from https://github.com/SamsungLabs/pytorch-ensembles

    The file must contain a dict with a ``'state_dict'`` entry. Keys that
    start with ``module.`` (the prefix ``nn.DataParallel`` adds when a wrapped
    model is saved) have that prefix removed; all other keys are kept as they
    are, so checkpoints saved without DataParallel load correctly too.

    Args:
        model: module whose ``load_state_dict`` receives the cleaned weights.
        filename: path of the checkpoint file.

    Raises:
        KeyError: if the checkpoint has no ``'state_dict'`` entry.
        RuntimeError: raised by ``load_state_dict`` when the keys or shapes do
            not match ``model``.
    """
    prefix = 'module.'
    with open(filename, 'rb') as f:
        # NOTE: torch.load unpickles the file, which can execute arbitrary
        # code - only load checkpoints from a trusted source.
        # map_location='cpu' avoids restoring tensors onto the (possibly
        # unavailable) GPU they were saved from; load_state_dict copies the
        # values into the model's own parameters afterwards.
        state_dict = torch.load(f, map_location='cpu')['state_dict']

    # Strip the prefix only where it is actually present; blindly dropping the
    # first 7 characters would mangle keys such as "conv1.weight".
    new_state_dict = OrderedDict(
        (k[len(prefix):] if k.startswith(prefix) else k, v)
        for k, v in state_dict.items()
    )
    model.load_state_dict(new_state_dict)



In [33]:
from clip.model import CLIP
def get_random_init_models(checkpoint_tar_name, checkpoint_dir='../deepens_imagenet/'):
    """Build a frozen ResNet-50 feature extractor from one saved checkpoint.

    A torchvision ResNet-50 is created without pretrained weights, the
    checkpoint is loaded into it via ``load_model_states``, and its last child
    module is dropped so the network outputs features instead of class scores.
    All parameters are frozen and the model is moved to the GPU in eval mode.

    Args:
        checkpoint_tar_name: file name of the checkpoint to load.
        checkpoint_dir: directory containing the checkpoint; defaults to the
            location this notebook has always used.

    Returns:
        An ``nn.Sequential`` holding every child of the ResNet-50 except the
        last one, on the GPU, in eval mode, with ``requires_grad=False``.
    """
    backbone = models.resnet50(pretrained=False)
    load_model_states(backbone, os.path.join(checkpoint_dir, checkpoint_tar_name))

    # Keep everything except the final child module of the ResNet.
    feature_layers = list(backbone.children())[:-1]
    target_model = nn.Sequential(*feature_layers)

    # The model is used for inference only, so no gradients are needed.
    for p in target_model.parameters():
        p.requires_grad = False

    target_model.cuda().eval()
    return target_model


# Extractor loop


In [34]:
since = time.time()
dataloaders = {
    # 'train': coco_train_dataloader, 
    'val': coco_val_dataloader,
}
# Only the validation split is processed in this notebook.
phase = 'val'

# Directory that receives one pickle of features per checkpoint.
feature_dir = 'features200'
os.makedirs(feature_dir, exist_ok = True) 

# One full pass over the data per checkpoint: load the model, extract the
# image features of every sample, and dump them to disk.
for expriment_idx  in range(len(deepens_imagenet)):
    target_model = get_random_init_models(checkpoint_tar_name=deepens_imagenet[expriment_idx])

    ##################################
    # Fields to be stored for postprocessing 
    ##################################

    target_image_features_list = []

    # Iterate over data. The captions are not needed for image feature
    # extraction, so they are not transferred to the GPU.
    for inputs, captions in dataloaders[phase]:
        image_input = inputs.cuda(non_blocking=True)

        with torch.no_grad():
            # flatten(start_dim=1) collapses all non-batch dimensions while
            # always keeping the batch dimension. A plain squeeze() would also
            # drop the batch dimension for a batch of one sample and break the
            # concatenation below.
            target_image_features = target_model(image_input).flatten(start_dim=1)

        ##################################
        # Evaluation book-keeping Field 
        ##################################
        target_image_features_list.append( target_image_features.cpu().numpy() )

    ##################################
    # Evaluation book-keeping Field 
    ##################################
    # Stack the per-batch arrays into one (num_samples, feature_dim) array.
    target_image_features_list   = np.concatenate( target_image_features_list, axis=0)
    print('target_image_features_list', target_image_features_list.shape)

    dump_result_dict = {
        "target_image_features_list": target_image_features_list, 
        }

    with open(os.path.join(feature_dir, 'feature_dump_{}.pkl'.format(expriment_idx) ), "wb") as pkl_file:
        pickle.dump(
            dump_result_dict, 
            pkl_file, 
        )

    # Elapsed time is measured from `since`, i.e. it is cumulative over all
    # checkpoints processed so far, not the duration of this iteration alone.
    time_elapsed = time.time() - since
    print('expriment_idx', expriment_idx)
    print('Feature Extraction completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

target_image_features_list (5000, 2048)
expriment_idx 0
Feature Extraction completed in 0m 15s
target_image_features_list (5000, 2048)
expriment_idx 1
Feature Extraction completed in 0m 31s
target_image_features_list (5000, 2048)
expriment_idx 2
Feature Extraction completed in 0m 48s
target_image_features_list (5000, 2048)
expriment_idx 3
Feature Extraction completed in 1m 2s
target_image_features_list (5000, 2048)
expriment_idx 4
Feature Extraction completed in 1m 17s
target_image_features_list (5000, 2048)
expriment_idx 5
Feature Extraction completed in 1m 32s
target_image_features_list (5000, 2048)
expriment_idx 6
Feature Extraction completed in 1m 48s
target_image_features_list (5000, 2048)
expriment_idx 7
Feature Extraction completed in 2m 6s
target_image_features_list (5000, 2048)
expriment_idx 8
Feature Extraction completed in 2m 21s
target_image_features_list (5000, 2048)
expriment_idx 9
Feature Extraction completed in 2m 40s
target_image_features_list (5000, 2048)
expriment_id