In [158]:
import pickle
import clip
import socket
import numpy as np
import torch

%reload_ext autoreload
%autoreload 2

print("Hostname: " + socket.gethostname())
print("Torch version:", torch.__version__)

Hostname: sx-el-121920
Torch version: 1.13.0a0+d0d6b1f


# set device (for number crunching)

In [159]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# load datasets

In [160]:
from utils.mnist_preprocessing import *
from utils.mnist_plotting import *

# dataset parameters
DATASET_BATCH_SIZE = 128
DATASET_SHUFFLE = True

In [161]:
from torchvision import transforms

# initialize datasets
train_set = DatasetMNIST(root='./data',
                            env='train',
                            color=True,
                            opt_postfix="2classes",
                            filter=[5,8],
                            first_color_max_nr=5,
                            transform= transforms.Compose([transforms.ToTensor()]))

val_set = DatasetMNIST(root='./data',
                            env='val',
                            color=True,
                            opt_postfix="2classes",
                            filter=[5,8],
                            first_color_max_nr=5,
                            transform= transforms.Compose([transforms.ToTensor()]))

test_set = DatasetMNIST(root='./data',
                            env='test',
                            color=True,
                            opt_postfix="2classes",
                            filter=[5,8],
                            first_color_max_nr=5,
                            transform= transforms.Compose([transforms.ToTensor()]))

test_set_fool = DatasetMNIST(root='./data',
                            env='test_fool',
                            color=True,
                            opt_postfix="2classes",
                            filter=[5,8],
                            first_color_max_nr=5,
                            transform= transforms.Compose([transforms.ToTensor()]))

# create dataloaders
train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                            batch_size=DATASET_BATCH_SIZE,
                                            shuffle=DATASET_SHUFFLE,
                                            num_workers=10)

val_loader = torch.utils.data.DataLoader(dataset=val_set,
                                            batch_size=DATASET_BATCH_SIZE,
                                            shuffle=DATASET_SHUFFLE,
                                            num_workers=10)

test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                            batch_size=DATASET_BATCH_SIZE,
                                            shuffle=DATASET_SHUFFLE,
                                            num_workers=10)

test_fool_loader = torch.utils.data.DataLoader(dataset=test_set_fool,
                                            batch_size=DATASET_BATCH_SIZE,
                                            shuffle=DATASET_SHUFFLE,
                                            num_workers=10)


MNIST dataset already exists
MNIST dataset already exists
MNIST dataset already exists
MNIST dataset already exists


In [162]:
print(f"Number of training samples: {len(train_loader.dataset.data_label_tuples)}")
print(f"Number of validation samples: {len(val_loader.dataset.data_label_tuples)}")
print(f"Number of test samples: {len(test_loader.dataset.data_label_tuples)}")
print(f"Number of test fool samples: {len(test_fool_loader.dataset.data_label_tuples)}")

Number of training samples: 9425
Number of validation samples: 1888
Number of test samples: 1866
Number of test fool samples: 1866


# load standalone model

In [163]:
standalone_model = pickle.load(open("/home/patrick.koller/masterthesis/data/models/standalone_resnet50.mdl", 'rb'))
standalone_model.cuda().eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [164]:
# make sure that all standalone resnet50 parameters are floating point variables (paranoia)
for p in standalone_model.parameters(): 
    p.data = p.data.float() 

In [165]:
standalone_layers = []
for name, layer in standalone_model.named_modules():
    if "conv" in name:
        standalone_layers.append(name)
        
standalone_layers

['conv1',
 'layer1.0.conv1',
 'layer1.0.conv2',
 'layer1.0.conv3',
 'layer1.1.conv1',
 'layer1.1.conv2',
 'layer1.1.conv3',
 'layer1.2.conv1',
 'layer1.2.conv2',
 'layer1.2.conv3',
 'layer2.0.conv1',
 'layer2.0.conv2',
 'layer2.0.conv3',
 'layer2.1.conv1',
 'layer2.1.conv2',
 'layer2.1.conv3',
 'layer2.2.conv1',
 'layer2.2.conv2',
 'layer2.2.conv3',
 'layer2.3.conv1',
 'layer2.3.conv2',
 'layer2.3.conv3',
 'layer3.0.conv1',
 'layer3.0.conv2',
 'layer3.0.conv3',
 'layer3.1.conv1',
 'layer3.1.conv2',
 'layer3.1.conv3',
 'layer3.2.conv1',
 'layer3.2.conv2',
 'layer3.2.conv3',
 'layer3.3.conv1',
 'layer3.3.conv2',
 'layer3.3.conv3',
 'layer3.4.conv1',
 'layer3.4.conv2',
 'layer3.4.conv3',
 'layer3.5.conv1',
 'layer3.5.conv2',
 'layer3.5.conv3',
 'layer4.0.conv1',
 'layer4.0.conv2',
 'layer4.0.conv3',
 'layer4.1.conv1',
 'layer4.1.conv2',
 'layer4.1.conv3',
 'layer4.2.conv1',
 'layer4.2.conv2',
 'layer4.2.conv3']

# load CLIP model

In [166]:
clip_model, clip_preprocess = clip.load("RN50")
clip_model.cuda().eval()

CLIP(
  (visual): ModifiedResNet(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu3): ReLU(inplace=True)
    (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
     

In [167]:
# make sure that all clip parameters are floating point variables (paranoia)
for p in clip_model.parameters(): 
    p.data = p.data.float() 

In [168]:
# save layers to be swapped
clip_layers = ["visual.layer1", "visual.layer2", "visual.layer3", "visual.layer4"]

# activation matching

In [189]:
DATASET_BATCH_SIZE

128

In [191]:
for data in train_loader:
    print((data[0].shape[0]))
    break

128


In [194]:
from torchvision import transforms

def get_mean_std(model1, model1_layers, model2, model2_layers, dataloader, batch_size, device):
    print("Collecting Dataset Statistics")
    
    model1_stats_list = []
    model2_stats_list = []
    
    # batch processing
    with torch.no_grad():
        #??? Why is it called epochs? Isn't it just one epoch with n batches?
        #??? Why to divide by the number of epochs/batches? Assumed the batch hypothesis is correct, one computes the mean batch means/std.
        #??? What about the last batch, which usually differs in size, since the training dataset is not exactly divisible by the batch-size?
        for iteration, data in enumerate(dataloader):
            
            # copy data to GPU
            inputs, _, _, _ = data
            inputs = inputs.to(device)
            
            print(f"Batch {iteration} is being processed...")
            
            ###############################################################################
            # Compute activations and statistics for model 1
            ###############################################################################
            # inference first model
            _ = model1(inputs)

            # store activations from current batch
            model1_activations = {}
            for layer in model1_layers:
                model1_activations[layer] = []    
                model1_activation = model1.retained_layer(layer, clear = True)
                model1_activations[layer].append(model1_activation)
            
            # compute statistics (mean and standard deviation)
            batch_model1_stats_list = []
            for layer in model1_layers:
                model1_activations[layer] = torch.cat(model1_activations[layer], 0) #images x channels x m x m
                model1_activations[layer] = torch.permute(model1_activations[layer], (1,0,2,3)).contiguous() #channels x images x m x m
                model1_activations[layer] = model1_activations[layer].view(model1_activations[layer].shape[0], -1) 
                batch_model1_stats_list.append([torch.mean(model1_activations[layer],dim=-1, dtype=torch.float64).unsqueeze(0).unsqueeze(2).unsqueeze(3).to(device),\
                                      torch.std(model1_activations[layer], dim=-1).unsqueeze(0).unsqueeze(2).unsqueeze(3).to(device)])

            # cleanup
            del model1_activations
            model1_stats_list.append(batch_model1_stats_list)

            ###############################################################################
            # Compute activations and statistics for model 2
            ###############################################################################
            # preprocess images
            transform = transforms.ToPILImage()
            images_new = []
            for img in inputs:
                images_new.append(clip_preprocess(transform(img)))

            # building image features
            images = torch.tensor(np.stack(images_new)).cuda()
            
            # inference second model
            _ = model2.model.encode_image(images)

            # store activations from current batch
            model2_activations = {}
            for layer in model2_layers:
                model2_activations[layer] = []    
                model2_activation = model2.retained_layer(layer, clear = True)
                model2_activations[layer].append(model2_activation)

            # compute statistics (mean and standard deviation)
            batch_model2_stats_list = []
            model2_stats_list.append(batch_model2_stats_list)
            for layer in model2_layers:
                model2_activations[layer] = torch.cat(model2_activations[layer], 0)
                model2_activations[layer] = torch.permute(model2_activations[layer], (1,0,2,3)).contiguous()
                model2_activations[layer] = model2_activations[layer].view(model2_activations[layer].shape[0], -1)
                batch_model2_stats_list.append([torch.mean(model2_activations[layer], dim=-1, dtype=torch.float64).unsqueeze(0).unsqueeze(2).unsqueeze(3).to(device),\
                                      torch.std(model2_activations[layer], dim=-1).unsqueeze(0).unsqueeze(2).unsqueeze(3).to(device)])

            # cleanup
            del model2_activations
            torch.cuda.empty_cache()
            
            break

        print("Done Iteration for Stats")

        ###############################################################################
        # All batches processed, create final statistics
        ###############################################################################
        # compute final stats for model 1
        final_model1_stats_list = []

        for iii in range(len(batch_model1_stats_list)):
            means = torch.zeros_like(batch_model1_stats_list[iii][0])
            stds = torch.zeros_like(batch_model1_stats_list[iii][1])
            for jjj in range((iteration+1)):
                means+=model1_stats_list[jjj][iii][0]
                stds+=model1_stats_list[jjj][iii][1]**2

            final_model1_stats_list.append([means/(iteration+1), torch.sqrt(stds/(iteration+1))])
        
        # compute final stats for model 1
        final_model2_stats_list = []

        for iii in range(len(batch_model2_stats_list)):
            means = torch.zeros_like(batch_model2_stats_list[iii][0])
            stds = torch.zeros_like(batch_model2_stats_list[iii][1])
            for jjj in range((iteration+1)):
                means+=model2_stats_list[jjj][iii][0]
                stds+=model2_stats_list[jjj][iii][1]**2

            final_model2_stats_list.append([means/(iteration+1), torch.sqrt(stds/(iteration+1))])

    return final_model1_stats_list, final_model2_stats_list

In [195]:
def save_array(array, filename):
    open_file = open(filename, "wb")
    pickle.dump(array, open_file)
    open_file.close()

In [196]:
def store_activs(model, layers):
    activs = []
    for layer in layers:
        activation = model.retained_layer(layer, clear = True)
        activs.append(activation)
        
    return activs

In [197]:
def dict_layers(activs):
    all_layers = {}
    for iii, activ in enumerate(activs):
        all_layers[activs[iii]] = activ.shape[1]
    return all_layers

In [198]:
def normalize(activation, stats_table):
    eps = 0.00001
    norm_input = (activation- stats_table[0])/(stats_table[1]+eps)
    
    return norm_input

In [211]:
def save_array(array, filename):
    open_file = open(filename, "wb")
    pickle.dump(array, open_file)
    open_file.close()

In [199]:
def create_final_table(all_match_table, model1_dict, model2_dict, batch_size, device ):
    num_activs1 = sum(model1_dict.values())
    num_activs2 = sum(model2_dict.values())
    final_match_table = torch.zeros((num_activs1, num_activs2)).to(device)
    
    model1_activ_count = 0 
    for ii in range(len(all_match_table)):
        model2_activ_count = 0
        for jj in range(len(all_match_table[ii])):
            num_model1activs = all_match_table[ii][0].shape[0]
            num_model2activs = all_match_table[0][jj].shape[1]
            final_match_table[model1_activ_count: model1_activ_count+num_model1activs, \
                            model2_activ_count:model2_activ_count+num_model2activs] = all_match_table[ii][jj]
            model2_activ_count += num_model2activs
        model1_activ_count += num_model1activs
    return final_match_table

In [217]:
from utils import matching, nethook, stats

def activation_matching(model1, model1_layers, model2, model2_layers, dataloader, batch_size, save_path, device):
    model1.eval()
    model2.eval()
    
    # hook layers for model 1
    model1 = nethook.InstrumentedModel(model1)
    model1.retain_layers(model1_layers)
    
    # hook layers for model 2
    model2 = nethook.InstrumentedModel(model2)
    model2.retain_layers(model2_layers)
    
    # compute dataset statistics
    model1_statistics_table, model2_statistics_table = get_mean_std(model1, model1_layers, model2, model2_layers, dataloader, batch_size, device)
    save_array(model1_statistics_table, os.path.join(save_path, "/home/patrick.koller/masterthesis/data/models/model1_statistics.pkl"))
    save_array(model2_statistics_table, os.path.join(save_path, "/home/patrick.koller/masterthesis/data/models/model2_statistics.pkl"))
    
    print("Done")
    print("Starting Activation Matching")
    
    for iteration, data in enumerate(dataloader):
        with torch.no_grad():

            # copy data to GPU
            inputs, _, _, _ = data
            inputs = inputs.to(device)
            
            print(f"Batch {iteration} is being processed...")
            
            # inference first model
            _ = model1(inputs)
            
            # append model 1 layer-activations for batch
            model1_activations = store_activs(model1, model1_layers)

            # preprocess images
            transform = transforms.ToPILImage()
            images_new = []
            for img in inputs:
                images_new.append(clip_preprocess(transform(img)))

            # building image features
            images = torch.tensor(np.stack(images_new)).cuda()
            
            # inference second model
            _ = model2.model.encode_image(images)

            # append model 2 layer-activations for batch
            model2_activations =  store_activs(model2, model2_layers)

            # create dictionary of layers with number of activations
            all_model1_layers = dict_layers(model1_activations)
            all_model2_layers = dict_layers(model2_activations)
            
            if iteration == 0:
                num_model1_activations = sum(all_model1_layers.values())
                num_model2_activations = sum(all_model2_layers.values())
                final_match_table = torch.zeros((num_model1_activations, num_model2_activations)).to(device)

            # matching
            all_match_table = []

            for ii, gan_activ in enumerate(model1_activations):
                match_table = []
                gan_activ = normalize(gan_activ, model1_statistics_table[ii])
                gan_activ_shape = gan_activ.shape

                for jj, clip_activ in enumerate(model2_activations):
                    clip_activ_new = normalize(clip_activ, model2_statistics_table[jj]) 
                    # scale maps to same size
                    map_size = max((gan_activ_shape[2], clip_activ.shape[2]))
                    gan_activ_new = torch.nn.Upsample(size=(map_size,map_size), mode='bilinear')(gan_activ)
                    clip_activ_new = torch.nn.Upsample(size=(map_size,map_size), mode='bilinear')(clip_activ_new)            
                    scores = torch.einsum('aixy,ajxy->ij', gan_activ_new,clip_activ_new)/(batch_size*map_size**2)  
                    scores = scores.cpu()
                    
                    match_table.append(scores)
                    del gan_activ_new
                    del clip_activ_new
                    del scores
                    
                all_match_table.append(match_table)
                del match_table

            # create table
            batch_match_table = create_final_table(all_match_table, all_model1_layers, all_model2_layers, batch_size, device)
            final_match_table += batch_match_table
            save_array(final_match_table, os.path.join(save_path, "norm_table_"+str(iteration)+".pkl"))
        
            del all_match_table
            del batch_match_table
            del model1_activations
            del model2_activations
            torch.cuda.empty_cache()
            
    # average and save
    final_match_table /= len(dataloader)
    save_array(final_match_table, os.path.join(save_path, "./results/table.pkl"))
    

In [214]:
os.system("pwd")

/home/patrick.koller/masterthesis


0

In [215]:
import time

start_time = time.time()
  
activation_matching(standalone_model, standalone_layers,
                    clip_model, clip_layers,
                    dataloader=train_loader,
                    batch_size=DATASET_BATCH_SIZE,
                    save_path="./data/results",
                    device=device
                    )   

end_time = time.time()

print(f"Runtime: {np.round(end_time - start_time, 3)}s")

Collecting Dataset Statistics
Batch 0 is being processed...
Done Iteration for Stats
Done
Starting Activation Matching
Batch 0 is being processed...
Runtime: 4.249s
