In [1]:
# So we can load files from other sub-directories, e.g. datasets.
import os
import sys
from importlib import reload  

# Root of the unlabeled_extrapolation checkout. It is appended to sys.path so
# the `unlabeled_extrapolation` package imported in this cell resolves, and it
# is also the prefix of every log directory named in the dataset configs.
module_path = '/u/scr/ananya/cifar_experiments/unlabeled_extrapolation'
# Read by load_model: when True the freshly built network is moved to the GPU.
USE_CUDA = True
# The membership check keeps re-runs of this cell from adding duplicate entries.
if module_path not in sys.path:
    sys.path.append(module_path)

import torch
import unlabeled_extrapolation.utils.utils as utils
import json
import numpy as np
from scipy.special import softmax
import pickle
from sklearn.linear_model import LogisticRegression

import calibration as cal
# Re-execute the utils module so the notebook uses its current on-disk source
# rather than the copy cached by the first import in this kernel.
reload(utils)

<module 'unlabeled_extrapolation.utils.utils' from '/juice/scr/ananya/cifar_experiments/unlabeled_extrapolation/unlabeled_extrapolation/utils/utils.py'>

In [2]:
# Report whether PyTorch can see a CUDA device before any model is loaded.
print(f'cuda is available: {torch.cuda.is_available()}')
# Ask PyTorch to release cached GPU memory it is not currently using.
torch.cuda.empty_cache()

cuda is available: True


In [22]:

# Ensemble inputs for the living17 runs: one fine-tuned ('ft') and one
# linear-probe ('lp') model. How the loader functions in this notebook use
# each key:
#   'name'              -- stem of the output pickle written by the export loop.
#   'type'              -- selects the loader in model_to_all_logits_labels.
#   'path_prefix'       -- ft: per-trial run directory template; both '{}'
#                          slots are filled with the trial index.
#                          lp: directory holding the per-trial features and
#                          weights pickles.
#   'checkpoint_suffix' -- ft only: file name appended to '<run dir>checkpoints/'.
#   'id_index' /
#   'ood_index'         -- ft: positions in the run config's 'test_datasets'
#                          list; lp: positions in the pickled features/labels
#                          lists.
#   'num_trials'        -- number of trials loaded per model (0-indexed).
#   'id_val_size'       -- number of ID examples split off as validation.
living17_config = {
    'name': 'living17_lp_ft_ensemble',
    'models':
    [{
        'type': 'ft',
        'path_prefix': module_path + '/logs/full_ft_living17_resnet50/' + 'optimizer.args.lr-0.001_seed-{}_run{}/',
        'checkpoint_suffix': 'ckp_best_source_val_living',
        'id_index': 0,
        'ood_index': 1
     },
     {
        'type': 'lp',
        'path_prefix': module_path + '/logs/linprobe_living17_resnet50/',
        'id_index': 1,
        'ood_index': 2
     },
    ],
    'num_trials': 3,
    'id_val_size': 1000,
}

# Ensemble inputs for the entity30 runs. 'models'[0] is the fine-tuned ('ft')
# model and 'models'[1] the linear-probe ('lp') model; config_to_all_logits_labels
# relies on that order.
entity30_config = {
    'name': 'entity30_lp_ft_ensemble',  # stem of the output pickle
    'models':
    [{
        'type': 'ft',
        # Both '{}' slots are filled with the trial index.
        'path_prefix': module_path + '/logs/full_ft_entity30_resnet50/' + 'optimizer.args.lr-0.001_seed-{}_run{}/',
        # File name looked up under '<run dir>checkpoints/'.
        'checkpoint_suffix': 'ckp_best_source_val_entity',
        # Positions in the run config's 'test_datasets' list.
        'id_index': 0,
        'ood_index': 1
     },
     {
        'type': 'lp',
        # Directory holding the per-trial features and weights pickles.
        'path_prefix': module_path + '/logs/linprobe_entity30_resnet50/',
        # Positions in the pickled features/labels lists.
        'id_index': 1,
        'ood_index': 2
     },
    ],
    'num_trials': 3,     # trials loaded per model (0-indexed)
    'id_val_size': 500,  # ID examples split off as validation
}

# Ensemble inputs for the domainnet runs. 'models'[0] is the fine-tuned ('ft')
# model and 'models'[1] the linear-probe ('lp') model; config_to_all_logits_labels
# relies on that order.
domainnet_config = {
    'name': 'domainnet_lp_ft_ensemble',  # stem of the output pickle
    'models':
    [{
        'type': 'ft',
        # Both '{}' slots are filled with the trial index.
        'path_prefix': module_path + '/logs/full_ft_domainnet_clip_resnet50/' + 'optimizer.args.lr-0.001_seed-{}_run{}/',
        # File name looked up under '<run dir>checkpoints/'.
        'checkpoint_suffix': 'ckp_best_sketch_val',
        # Positions in the run config's 'test_datasets' list.
        'id_index': 0,
        'ood_index': 1
        
     },
     {
        'type': 'lp',
        # Directory holding the per-trial features and weights pickles.
        'path_prefix': module_path + '/logs/linprobe_domainnet_clip_resnet50/',
        # Positions in the pickled features/labels lists.
        # NOTE(review): unlike the other configs, the ID index (4) is larger
        # than the OOD index (3) -- confirm against the loader_names printed
        # by lp_to_all_logits_labels.
        'id_index': 4,
        'ood_index': 3
     },
    ],
    'num_trials': 3,     # trials loaded per model (0-indexed)
    'id_val_size': 500,  # ID examples split off as validation
    
}

# Ensemble inputs for the cifar_stl runs. 'models'[0] is the fine-tuned ('ft')
# model and 'models'[1] the linear-probe ('lp') model; config_to_all_logits_labels
# relies on that order.
cifar_stl_config = {
    'name': 'cifar_stl_lp_ft_ensemble',  # stem of the output pickle
    'models':
    [{
        'type': 'ft',
        # Both '{}' slots are filled with the trial index.
        'path_prefix': module_path + '/logs/full_ft_cifar_stl_resnet50/' + 'optimizer.args.lr-0.001_seed-{}_run{}/',
        # File name looked up under '<run dir>checkpoints/'.
        'checkpoint_suffix': 'ckp_best_cifar10-test',
        # Positions in the run config's 'test_datasets' list.
        'id_index': 0,
        'ood_index': 1
     },
     {
        'type': 'lp',
        # Directory holding the per-trial features and weights pickles.
        'path_prefix': module_path + '/logs/linprobe_cifar_stl_resnet50/',
        # Positions in the pickled features/labels lists.
        'id_index': 1,
        'ood_index': 3
     },
    ],
    'num_trials': 3,     # trials loaded per model (0-indexed)
    'id_val_size': 500,  # ID examples split off as validation
}

# Configs processed by the export loop; only entity30 is selected here.
all_configs = [entity30_config]

In [23]:

def load_config(config_path):
    """Read the JSON file at ``config_path`` and return the decoded object."""
    with open(config_path) as config_file:
        raw_text = config_file.read()
    return json.loads(raw_text)

def load_model(config, checkpoint_path):
    """Build the network described by ``config['model']`` and load a checkpoint.

    The model spec's 'classname' is prefixed with 'unlabeled_extrapolation.'
    when that package name does not already appear in it; this edits
    ``config`` in place. The network is created with ``utils.initialize``,
    moved to the GPU when the module-level ``USE_CUDA`` flag is set, given a
    new last layer via ``net.new_last_layer(config['num_classes'])``, and
    finally populated by ``utils.load_ckp(checkpoint_path, net)``.

    Returns:
        The network object with the checkpoint loaded.
    """
    model_spec = config['model']
    if 'unlabeled_extrapolation' not in model_spec['classname']:
        model_spec['classname'] = 'unlabeled_extrapolation.' + model_spec['classname']

    net = utils.initialize(model_spec)
    net = net.cuda() if USE_CUDA else net

    num_classes = config['num_classes']
    net.new_last_layer(num_classes)
    utils.load_ckp(checkpoint_path, net)
    return net

# Dataset loading.
def load_test_dataset(config, idx, batch_size=32, num_workers=2):
    """Instantiate test dataset number ``idx`` of ``config`` plus a DataLoader for it.

    The dataset spec ``config['test_datasets'][idx]`` is edited in place:
    its 'classname' is prefixed with 'unlabeled_extrapolation.' unless it
    already mentions that package or 'torchvision', and a missing
    'transforms' entry is filled from ``config['default_test_transforms']``.

    Returns:
        ``(dataset, loader)`` where ``loader`` iterates ``dataset`` in order
        (``shuffle=False``) with the given ``batch_size`` and ``num_workers``.
    """
    dataset_spec = config['test_datasets'][idx]

    classname = dataset_spec['classname']
    already_qualified = 'unlabeled_extrapolation' in classname or 'torchvision' in classname
    if not already_qualified:
        dataset_spec['classname'] = 'unlabeled_extrapolation.' + classname

    if 'transforms' not in dataset_spec:
        dataset_spec['transforms'] = config['default_test_transforms']

    dataset = utils.init_dataset(dataset_spec)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
    )
    return dataset, loader

def get_outputs_labels(net, loader):
    """Run ``net`` over every batch of ``loader`` and collect outputs and labels.

    The model is switched to eval mode and evaluated under ``torch.no_grad()``.
    Inference runs on the GPU when CUDA is available and otherwise falls back
    to the CPU, so the function also works on machines without a GPU.

    Args:
        net: a ``torch.nn.Module``; it is moved to the chosen device in place.
        loader: iterable yielding ``(images, labels)`` pairs of tensors.

    Returns:
        ``(outputs, labels)``: two numpy arrays, each the concatenation of the
        per-batch results in the order the loader produced them.

    Raises:
        ValueError: raised by ``np.concatenate`` if ``loader`` yields no batches.
    """
    # Pick the device once; with CUDA present this matches the previous
    # unconditional `.cuda()` calls.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net.to(device)
    net.eval()
    outputs_list, labels_list = [], []
    with torch.no_grad():
        for images, labels in loader:
            outputs = net(images.to(device))
            outputs_list.append(outputs.detach().cpu().numpy())
            # Labels are only collected, never fed to the model, so they do
            # not need a round trip through the device.
            labels_list.append(labels.detach().cpu().numpy())
    return np.concatenate(outputs_list), np.concatenate(labels_list)

def split_dataset(inputs, labels, split_len, seed=0):
    """Randomly partition paired arrays into a validation part and a test part.

    A permutation of ``range(len(inputs))`` is drawn from
    ``np.random.RandomState(seed)``; the first ``split_len`` permuted indices
    form the validation part and the remainder the test part. Because the
    permutation depends only on ``seed`` and the length, two calls on
    equally long arrays with the same seed select the same positions.

    Args:
        inputs: array indexable by an integer index array (e.g. ``np.ndarray``).
        labels: array of the same length as ``inputs``.
        split_len: number of examples placed in the validation part.
        seed: seed for the permutation. Defaults to 0, the value that was
            previously hard-coded.

    Returns:
        ``((val_inputs, val_labels), (test_inputs, test_labels))``.

    Raises:
        AssertionError: if ``inputs`` and ``labels`` differ in length.
    """
    assert(len(inputs) == len(labels))
    rng = np.random.RandomState(seed)
    random_indices = rng.permutation(len(inputs))
    val_indices = random_indices[:split_len]
    test_indices = random_indices[split_len:]
    return (inputs[val_indices], labels[val_indices]), (inputs[test_indices], labels[test_indices])

def make_validation(logits_list, labels_list, id_val_size):
    """Carve a validation split out of the first dataset in each list.

    Entry 0 of ``logits_list`` / ``labels_list`` is split with
    ``split_dataset`` into a validation part of ``id_val_size`` examples and
    a test part; every later entry is carried over unchanged.

    Returns:
        ``(new_logits, new_labels)`` where each list is
        ``[validation, test] + original[1:]``.
    """
    first_logits, first_labels = logits_list[0], labels_list[0]
    val_pair, test_pair = split_dataset(first_logits, first_labels, split_len=id_val_size)

    new_logits = [val_pair[0], test_pair[0]] + logits_list[1:]
    new_labels = [val_pair[1], test_pair[1]] + labels_list[1:]
    return new_logits, new_labels

def lp_to_all_logits_labels(model_config, config):
    """Rebuild linear-probe logits for every trial from pickled features and weights.

    For each trial ``i`` in ``range(config['num_trials'])`` this reads
    ``<path_prefix>features_<i>`` (a ``(features, labels, loader_names)``
    triple) and ``<path_prefix>weights_<i>.pkl`` (a 4-tuple whose first two
    items are the coefficient matrix and intercept). Logits are computed as
    ``features @ coef.T + intercept`` for the datasets at
    ``model_config['id_index']`` and ``model_config['ood_index']``, then the
    ID dataset is split by ``make_validation``.

    The trial index and the loader names with the two chosen indices are
    printed as progress/sanity output.

    Args:
        model_config: dict with 'path_prefix', 'id_index' and 'ood_index'.
        config: dict with 'num_trials' and 'id_val_size'.

    Returns:
        ``(all_model_logits, all_labels_list)``: one entry per trial, each
        entry being the lists returned by ``make_validation``.
    """
    path_prefix = model_config['path_prefix']
    all_labels_list, all_model_logits = [], []
    for i in range(config['num_trials']):
        print(i)
        torch.cuda.empty_cache()
        features_path = path_prefix + 'features_' + str(i)
        weights_path = path_prefix + 'weights_' + str(i) + '.pkl'
        # NOTE: unpickling can execute arbitrary code; only point path_prefix
        # at files produced by trusted runs.
        # `with` closes each file promptly instead of leaking the handle.
        with open(features_path, 'rb') as features_file:
            features, labels, loader_names = pickle.load(features_file)
        with open(weights_path, 'rb') as weights_file:
            coef, intercept, _, _ = pickle.load(weights_file)
        features_list = [features[model_config['id_index']], features[model_config['ood_index']]]
        labels_list = [labels[model_config['id_index']], labels[model_config['ood_index']]]
        print(loader_names, model_config['id_index'], model_config['ood_index'])
        # Apply the stored linear head to the cached features.
        model_logits = [f @ coef.T + intercept for f in features_list]
        model_logits, labels_list = make_validation(
            model_logits, labels_list, id_val_size=config['id_val_size'])
        all_model_logits.append(model_logits)
        all_labels_list.append(labels_list)
    return all_model_logits, all_labels_list

def ft_to_all_logits_labels(model_config, config):
    """Run each fine-tuned trial's checkpoint on its ID and OOD test sets.

    For trial ``i`` the run directory is ``model_config['path_prefix']``
    formatted with ``(i, i)``. Its ``config.json`` is loaded, the model is
    restored from ``checkpoints/<checkpoint_suffix>`` inside that directory,
    and logits/labels are collected for the test datasets at
    ``model_config['id_index']`` and ``model_config['ood_index']``. The ID
    results are then split by ``make_validation``. The trial index is printed
    as progress output.

    Args:
        model_config: dict with 'path_prefix', 'checkpoint_suffix',
            'id_index' and 'ood_index'.
        config: dict with 'num_trials' and 'id_val_size'.

    Returns:
        ``(all_model_logits, all_labels_list)``: one entry per trial, each
        entry being the lists returned by ``make_validation``.
    """
    run_dir_template = model_config['path_prefix']
    checkpoint_name = model_config['checkpoint_suffix']
    logits_per_trial, labels_per_trial = [], []

    for trial in range(config['num_trials']):
        print(trial)
        torch.cuda.empty_cache()

        run_dir = run_dir_template.format(trial, trial)
        run_config = load_config(run_dir + 'config.json')
        net = load_model(run_config, run_dir + 'checkpoints/' + checkpoint_name)

        # Both loaders are built before any inference, ID first.
        loaders = [
            load_test_dataset(run_config, model_config[index_key])[1]
            for index_key in ('id_index', 'ood_index')
        ]
        results = [get_outputs_labels(net, loader) for loader in loaders]
        trial_logits = [logits for logits, _ in results]
        trial_labels = [labels for _, labels in results]

        trial_logits, trial_labels = make_validation(
            trial_logits, trial_labels, id_val_size=config['id_val_size'])
        logits_per_trial.append(trial_logits)
        labels_per_trial.append(trial_labels)

    return logits_per_trial, labels_per_trial

def model_to_all_logits_labels(model_config, config):
    """Dispatch to the loader that matches ``model_config['type']``.

    'lp' goes to ``lp_to_all_logits_labels`` and 'ft' to
    ``ft_to_all_logits_labels``; the chosen loader's result is returned
    unchanged.

    Raises:
        ValueError: if the type is neither 'lp' nor 'ft'.
    """
    model_type = model_config['type']
    if model_type == 'lp':
        return lp_to_all_logits_labels(model_config, config)
    if model_type == 'ft':
        return ft_to_all_logits_labels(model_config, config)
    raise ValueError('model_config must be lp or ft')

def config_to_all_logits_labels(config):
    """Collect per-trial logits for both models of an ensemble config.

    ``config['models'][1]`` is processed first and ``config['models'][0]``
    second. Only the labels produced for ``models[1]`` are returned; the
    labels produced for ``models[0]`` are discarded without comparison.

    Returns:
        ``(models[0] logits, models[1] logits, models[1] labels)``.
    """
    model_entries = config['models']
    rob_logits, rob_labels = model_to_all_logits_labels(model_entries[1], config)
    std_logits, _std_labels = model_to_all_logits_labels(model_entries[0], config)
    return std_logits, rob_logits, rob_labels

    # Note: the first (ID) dataset is split into `id_val_size` validation examples and the rest, using seed 0.
#     return list(zip(*[get_outputs_labels(model, l) for l in loaders]))

In [24]:
# Run every selected config and pickle its
# (models[0] logits, models[1] logits, labels) triple to <save_dir><name>.pkl.
save_dir = '../logs/ensemble_logits/'
# Create the output directory up front so a missing directory cannot throw
# away the inference work done inside the loop.
os.makedirs(save_dir, exist_ok=True)
for config in all_configs:
    std_all_model_logits, rob_all_model_logits, all_labels_list = config_to_all_logits_labels(config)
    # The context manager closes (and so flushes) the file even if dump raises.
    with open(save_dir + config['name']+'.pkl', "wb") as out_file:
        pickle.dump((std_all_model_logits, rob_all_model_logits, all_labels_list),
                    out_file)

0
['source_train_entity', 'source_val_entity', 'target_val_entity'] 1 2
1
['source_train_entity', 'source_val_entity', 'target_val_entity'] 1 2
2
['source_train_entity', 'source_val_entity', 'target_val_entity'] 1 2
0
1
2


In [45]:
# Sanity check: number of labels in the third split of trial 0. Each trial's
# list is [ID validation, ID test, OOD], as assembled by make_validation.
len(all_labels_list[0][2])

1700

In [8]:
# For fine-tuning get (what ft_to_all_logits_labels reads per trial):
#   <path_prefix>config.json and <path_prefix>checkpoints/<checkpoint_suffix>

# For linear-probing get (what lp_to_all_logits_labels reads per trial):
#   features_i (loaded without an extension) and weights_i.pkl, sub in i

# NOTE(review): the output recorded for this cell shows seed-2_run2, which
# does not match format(0, 0) -- the saved output looks stale; re-run to confirm.
s = '/logs/full_ft_living17_resnet50/' + 'optimizer.args.lr-0.001_seed-{}_run{}'
s.format(0, 0) # 0-index these

'/logs/full_ft_living17_resnet50/optimizer.args.lr-0.001_seed-2_run2'

'/logs/full_ft_living17_resnet50/optimizer.args.lr-0.001_seed-2_run2'