In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch

from collections import defaultdict
import os.path as osp
from typing import Union, Dict, List

#filenames=["camelyon", "camelyon_np", "resnet_camelyon", "resnet_np_camelyon"]
filenames=["camelyon", "camelyon_np"]
res_path = "../logs/div1/"

res = defaultdict(list)

print(f"{'METHOD':<30}\tTEST_ACC        TEST_ACC_STD")

for filename in filenames:

    filename_complete = osp.join(res_path, f"{filename}_results.json")

    with open(filename_complete) as f:
        logs=json.load(f)
        test_acc = logs["test"]["acc_avg"]
        test_acc_std = logs["test"]["acc_avg_std"]
        print(f"{filename:<30}\t{test_acc:.3f}              {test_acc_std:.3f}")


METHOD                        	TEST_ACC        TEST_ACC_STD
camelyon                      	0.915              nan
camelyon_np                   	0.836              nan


In [12]:
import numpy as np

for p in [False,True]:
    tot_sims = []
    for i in [2,3,4]:
        print(f"pretrained {p}  seed {i}")

        with open(f"/datasets/home/hbenoit/sim_seed{i}_p={p}.txt", "r") as f:
        
            sims = np.array([float(x.replace("\n","")) for x in list(f.readlines())])
            cutoff = len(sims)//10
            print(sims[:-cutoff].mean())
            tot_sims.append(sims[:-cutoff].mean())
            print()
    
    tot_sims = np.array(tot_sims)
    print(f"mean  {tot_sims.mean()}  std {tot_sims.std()}")

pretrained False  seed 2
0.5664769654864343

pretrained False  seed 3
0.538223832698547

pretrained False  seed 4
0.5425295792777542

mean  0.5490767924875786  std 0.012428713494530093
pretrained True  seed 2
0.5730625058776508

pretrained True  seed 3
0.5814260356420746

pretrained True  seed 4
0.5894589475478441

mean  0.5813158296891898  std 0.006694272869403668


In [6]:
sims.mean()

0.5617409013965299

In [26]:
sims.mean()

0.5804060218978102

In [28]:
sims.mean()

0.46620124113475175

In [30]:
cam_p_sims=np.array([0.49626706484641636, 0.421720297029703, 0.4940905448717949])
cam_p_sims.mean(), cam_p_sims.std()

(0.47069263558263813, 0.03464007089395604)

In [31]:
cam_np_sims=np.array([0.5404661016949153, 0.5804060218978102, 0.46620124113475175])
cam_np_sims.mean(), cam_np_sims.std()

(0.5290244549091591, 0.04732065404001418)

In [2]:
from collections import defaultdict

res_path="/datasets/home/hbenoit/DivDis-exp/wilds/logs"
filenames=["camelyon", "camelyon_np"]

seeds = [2,3,4]
num_heads = 2
training_modes=["div10", "ERM"]


def print_divdis_results(res_path, training_mode, seeds, filename):
    sims=[]
    test_acc= defaultdict(list)
    for seed in  seeds:
        res_folder = osp.join(res_path,training_mode,f"{filename}_seed{seed}")
        path = osp.join(res_folder, f"camelyon17_split:test_seed:{seed}_epoch:best_preds.json")
        with open(path) as f:
            res=json.load(f)
            preds = {}
            epoch_y_true = torch.tensor(res["epoch_y_true"])
            for i in range(num_heads):
                preds[f"h_{i}"] = torch.tensor(res[f"epoch_y_pred_res_h_{i}"])
                test_acc[f"h_{i}"].append((preds[f"h_{i}"] == epoch_y_true).float().mean().item())
            sim = (preds["h_0"] == preds["h_1"]).float().mean()
            sims.append(sim)
        
        #save_preds_path= osp.join(res_folder, "save_preds.json")
        #if osp.exists(save_preds_path):
        #    with open(save_preds_path) as f:
        #        save_preds = json.load(f)
        #        test = save_preds["res_h0"]["acc_avg"]
        #        print("h0 test")
    sims_mean = np.array(sims).mean()
    sims_std = np.array(sims).std()
    print("")
    print(filename, f"training_mode={training_mode}")
    print("h0\t\th1\t\tsimilarity")
    res_string = ""
    for k in test_acc:
        mean_test_acc = np.array(test_acc[k]).mean()
        std_test_acc = np.array(test_acc[k]).std()
        res_string = res_string + f"{mean_test_acc:.3f} +- {std_test_acc:.3f} \t"
    res_string = res_string + f"{sims_mean:.5f} +- {sims_std:.3f}"
    print(res_string)


for training_mode in training_modes:
    print()
    for filename in filenames:
        if "div" in training_mode:
            print_divdis_results(res_path=res_path, training_mode=training_mode, seeds=seeds, filename=filename)
        






camelyon training_mode=div10
h0		h1		similarity
0.866 +- 0.017 	0.802 +- 0.056 	0.90988 +- 0.077

camelyon_np training_mode=div10
h0		h1		similarity
0.856 +- 0.009 	0.851 +- 0.013 	0.98260 +- 0.004



In [9]:
import torch
import torch
import torch.nn as nn
import numpy as np
import sys
sys.path.insert(0, '/datasets/home/hbenoit/DivDis-exp/wilds/')
from algorithms.DivDis import DivDis, MultiHeadModel
from models.initializer import initialize_torchvision_model
from utils import load, move_to
import wilds
from wilds.common.data_loaders import get_eval_loader, get_train_loader
from transforms import initialize_transform
import json
from argparse import Namespace
import argparse
import os

from wilds.common.grouper import CombinatorialGrouper

path = "/datasets/home/hbenoit/DivDis-exp/wilds/logs/div10/camelyon_seed2/camelyon17_seed:2_epoch:best_model.pth"


res = torch.load(path)



In [10]:
class MultiHeadModel(nn.Module):
    def __init__(self, featurizer, classifier, heads=2):
        super().__init__()
        self.heads = heads
        self.featurizer = featurizer
        in_dim, out_dim = classifier.in_features, classifier.out_features * self.heads
        self.heads_classifier = nn.Linear(in_dim, out_dim)

    def forward(self, x):
        features = self.featurizer(x)
        outputs = self.heads_classifier(features)
        return outputs
    

    def process_batch(self, batch):
        """
        Overrides single_model_algorithm.process_batch().
        Args:
            - batch (x, y, m): a batch of data yielded by data loaders
        Output:
            - results (dictionary): information about the batch
                - y_true (Tensor): ground truth labels for batch

                - y_pred (Tensor): model output for batch

        """
        # Labeled examples
        x, y_true, metadata = batch
        x = move_to(x, DEVICE)
        y_true = move_to(y_true, DEVICE)
        # package the results
        results = { "y_true": y_true, "metadata": metadata}

        pred = self.forward(x)
        preds_chunked = torch.chunk(pred, self.heads, dim=-1)
        for i in range(self.heads):
            results[f"y_pred_{i}"] = preds_chunked[i]

        return results


featurizer = initialize_torchvision_model(
    name="densenet121", d_out=None, **{"pretrained":False}
)
classifier = nn.Linear(featurizer.d_out, 4)

import copy

model= res["algorithm"]

new_model = copy.deepcopy(model)
for key in model:
    if "model.featurizer." in key:
        new_key = key.replace("model.featurizer." ,"")
        new_model[new_key] = model[key]
        del new_model[key]
    elif "model.heads_classifier." in key:
        new_key = key.replace("model.heads_classifier.","")
        new_model[new_key] = model[key]
        del new_model[key]

featurizer.load_state_dict(new_model,strict=False)
classifier.load_state_dict({"weight":new_model["weight"], "bias":new_model["bias"]}, strict=True)

algorithm = MultiHeadModel(featurizer=featurizer, classifier=classifier)

In [12]:
from wilds import get_dataset
from wilds.common.data_loaders import get_eval_loader

dataset = get_dataset(
            dataset="camelyon17",
            root_dir="/datasets/home/hbenoit/D-BAT-exp/datasets/",
            unlabeled=True,
            download=False,
        )


eval_transform = initialize_transform(
    transform_name="image_base",
    dataset=dataset,
    is_training=False,
)

test_unlabeled = dataset.get_subset(split="test_unlabeled")
test_loader = get_eval_loader(loader="standard", dataset=test_unlabeled, batch_size=512)

TypeError: initialize_transform() missing 1 required positional argument: 'config'

In [11]:
for x in test_loader:

    print(x)
    break

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'PIL.Image.Image'>

In [8]:
dataset.split_dict

{'train_unlabeled': 10, 'val_unlabeled': 11, 'test_unlabeled': 12}

In [22]:
from wilds.datasets.camelyon17_dataset 

In [9]:
import torch


model = torch.hub.load('facebookresearch/swav:main', 'resnet50')

Downloading: "https://github.com/facebookresearch/swav/archive/main.zip" to /home/hbenoit/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/deepcluster/swav_800ep_pretrain.pth.tar" to /home/hbenoit/.cache/torch/hub/checkpoints/swav_800ep_pretrain.pth.tar


  0%|          | 0.00/108M [00:00<?, ?B/s]

In [11]:
model.fc

Linear(in_features=2048, out_features=1000, bias=True)