In [2]:
!pwd

/home/elpis_ubuntu/pretrain/RHO-Loss/notebooks


In [3]:
from typing import List, Optional
import os
import sys
sys.path.append('../')
import numpy as np

import hydra
from omegaconf import DictConfig, OmegaConf
from pytorch_lightning import (
    Callback,
    LightningDataModule,
    LightningModule,
    Trainer,
    seed_everything,
)
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torchvision.transforms as T
from torchvision.datasets import CIFAR100, CIFAR10
from tqdm import tqdm
import pickle as pkl

from src.utils import utils
import copy
from sklearn.linear_model import LogisticRegression


log = utils.get_logger(__name__)

import pdb

In [4]:
# where to download the datasets
data_dir = "../data/"

# where to upload the weights and biases logs
my_project = "tutorial_notebook"
my_entity = "xyz"

# Calculate p(y_t | y_s)

In [4]:
tm_list = []
config = {
    "model": {
        "_target_": "src.models.OneModel.OneModel.load_from_checkpoint",
        "checkpoint_path": f"/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-03-01/02-38-19/checkpoints/epoch_040.ckpt"
    },
    "seed": 12,
    "debug": False,
    "ignore_warnings": True,
    "base_outdir": "logs",
}
config = OmegaConf.create(config)
utils.print_config(
    config,
    fields=(
        "model",
        "seed",
    ),
    resolve=True,
)
if "seed" in config:
    seed_everything(config.seed, workers=True)

model: LightningModule = hydra.utils.instantiate(
    config.model,
    _convert_="partial",
)

# Create the transition matrix from cifar10(source) to cifar100(target)
test_transform = T.Compose([
    T.ToTensor(),
    T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    # T.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)) # CIFAR-100
])
cifar_train = CIFAR100('./data', train=True, download=True, transform=test_transform)
train_loader = DataLoader(cifar_train, batch_size=1, num_workers=0)
transition_matrix = np.zeros((10, 100))

model.cuda()
model.eval()
with torch.no_grad():
    for image, target in tqdm(train_loader):
        image, target = image.cuda(), target.cuda()
        pred = model(image)
        pred_l = torch.argmax(pred, dim=1).detach().cpu().tolist()
        # for t, p_s in enumerate(pred_l):
        transition_matrix[pred_l[0]][target.detach().cpu().tolist()[0]] += 1

tm = copy.copy(transition_matrix) / len(train_loader)
p_y_s = tm.sum(axis=1)
for i, p in enumerate(p_y_s):
    tm[i] = tm[i] / p
with open(f'./data/cifar10usage0.1_cifar100_transition.pkl', 'wb') as f:
    pkl.dump(tm, f)

Global seed set to 12


cwd: /home/elpis_ubuntu/pretrain/RHO-Loss/notebooks/../src/datamodules
Files already downloaded and verified


100%|██████████| 50000/50000 [01:52<00:00, 445.21it/s]


# Initialize Linear Probe on Source

In [5]:
config = {
    "model": {
        "_target_": "src.models.OneModel.OneModel.load_from_checkpoint",
        "checkpoint_path": f"/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-03-01/02-38-19/checkpoints/epoch_040.ckpt"
    },
    "seed": 12,
    "debug": False,
    "ignore_warnings": True,
    "base_outdir": "logs",
}
config = OmegaConf.create(config)
utils.print_config(
    config,
    fields=(
        "model",
        "seed",
    ),
    resolve=True,
)
if "seed" in config:
    seed_everything(config.seed, workers=True)

model: LightningModule = hydra.utils.instantiate(
    config.model,
    _convert_="partial",
)

# Create the transition matrix from cifar10(source) to cifar100(target)
test_transform = T.Compose([
    T.ToTensor(),
    T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    # T.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)) # CIFAR-100
])
cifar_train = CIFAR100('./data', train=True, download=True, transform=test_transform)
train_loader = DataLoader(cifar_train, shuffle=False, batch_size=1024, num_workers=0)
embedding_matrix = []
model.model.fc = nn.Sequential()

model.cuda()
model.eval()
with torch.no_grad():
    for image, target in tqdm(train_loader):
        image, target = image.cuda(), target.cuda()
        embedding = model(image)
        embedding_matrix.append(embedding.detach().cpu())

embedding_matrix = torch.vstack(embedding_matrix)
with open(f'./data/cifar10usage0.1_cifar100_embedding.pkl', 'wb') as f:
    pkl.dump(embedding_matrix, f)

clf = LogisticRegression(random_state=13, max_iter=10000).fit(embedding_matrix.numpy(), cifar_train.targets)
with open(f'./data/cifar10usage0.1_cifar100_lp.pkl', 'wb') as f:
    pkl.dump(clf, f)

Global seed set to 12


cwd: /home/elpis_ubuntu/pretrain/RHO-Loss/notebooks/../src/datamodules
Files already downloaded and verified


100%|██████████| 49/49 [00:06<00:00,  7.39it/s]


# Linear probe from source to Target

In [None]:
ckpt_list = [
    '/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-02-11/16-41-17/checkpoints/epoch_099.ckpt', # ft epoch 1
    '/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-02-11/17-00-56/checkpoints/epoch_099.ckpt', # ft epoch 10
    '/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-02-11/17-20-40/checkpoints/epoch_099.ckpt', # ft epoch 20
    '/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-02-11/17-39-50/checkpoints/epoch_099.ckpt', # ft epoch 30
    '/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-02-11/17-58-57/checkpoints/epoch_099.ckpt', # ft epoch 40
    '/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-02-11/18-17-50/checkpoints/epoch_099.ckpt', # ft epoch 50
    '/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-02-11/18-36-44/checkpoints/epoch_099.ckpt', # ft epoch 60
    '/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-02-11/18-55-57/checkpoints/epoch_099.ckpt', # ft epoch 70
    '/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-02-11/19-14-59/checkpoints/epoch_099.ckpt', # ft epoch 80
    '/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-02-11/19-33-56/checkpoints/epoch_099.ckpt', # ft epoch 90
    '/home/elpis_ubuntu/pretrain/RHO-Loss/logs/runs/2023-02-11/19-53-07/checkpoints/epoch_099.ckpt', # ft epoch 100
]

for ckpt in ckpt_list:
    config = {
        "model": {
            "_target_": "src.models.OneModel.OneModel.load_from_checkpoint",
            "checkpoint_path": ckpt
        },
        "seed": 12,
        "debug": False,
        "ignore_warnings": True,
        "base_outdir": "logs",
    }
    config = OmegaConf.create(config)
    utils.print_config(
        config,
        fields=(
            "model",
            "seed",
        ),
        resolve=True,
    )
    if "seed" in config:
        seed_everything(config.seed, workers=True)

    model: LightningModule = hydra.utils.instantiate(
        config.model,
        _convert_="partial",
    )

    # Create the transition matrix from cifar10(source) to cifar100(target)
    test_transform = T.Compose([
        T.ToTensor(),
        T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        # T.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)) # CIFAR-100
    ])
    cifar_train = CIFAR100('./data', train=True, download=True, transform=test_transform)
    train_loader = DataLoader(cifar_train, shuffle=False, batch_size=1024, num_workers=0)
    embedding_matrix = []
    model.model.fc = nn.Sequential()

    model.cuda()
    model.eval()
    with torch.no_grad():
        for image, target in tqdm(train_loader):
            image, target = image.cuda(), target.cuda()
            embedding = model(image)
            embedding_matrix.append(embedding.detach().cpu())

    embedding_matrix = torch.vstack(embedding_matrix)
    clf = LogisticRegression(random_state=13, max_iter=10000).fit(embedding_matrix.numpy(), cifar_train.targets)
    with open(f'./data/cifar10_cifar100_ft_lp.pkl', 'wb') as f:
        pkl.dump(clf, f)