In [37]:
import argparse
import os
import json
import tqdm
import math
import numpy as np
import random
import time
import torch
import torchvision.transforms as transforms
from matplotlib import pyplot as plt
from PIL import Image
import pprint
import pandas as pd

np.random.seed(42)
random.seed(42)

from model import Seq2SeqAutoEncoderModel, Seq2SeqAutoEncoderConfig
from evaluation import decode_image_from_seq, visualize_segments, tsne_visualize, get_knn_similarity, get_datasets, linear_evaluation, reconstruction_evaluation


from data.dataset import get_dataset, SeqMaskDataset, LVISDataset, V3DetDataset, COCODataset, VisualGenomeDataset, SA1BDataset

def get_datasets(model, expand_mask_ratio=0):
    """Build the validation datasets used for evaluation.

    Each base dataset is wrapped in a `SeqMaskDataset` configured from
    `model.config.num_queries` and `model.config.data_seq_length`.

    Args:
        model: object exposing `config.num_queries` and `config.data_seq_length`.
        expand_mask_ratio: forwarded unchanged to every `SeqMaskDataset`.

    Returns:
        A list in the fixed order `[coco, lvis, v3det]`.
    """
    cfg = model.config

    def _wrap(base_dataset, text_features_path):
        # All three datasets share the same sequence settings; only the
        # underlying dataset and its text-feature file differ.
        return SeqMaskDataset(
            dataset=base_dataset,
            num_queries=cfg.num_queries,
            data_seq_length=cfg.data_seq_length,
            text_features=text_features_path,
            expand_mask_ratio=expand_mask_ratio,
        )

    coco = _wrap(
        COCODataset(coco_root='/home/dchenbs/workspace/datasets/coco2017', split='val'),
        'data/text_features/coco_clip_rn50.npy',
    )
    lvis = _wrap(
        LVISDataset(
            lvis_root='/home/dchenbs/workspace/datasets/lvis',
            coco_root='/home/dchenbs/workspace/datasets/coco2017',
            split='val',
        ),
        'data/text_features/lvis_clip_rn50.npy',
    )
    v3det = _wrap(
        V3DetDataset(v3det_root='/home/dchenbs/workspace/datasets/v3det', split='val'),
        'data/text_features/v3det_clip_rn50.npy',
    )

    return [coco, lvis, v3det]

In [38]:
# os.environ['CUDA_VISIBLE_DEVICES'] = '3'
# model_dir = '/home/dchenbs/workspace/Seq2Seq-AutoEncoder/runs/Nov28_20-50-04_host19-SA1B-[327MB-16queries-1024]-[lr1e-05-bs16x1step-8gpu]/checkpoints/checkpoint_ep0_step1000k'

# print(f'Loading model from {model_dir}')

# model = Seq2SeqAutoEncoderModel.from_pretrained(model_dir).half().cuda().eval()
# datasets = get_datasets(model)

# num_steps = 4
# batch_size = 2

# for dataset in datasets[1:]:
#     print(f'Generating reconstructions for {dataset.dataset.dataset_name} dataset')
#     save_dir = os.path.join(model_dir, 'reconstructions')
#     os.makedirs(save_dir, exist_ok=True)
#     for step in range(num_steps):
#         batch_data = []
#         batch_sample_info = []
#         for i in range(batch_size):
#             success = False
#             while not success:
#                 this_data, this_sample_info = dataset[np.random.randint(0, len(dataset))]        

#                 img = Image.open(this_sample_info['image_path'])
#                 aspect_ratio = img.size[0] / img.size[1]
#                 img_area = img.size[0] * img.size[1]
#                 if aspect_ratio < 1:
#                     aspect_ratio = 1 / aspect_ratio
#                 area = this_sample_info['bbox'][2] * this_sample_info['bbox'][3]
#                 if area > 0.33*img_area and area < 0.66*img_area and aspect_ratio < 1.5:
#                     success = True

#             batch_data.append(this_data)
#             batch_sample_info.append(this_sample_info)
#             dataset.sample_buffer = []

#         batch_data = torch.stack(batch_data).half().cuda()
#         batch_latents = model.encode(batch_data)
#         batch_reconstructed = model.generate(batch_latents, show_progress_bar=True)

#         for i in range(batch_size):
#             seq = batch_data[i]
#             reconstructed = batch_reconstructed[i]
#             sample_info = batch_sample_info[i]

#             original_segment, original_mask = decode_image_from_seq(seq.float().cpu().numpy())
#             reconstructed_segment, reconstructed_mask = decode_image_from_seq(reconstructed.float().cpu().numpy())

#             print(f"[{dataset.dataset.dataset_name}]: {sample_info['caption']}")
#             fig = visualize_segments(sample_info, original_segment, reconstructed_segment)
            
#             img_name = f'{dataset.dataset.dataset_name}_{step*batch_size + i}.png'
#             plt.savefig(os.path.join(save_dir, img_name), bbox_inches='tight')
#             # plt.show()
#             plt.close(fig)

In [None]:
# Pin this kernel to one physical GPU.
# NOTE: this only has an effect if CUDA has not been initialised yet in this
# process, so run this cell before any other cell touches the GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

# Checkpoints to compare. An entry ending in `.json` is built from its config
# file alone (no `from_pretrained`), i.e. without loading checkpoint weights --
# presumably the untrained baseline.
model_configs = [
    'runs/Nov14_17-31-06_host19-SA1B-[327MB-16queries-1024]-[lr1e-05-bs16x1step-8gpu]/checkpoints/checkpoint_ep0_step50k/config.json',
    'runs/Nov14_17-31-06_host19-SA1B-[327MB-16queries-1024]-[lr1e-05-bs16x1step-8gpu]/checkpoints/checkpoint_ep0_step50k',
    'runs/Nov14_17-31-06_host19-SA1B-[327MB-16queries-1024]-[lr1e-05-bs16x1step-8gpu]/checkpoints/checkpoint_ep0_step500k',
    'runs/Nov14_17-31-06_host19-SA1B-[327MB-16queries-1024]-[lr1e-05-bs16x1step-8gpu]/checkpoints/checkpoint_ep0_step1000k',
    'runs/Nov28_20-50-04_host19-SA1B-[327MB-16queries-1024]-[lr1e-05-bs16x1step-8gpu]/checkpoints/checkpoint_ep0_step500k',
    'runs/Nov28_20-50-04_host19-SA1B-[327MB-16queries-1024]-[lr1e-05-bs16x1step-8gpu]/checkpoints/checkpoint_ep0_step1000k',
    ]

# features[model_config][dataset_name] -> {'latents', 'ids', 'text_features'}
features = {}
num_steps = 250     # batches drawn per (model, dataset)
batch_size = 100    # samples per batch

for model_config in model_configs:
    if model_config.endswith('.json'):
        config = Seq2SeqAutoEncoderConfig.from_json_file(model_config)
        model = Seq2SeqAutoEncoderModel(config)
    else:
        model = Seq2SeqAutoEncoderModel.from_pretrained(model_config)
    model = model.half().cuda().eval()
    print(f'>>> Loaded model from {model_config}')

    features[model_config] = {}

    # get_datasets returns [coco, lvis, v3det]. The previous slice `[1:2]`
    # selected LVIS although it was annotated "only coco", while the evaluation
    # cell reads the 'coco' entry. Index 0 is the COCO dataset.
    datasets = get_datasets(model, expand_mask_ratio=0)[0:1]  # only coco
    for dataset in datasets:
        all_latents = []
        all_sample_info = []
        print(f'Generating latent vectors for {dataset.dataset.dataset_name} dataset')
        for step in tqdm.tqdm(range(num_steps)):
            batch_data = []
            for i in range(batch_size):
                # Samples are drawn uniformly at random, with replacement.
                this_data, this_sample_info = dataset[np.random.randint(0, len(dataset))]
                batch_data.append(this_data)
                this_sample_info['class_id'] = dataset.dataset.class_name_to_class_id(this_sample_info['name'])
                all_sample_info.append(this_sample_info)
            batch_data = torch.stack(batch_data).half().cuda()

            # Inference only: no autograd graph is needed for the encoder pass.
            with torch.no_grad():
                batch_latents = model.encode(batch_data).cpu().numpy()
            all_latents.append(batch_latents)

        all_latents = np.concatenate(all_latents, axis=0)
        all_ids = np.array([x['class_id'] for x in all_sample_info])
        all_text_features = np.array([x['text_feature'] for x in all_sample_info])

        features[model_config][dataset.dataset.dataset_name] = {
            'latents': all_latents,
            'ids': all_ids,
            'text_features': all_text_features,
        }
        # Re-save after every dataset so partial results survive an interruption.
        # The dict is stored as a pickled object array.
        np.save('/home/dchenbs/workspace/cache/temp/features.npy', features)

# Reload the cache from disk. `allow_pickle=True` unpickles arbitrary objects,
# so only ever load a file produced by this notebook.
features = np.load('/home/dchenbs/workspace/cache/temp/features.npy', allow_pickle=True).item()

In [None]:
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity
def get_knn_similarity(latents, references, k=10):
    """Mean cosine similarity among the references of each sample's neighbours.

    For every row of `latents`, its `k` nearest neighbours are searched within
    `latents` itself. The reference of the first returned neighbour (normally
    the query row itself) is compared by cosine similarity against the
    references of all `k` neighbours, and the mean of those values is taken.
    The result is the average of these per-row means.

    Args:
        latents: 2-D array-like of feature vectors; also used as the search set.
        references: array indexable by an integer index array, aligned with
            `latents` row by row.
        k: number of neighbours per query.

    Returns:
        The averaged similarity, or the integer 0 if `None in references`.
    """
    if None in references:
        return 0

    neighbor_search = NearestNeighbors(n_neighbors=k)
    neighbor_search.fit(latents)

    per_row_means = []
    for row in tqdm.tqdm(range(len(latents))):
        _, neighbor_ids = neighbor_search.kneighbors([latents[row]])
        neighbor_refs = references[neighbor_ids[0]]
        # The first neighbour acts as the anchor the others are compared to.
        anchor = neighbor_refs[0].reshape(1, -1)
        per_row_means.append(np.mean(cosine_similarity(anchor, neighbor_refs)))

    return sum(per_row_means) / len(latents)


from sklearn.manifold import TSNE
def tsne_visualize(latents, ids, title=''):
    """Show a 2-D t-SNE scatter plot of `latents`, coloured by `ids`.

    Args:
        latents: feature matrix passed to `TSNE.fit_transform`.
        ids: per-sample values used as scatter colours (`tab20` colormap).
        title: figure title.
    """
    projector = TSNE(n_components=2, random_state=42)
    embedded = projector.fit_transform(latents)
    xs, ys = embedded[:, 0], embedded[:, 1]

    plt.figure(figsize=(6, 6))
    plt.scatter(xs, ys, c=ids, cmap='tab20', s=10, alpha=0.5)
    plt.title(title)
    plt.axis('off')
    plt.show()

import umap
def umap_visualize(latents, ids, title=''):
    """Show a 2-D UMAP scatter plot of `latents`, coloured by `ids`.

    Args:
        latents: feature matrix passed to `umap.UMAP().fit_transform`.
        ids: per-sample values used as scatter colours (`tab20` colormap).
        title: figure title.
    """
    reducer = umap.UMAP()
    embedded = reducer.fit_transform(latents)
    xs, ys = embedded[:, 0], embedded[:, 1]

    plt.figure(figsize=(6, 6))
    plt.scatter(xs, ys, c=ids, cmap='tab20', s=10, alpha=0.5)
    plt.title(title)
    plt.axis('off')
    plt.show()

In [None]:

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def linear_classification_eevaluation(latents, ids, val_split_ratio=0.5):
    """Linear-probe accuracy of `latents` for predicting `ids`.

    The data is split with a fixed seed, a logistic-regression classifier is
    fitted on the training part, and accuracy is measured on the held-out part.

    Args:
        latents: feature matrix, one row per sample.
        ids: class label per sample.
        val_split_ratio: passed as `test_size` to `train_test_split`.

    Returns:
        Accuracy on the validation split.
    """
    # NOTE: the function name keeps its original spelling so callers still work.
    split = train_test_split(latents, ids, test_size=val_split_ratio, random_state=42)
    train_x, val_x, train_y, val_y = split

    classifier = LogisticRegression(random_state=42, max_iter=2000)
    classifier.fit(train_x, train_y)

    return accuracy_score(val_y, classifier.predict(val_x))


def knn_classification_evaluation(latents, ids, val_split_ratio=0.5, k=5):
    """k-NN majority-vote classification accuracy of `latents`.

    The data is split with a fixed seed. Each validation sample is assigned the
    most frequent label among its `k` nearest training samples; ties resolve to
    the smallest label because `argmax` returns the first maximum.

    Args:
        latents: feature matrix, one row per sample.
        ids: array of non-negative integer labels (required by `np.bincount`).
        val_split_ratio: passed as `test_size` to `train_test_split`.
        k: number of neighbours that vote.

    Returns:
        Fraction of validation samples whose voted label equals the true label.
    """
    split = train_test_split(latents, ids, test_size=val_split_ratio, random_state=42)
    train_x, val_x, train_y, val_y = split

    neighbor_search = NearestNeighbors(n_neighbors=k)
    neighbor_search.fit(train_x)

    num_correct = 0
    for row in tqdm.tqdm(range(len(val_x))):
        _, neighbor_ids = neighbor_search.kneighbors([val_x[row]])
        votes = np.bincount(train_y[neighbor_ids[0]])
        predicted = np.argmax(votes)
        num_correct += (predicted == val_y[row])

    return num_correct / len(val_x)

# Evaluate the cached latents of every checkpoint on a single dataset.
# `dataset_name` must match a key stored under features[model_config] by the
# feature-extraction cell (the dataset's `dataset_name`).
dataset_name = 'coco'
for model_config in model_configs:
    print(f'{model_config}')
    features_dict = features[model_config][dataset_name]
    predictions = features_dict['latents']          # encoder outputs
    reference = features_dict['text_features']      # per-sample text features (unused below)
    ids = features_dict['ids']                      # per-sample class ids

    # Optional 2-D visualisations (disabled).
    # tsne_visualize(predictions, ids, title=f'{model_config.split("/")[-2]}-{dataset_name}')
    # umap_visualize(predictions, ids, title=f'{model_config.split("/")[-2]}-{dataset_name}')

    # Optional linear probe (disabled). The keyword is `val_split_ratio`.
    # acc = linear_classification_eevaluation(predictions, ids, val_split_ratio=0.5)
    # print(f'Linear classification accuracy: {acc}')

    # k-NN probe: 20% of the samples are held out, 8 neighbours vote.
    acc = knn_classification_evaluation(predictions, ids, val_split_ratio=0.2, k=8)
    print(f'KNN classification accuracy: {acc}')
