# Imagenet data
We now repeat the same analysis on the ImageNet similarity data provided by cblearn.

In [41]:
import sys
sys.path.append("..")
from pathlib import Path
import pandas as pd
import re
import numpy as np
import altair as alt
from typing import Optional
from tangles.plotting import plot_soft_predictions
from estimators import SoeKmeans
from tangles.data_types import Data
from sklearn.utils import Bunch
from cblearn.embedding import SOE
from cblearn.preprocessing import triplets_from_mostcentral, triplets_from_oddoneout, triplets_from_multiselect
from cblearn.datasets import fetch_imagenet_similarity
from imagenet_class_mapping import LABEL_TO_HUMAN_DICT, COCO_IMAGENET_REVERSE_DICT
import matplotlib.pyplot as plt
from triplets import triplets_to_majority_neighbour_cuts
from estimators import OrdinalTangles

In [42]:
# Fetch version "0.1" of the ImageNet similarity dataset through cblearn.
data = fetch_imagenet_similarity(version="0.1")
# Convert the multiselect responses into triplets. Presumably every query
# records the 2 selected items (select=2) in ranked order (is_ranked=True)
# -- TODO confirm against the cblearn documentation.
triplets = triplets_from_multiselect(data.data, select=2, is_ranked=True)
# Derive cuts from the triplets with the project helper from triplets.py.
# NOTE(review): the exact meaning of radius=1/2 is defined in that helper and
# is not visible from this notebook.
cuts = triplets_to_majority_neighbour_cuts(triplets, radius=1/2)

In [43]:
# Keep only the first 999 columns of the cuts array for this run.
cuts_small = cuts[:, :999]
# NOTE(review): 25 is passed positionally; presumably the agreement parameter
# of the tangles algorithm -- confirm against estimators.OrdinalTangles.
tangles = OrdinalTangles(25)
# Fit on the reduced cuts. Later cells treat y_pred as integer cluster ids
# aligned with `classes` (it is compared against a cluster id to build a mask).
y_pred = tangles.fit_predict(cuts_small)

In [58]:
# Toggle between the two imported lookup tables: COCO_IMAGENET_REVERSE_DICT
# when `coco` is True, LABEL_TO_HUMAN_DICT otherwise.
coco = True
label_lookup = COCO_IMAGENET_REVERSE_DICT if coco else LABEL_TO_HUMAN_DICT

# Translate every entry of data.class_label; entries missing from the chosen
# table are recorded as the placeholder string "None".
classes = [label_lookup.get(raw_label, "None") for raw_label in data.class_label]
print(classes[:10])

def filter_none(xs):
    """Return the items of ``xs`` as a list, dropping every item equal to "None".

    "None" here is the placeholder *string*, not the ``None`` object; items
    keep their original order.
    """
    return [item for item in xs if item != "None"]

# Convert the label list to a NumPy array so that later cells can select
# entries with a boolean mask (e.g. classes[y_pred == class_id]).
classes = np.array(classes)

['None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'bird', 'None']


In [59]:
# Walk over every cluster id from 0 up to the largest id in y_pred and print
# the id followed by the mapped labels of its members (placeholder "None"
# entries are removed by filter_none).
highest_cluster_id = np.unique(y_pred).max()
for cluster in range(highest_cluster_id + 1):
    print(cluster)
    members = classes[y_pred == cluster]
    print(filter_none(members))

0
['dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'cat']
1
['bicycle', 'bottle', 'bottle', 'bottle', 'bottle']
2
['bird', 'bird', 'bird']
3
['bird', 'dog', 'chair', 'chair', 'keyboard', 'clock']
4
['bird', 'dog', 'cat', 'bear', 'clock']
5
['dog', 'bottle', 'boat']
6
[]
7
['bird']
8
['truck', 'bicycle', 'truck']
9
['dog', 'truck']
10
['dog', 'truck']
11
['boat']
12
['bird', 'dog', 'dog']
13
['car', 'car', 'truck']
14
['airplane', 'keyboard']
15
['bird']
16
['bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog',

In [60]:
# Cluster the same triplets with the project's SoeKmeans estimator, configured
# with a 10-dimensional embedding and 12 clusters.
embedding = SoeKmeans(embedding_dimension=10, n_clusters=12)
# The second argument is an all-ones vector with one entry per triplet.
# NOTE(review): presumably these are the triplet responses expected by
# fit_predict -- confirm against estimators.SoeKmeans.
y_soe_kmeans = embedding.fit_predict(triplets, np.ones(triplets.shape[0]))

In [61]:
# Print, for cluster ids 0..11, the id followed by the mapped labels of the
# items assigned to it in y_soe_kmeans (placeholder "None" entries removed).
for cluster in range(12):
    print(cluster)
    members = classes[y_soe_kmeans == cluster]
    print(filter_none(members))

0
['dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'cat']
1
['bicycle', 'bottle', 'bottle', 'bottle', 'bottle']
2
['bird', 'bird', 'bird']
3
['bird', 'dog', 'chair', 'chair', 'keyboard', 'clock']
4
['bird', 'dog', 'cat', 'bear', 'clock']
5
['dog', 'bottle', 'boat']
6
[]
7
['bird']
8
['truck', 'bicycle', 'truck']
9
['dog', 'truck']
10
['dog', 'truck']
11
['boat']
