# Qualitative Evaluation of CroQS Benchmark samples

In [1]:
from lib.CroQS import CroQS

# Location of the CroQS benchmark JSON file, relative to the working directory.
croQS_json_path = "./CroQS_Benchmark_v1.0.0.json"
# Benchmark handle queried by the cells below.
croQS = CroQS(croQS_json_path)

In [2]:
# Display the initial queries available in the benchmark.
croQS.list_queries()

['dog running',
 'person reading',
 'laptop near',
 'car moving',
 'small house',
 'horse sleeping',
 'cat drinking',
 'factory',
 'chair',
 'classroom',
 'a person cooking',
 'kid eating',
 'person with pizza',
 'person with a surf board',
 'person riding',
 'reading a news paper',
 'keyboard',
 'using a notebook',
 'running water',
 'a cat proud',
 'dog in the water',
 'riding a motorcycle',
 'flying a kite',
 'marketplace',
 'kitchen appliances',
 'a photo of a tree',
 'people enjoying',
 'a portrait',
 'person at the phone',
 'football',
 'dish of food',
 'playing videogames',
 'sunrise',
 'a woman in the city',
 'a sport race',
 'ice cream',
 'police',
 'playing frisbee',
 'happiness',
 'a man with water',
 'backpack',
 'clock',
 'italy',
 'italian car',
 'cat resting',
 'glass bottle',
 'tennis player',
 'road sign',
 'mountain',
 'bridge']

In [3]:
# Initial query whose clusters are inspected in the following cells.
initial_query = "a sport race"

In [4]:
# Display the (cluster id, cluster query) pairs recorded for the selected initial query.
croQS.list_clusters_queries(initial_query)

[('0', 'a sport race of people riding'),
 ('1', 'a sport race of horses'),
 ('2', 'a sport race on water'),
 ('3', 'a winter sport race'),
 ('4', 'a sport race of motorcycles')]

In [9]:
# Number of COCO ids returned for each initial query, in list_queries() order;
# the summary below reports these counts as images per query.
num_images = [
    len(croQS._get_coco_ids_of_query(query))
    for query in croQS.list_queries()
]

print(f"The total number of images is {sum(num_images)}")
print(f"The average number of images per query is {sum(num_images) / len(num_images)}")
print(f"The minimum number of images for a query is {min(num_images)}")
print(f"The maximum number of images for a query is {max(num_images)}")

The total number of images is 8929
The average number of images per query is 178.58
The minimum number of images for a query is 109
The maximum number of images for a query is 200


In [7]:
# Number of cluster queries listed for each initial query, in list_queries() order.
num_clusters = [
    len(croQS.list_clusters_queries(query))
    for query in croQS.list_queries()
]

In [13]:
# Summary statistics over the per-query cluster counts, printed one per line.
print(
    f"The total number of semantic clusters is {sum(num_clusters)}",
    f"The average number of clusters per query is {sum(num_clusters) / len(num_clusters)}",
    f"The minimum number of clusters for a query is {min(num_clusters)}",
    f"The maximum number of clusters for a query is {max(num_clusters)}",
    sep="\n",
)

The total number of semantic clusters is 295
The average number of clusters per query is 5.9
The minimum number of clusters for a query is 2
The maximum number of clusters for a query is 10


In [6]:
# Show the clusters of one initial query; the query is set again here so the
# cell does not depend on the earlier assignment.
# NOTE(review): render=True presumably makes show_clusters render the result
# inline in the notebook — confirm against lib.CroQS.
initial_query = "a sport race"
html_content = croQS.show_clusters(initial_query, render=True)
html_content

### Building a browsable dataset in HTML

In [3]:
import os

# Export one HTML page per initial query into `output_folder_name`.
# q_file_names and q_num_clusters are parallel lists (one entry per query, in
# list_queries() order) that are filled while exporting.
q_file_names = []
q_num_clusters = []
output_folder_name = "browsable-dataset"

# Make sure the destination exists before any page is written; without this the
# export fails with FileNotFoundError on a fresh checkout.
os.makedirs(output_folder_name, exist_ok=True)

cors_proxy = "https://corsproxy.io/?"  # set to None to use a local image folder instead

use_coco_http_urls = True

# Three configurations are passed on to show_clusters:
#   * use_coco_http_urls=True            -> no local image folder, no proxy
#   * False and cors_proxy is None       -> local "coco-images" folder inside the output folder
#   * False and cors_proxy is set        -> no local image folder, proxy forwarded
images_local_path = None
images_path_from_html_folder = None
if use_coco_http_urls:
    cors_proxy = None
elif cors_proxy is None:
    images_local_path = os.path.join(output_folder_name, "coco-images")
    images_path_from_html_folder = "coco-images"
    os.makedirs(images_local_path, exist_ok=True)


for q in croQS.list_queries():
    # Validate before the query is used to build the page and its file name.
    assert isinstance(q, str)
    html_content = croQS.show_clusters(q, render=False, images_local_path=images_local_path,
                                       images_path_from_html_folder=images_path_from_html_folder,
                                       cors_proxy=cors_proxy)
    # File name derived from the query text: surrounding whitespace dropped, spaces -> dashes.
    q_file_name = q.strip().replace(" ", "-") + ".html"
    html_output_path = f"./{output_folder_name}/{q_file_name}"
    # Explicit encoding so the written HTML does not depend on the platform's default.
    with open(html_output_path, 'w', encoding='utf-8') as out:
        out.write(html_content)
    q_file_names.append(q_file_name)
    q_num_clusters.append(len(croQS._list_clusters_labels(q)))

In [4]:
from jinja2 import Environment, FileSystemLoader

# Load the index page template from the local "templates" folder.
jinja_env = Environment(loader=FileSystemLoader('templates'))
index_template = jinja_env.get_template('queries-index.html')

# q_file_names and q_num_clusters are the parallel per-query lists filled by the
# export loop; they are handed to the template under the same names.
html_content = index_template.render(q_file_names=q_file_names, q_num_clusters=q_num_clusters)

# Explicit encoding so the written page does not depend on the platform's default.
with open(f'./{output_folder_name}/index.html', 'w', encoding='utf-8') as out:
    out.write(html_content)