In [1]:
import sys
import os
from tqdm import tqdm
import pandas as pd
import json
import random

parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(parent_dir)

from visual_genome.local import VisualGenome

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Instantiate the local Visual Genome driver. Per the recorded cell output,
# construction reports the script/data directories and loads the data.
vg = VisualGenome()

Script directory: C:\Users\karab\Desktop\Visual Genome Driver\visual_genome
Data directory: C:\Users\karab\Desktop\Visual Genome Driver\data
Loading data...
Data loaded.


In [3]:
# Load the filtered image ids: one integer id per line of the text file.
# Blank lines (e.g. a trailing empty line at the end of the file) are skipped,
# because int('') would raise ValueError.
with open('filtered_images_2.txt', 'r') as f:
    stripped_lines = (line.strip() for line in f)
    ims = [int(entry) for entry in stripped_lines if entry]

print('Number of images:', len(ims))

Number of images: 1002


In [4]:
# Optional cell: run it only if the SAM and SAM 2 results are available in the
# data directory - otherwise skip it. The cell also loads FC-CLIP results, so
# those presumably need to be present as well (TODO confirm).
vg.load_sam_results(version=1) # SAM
vg.load_sam_results(version=2) # SAM 2
vg.load_fc_clip_results() # FC-CLIP

# Simple completion marker once all three loaders have returned.
print("Done")

Done


In [5]:
# Load the filtered image ids: one integer id per line of the text file.
# NOTE: this repeats the id loading done in an earlier cell (same file, same
# resulting list). Blank lines are skipped, because int('') would raise
# ValueError.
with open('filtered_images_2.txt', 'r') as f:
    stripped_lines = (line.strip() for line in f)
    ims = [int(entry) for entry in stripped_lines if entry]

print('Number of images:', len(ims))

Number of images: 1002


In [6]:
def randomly_visualize(vg, ims, n=1, include_regions=False, out_dir="../graphviz"):
    """Dump the scene graphs of randomly chosen images as JSON files.

    For each of the ``n`` draws an image id is picked with ``random.choice``
    (so the same image may be drawn more than once), its scene graph is built
    with ``vg.generate_scene_graph_json`` and written into ``out_dir``.
    The first file is named ``scene_graph.json``; subsequent ones are
    ``scene_graph1.json``, ``scene_graph2.json``, ...

    Args:
        vg: object exposing
            ``generate_scene_graph_json(image_id, include_regions=...)``
            that returns a JSON-serializable value.
        ims: non-empty sequence of image ids to draw from.
        n: number of scene graphs to write.
        include_regions: forwarded unchanged to ``generate_scene_graph_json``.
        out_dir: existing directory the files are written to. The default is
            the location that used to be hard-coded in this function.

    Returns:
        None. Files left behind by an earlier call with a larger ``n`` are
        not removed.
    """
    for i in range(n):
        im = random.choice(ims)
        scene_graph = vg.generate_scene_graph_json(im, include_regions=include_regions)

        # The first graph has no numeric suffix; later ones carry their index.
        suffix = i if i >= 1 else ""
        filename = f"{out_dir}/scene_graph{suffix}.json"
        with open(filename, "w") as f:
            json.dump(scene_graph, f)

In [None]:
# Number of random scene graphs to generate; the same value is handed to the
# visualizer script through its -n flag.
COUNT = 3
# Forwarded as `include_regions` to vg.generate_scene_graph_json by the
# visualization helpers.
INCLUDE_REGIONS = True

In [None]:
# Write COUNT random scene-graph JSON files, then run the visualizer script
# with the same count so it is asked for exactly as many graphs as were written.
randomly_visualize(vg, ims=ims, n=COUNT, include_regions=INCLUDE_REGIONS)
!python ../graphviz/visualize_scene_graph.py -n {COUNT}

In [46]:

def pairwise_visualize(vg, ims, n_pairs=1, include_regions=False, out_dir="../graphviz"):
    """Dump pairs of scene graphs as combined JSON files for side-by-side comparison.

    For each pair, two entries of ``ims`` are drawn, their scene graphs are
    built with ``vg.generate_scene_graph_json`` and stored together as
    ``{"im1": ..., "im2": ...}``. The first file is named
    ``scene_graph_combined.json``; subsequent ones are
    ``scene_graph_combined1.json``, ``scene_graph_combined2.json``, ...

    Args:
        vg: object exposing
            ``generate_scene_graph_json(image_id, include_regions=...)``
            that returns a JSON-serializable value.
        ims: non-empty sequence of image ids to draw from.
        n_pairs: number of pairs (files) to write.
        include_regions: forwarded unchanged to ``generate_scene_graph_json``.
        out_dir: existing directory the files are written to. The default is
            the location that used to be hard-coded in this function.

    Returns:
        None.
    """
    for i in range(n_pairs):
        if len(ims) >= 2:
            # Draw two distinct positions so an image is not compared with
            # itself (two independent random.choice calls could collide).
            im1, im2 = random.sample(ims, 2)
        else:
            # Only one candidate: fall back to pairing it with itself.
            im1 = im2 = random.choice(ims)

        scene_graph1 = vg.generate_scene_graph_json(im1, include_regions=include_regions)
        scene_graph2 = vg.generate_scene_graph_json(im2, include_regions=include_regions)
        combined = {"im1": scene_graph1, "im2": scene_graph2}

        # The first pair has no numeric suffix; later ones carry their index.
        suffix = i if i >= 1 else ""
        filename = f"{out_dir}/scene_graph_combined{suffix}.json"
        with open(filename, "w") as f:
            json.dump(combined, f)
            
pairwise_visualize(vg, ims=ims, n_pairs=2, include_regions=INCLUDE_REGIONS)
!python ../graphviz/pairwise_comparison.py -n {2}

## Correlations

In [9]:
############# Correlation between other features #############

# Load the per-image feature table.
df_feat = pd.read_csv('features.csv')

# Flip the sign of the three average-similarity columns in place. The column
# names are left unchanged, but the correlation report defined below labels
# these columns as dissimilarities.
for similarity_col in ("avg_object_similarity", "avg_region_similarity", "avg_rel_similarity"):
    df_feat[similarity_col] *= -1

def print_cors(df_feat):
    """Print Spearman correlations between predicted complexity and other features.

    Args:
        df_feat: DataFrame providing 'predicted_complexity' plus the
            segmentation-count and average-similarity columns listed below.
            The similarity columns are reported under "dissimilarity" labels,
            so the caller is expected to have negated them beforehand.

    Returns:
        None; one header line and six correlation lines are printed.
    """
    # (label, column) pairs in the order they are reported.
    report = (
        ("Predicted complexity and # of SAM 2 segmentations: ", '# of SAM 2 segmentations'),
        ("Predicted complexity and # of SAM segmentations: ", '# of SAM segmentations'),
        ("Predicted complexity and # of FC-CLIP segmentations: ", '# of FC-CLIP classes'),
        ("Predicted complexity and average object dissimilarity: ", 'avg_object_similarity'),
        ("Predicted complexity and average region dissimilarity: ", 'avg_region_similarity'),
        ("Predicted complexity and average relationship dissimilarity: ", 'avg_rel_similarity'),
    )

    print("Correlation between:")
    for label, column in report:
        complexity = df_feat['predicted_complexity']
        print(label, complexity.corr(df_feat[column], method='spearman'))

In [10]:
# Restrict the feature table to the image ids loaded into `ims`, then report
# the correlations on that subset only.
in_subset = df_feat['image_id'].isin(ims)
df_new = df_feat.loc[in_subset]
print_cors(df_new)

Correlation between:
Predicted complexity and # of SAM 2 segmentations:  0.7929978046466357
Predicted complexity and # of SAM segmentations:  0.8594621800109393
Predicted complexity and # of FC-CLIP segmentations:  0.7739138548434051
Predicted complexity and average object dissimilarity:  0.16555065662803908
Predicted complexity and average region dissimilarity:  0.5225703352981856
Predicted complexity and average relationship dissimilarity:  0.5208962993656996


In [None]:
# Here you can visualize the images of the subsample that belong to a specific cluster.

# Cluster table; the lookup helper defined in this cell reads its 'cluster'
# and 'Image_id' columns.
df_cluster = pd.read_csv('categories_all-mpnet-base-v2_80clusters_simple.csv')

# get images in a cluster
def get_images_in_cluster(cluster_id, df_cluster, ims):
    """Return the image ids of one cluster that are also part of ``ims``.

    Args:
        cluster_id: value to match against the 'cluster' column.
        df_cluster: DataFrame with 'cluster' and 'Image_id' columns. Any index
            is accepted (rows are selected with a boolean mask rather than by
            positional labels 0..n-1).
        ims: iterable of image ids that define the subsample.

    Returns:
        list: matching 'Image_id' values as plain Python scalars, in table
        order; duplicates in the table are kept.
    """
    in_cluster = df_cluster['cluster'] == cluster_id
    # Vectorized membership test instead of a per-row scan of the `ims` list.
    in_sample = df_cluster['Image_id'].isin(list(ims))
    return df_cluster.loc[in_cluster & in_sample, 'Image_id'].tolist()

# Cluster to inspect; change this id to look at a different cluster.
CLUSTER_ID = 6

cluster_images_from_sample = get_images_in_cluster(CLUSTER_ID, df_cluster, ims)
# Last expression of the cell: shows how many subsample images fall in the cluster.
len(cluster_images_from_sample)

In [None]:
# Visualize the objects of every subsample image found in the selected cluster.
# NOTE(review): the meaning of the second positional argument (0) is defined by
# vg.visualize_objects and is not visible here - TODO confirm.
for image in cluster_images_from_sample:
    vg.visualize_objects(image, 0)