In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os
from sklearn.metrics.pairwise import cosine_similarity
from tqdm.notebook import tqdm, trange

from torchvision import transforms
from PIL import Image

In [4]:
# Machine switch: set to True when running with access to Pim's laptop, False otherwise.
# NOTE(review): none of the visible cells read this flag — confirm it is still needed.
LAPTOP_PIM = True

In [5]:
# Path to the concept overview CSV; the merge cell loads the table from here when the file exists.
csv_path = '../data/concept_overview_latent_space.csv'

In [19]:
# Build the concept overview table `cdf` (one row per training concept / GloVe
# neighbour pair, with the tau value of each), or load it from the cached CSV.
if os.path.exists(csv_path):
    # Result of an earlier run of the else-branch below.
    cdf = pd.read_csv(csv_path, index_col=0)
else:
    # tau per training concept, ordered by index and rounded to 3 decimals
    training_concepts = pd.read_csv('../data/training_tau_latent.csv', index_col=0).sort_index()
    training_concepts['Training tau'] = training_concepts['Training tau'].round(3)
    print(training_concepts.shape)

    # Broden concept -> GloVe neighbour pairs
    latent_concept_pairs = pd.read_csv('../data/broden_concepts_glove_neighbors.csv', index_col=0)
    print(latent_concept_pairs.shape)

    # Left join keeps every training concept, also those without a neighbour.
    cdf = training_concepts.merge(latent_concept_pairs,
                                  how='left',
                                  left_on='Training Concept',
                                  right_on='Broden concept').drop(columns=['Broden concept'])
    # Re-add the neighbour column under its final name (appended as the last column).
    cdf['GloVe Concept'] = cdf['GloVe neighbor']
    cdf = cdf.drop(columns=['GloVe neighbor'])
    print(cdf.shape)

    # tau per GloVe neighbour concept
    latent_concepts = pd.read_csv('../data/new_glove_tau_latent.csv', index_col=0)

    cdf = cdf.merge(latent_concepts,
                    how='left',
                    left_on='GloVe Concept',
                    right_on='New GloVe Concept').drop(columns=['New GloVe Concept'])
    cdf['GloVe tau'] = cdf['GloVe tau'].round(3)

    # Write to the same path the cache check reads, so the two can never drift apart.
    cdf.to_csv(csv_path)

In [20]:
# Spot check: show all rows of the overview table for the training concept 'ground'.
cdf.loc[cdf['Training Concept'] == 'ground', :]

Unnamed: 0,Training tau,Training Concept,GloVe Concept,GloVe tau
220,-0.014,ground,soil,0.304
221,-0.014,ground,grounds,0.013
222,-0.014,ground,air,-0.299
223,-0.014,ground,side,-0.136
224,-0.014,ground,feet,0.446
225,-0.014,ground,.,-0.212
226,-0.014,ground,down,-0.284
227,-0.014,ground,surface,0.335
228,-0.014,ground,they,-0.229
229,-0.014,ground,moving,-0.184


In [26]:
### Load data to visualize images

# Transformed GloVe data.
z2 = np.load('../data/z2.npy')

# Concepts belonging to the embedding.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open('../data/embedding_concepts.data', 'rb') as concepts_file:
    embedding_concepts = pickle.load(concepts_file)

# GloVe neighbours, i.e. the new concepts.
with open('../data/glove_neighbors_concepts.data', 'rb') as neighbors_file:
    glove_neighbors = pickle.load(neighbors_file)

# Transformed image representations and the matching image indices.
transformed_imgs = np.load('../data/transformed_images.npy')
transformed_ixs = np.load('../data/transformed_images_ixs.npy')

# All SoN info.
son_info = pd.read_csv('../data/son_votes.csv', index_col=0)

In [7]:
# Cosine similarity between the original CAV concept vectors and the transformed images.
# The concept vectors are the first len(embedding_concepts) rows of z2.T
# (assumes z2 stores one concept per column, original concepts first — TODO confirm).
training_neigh_similarity = cosine_similarity(z2.T[:len(embedding_concepts), :], transformed_imgs)

In [8]:
# Extract the new concept vectors: every row of z2.T after the first
# len(embedding_concepts) rows (the rows used for the original concepts).
new_concept_activations = z2.T[len(embedding_concepts):,:]

In [9]:
# Cosine similarity between the new concept vectors and the transformed images
# (one row per new concept).
cossim_latent = cosine_similarity(new_concept_activations, transformed_imgs)

This function saves the images most closely related to a Broden concept in the latent space.

In [11]:
#function to save a number of concept images from the Broden concepts
%matplotlib inline
def save_broden_concept_imgs_to_drive(coi, num_imgs):
    """Save a strip of the images most similar to a Broden concept.

    The `num_imgs` images with the highest cosine similarity to the concept
    (rows of `training_neigh_similarity`) are centre-cropped to 400 px, ordered
    by ascending 'Average' score from `son_info` and drawn side by side. The
    figure is written to ``../ICPR_Paper/concept_images/<num_imgs>/<coi>.png``.
    Nothing is rendered when that file already exists.

    Parameters
    ----------
    coi : str
        Concept of interest; looked up with ``embedding_concepts.index(coi)``.
    num_imgs : int
        Number of most similar images to show.

    Raises
    ------
    ValueError
        Propagated from ``embedding_concepts.index`` when `coi` is unknown
        (assuming `embedding_concepts` is a list).
    """
    img_dir = '../ICPR_Paper/concept_images/' + str(num_imgs) + '/'
    # makedirs also creates missing parent folders, which os.mkdir does not.
    os.makedirs(img_dir, exist_ok=True)

    save_path = img_dir + coi + '.png'
    if os.path.exists(save_path):
        return

    # similarity of every transformed image to the concept of interest
    coi_sim = training_neigh_similarity[embedding_concepts.index(coi)]

    # positions of the most similar images, mapped to their son_info index
    nearest = coi_sim.argsort()[::-1][:num_imgs]
    son_ixs = [transformed_ixs[j] for j in nearest]

    paths = ['../son_images/images/' + str(son_info.loc[i, 'ID']) + '.jpg' for i in son_ixs]
    scores = [son_info.loc[i, 'Average'] for i in son_ixs]

    # sort the images by ascending scenicness score
    paths = [paths[i] for i in np.asarray(scores).argsort()]

    img_transform = transforms.Compose([transforms.CenterCrop(400)])
    fig = plt.figure(figsize=(15, 15))
    try:
        # One panel per available image; this equals num_imgs unless fewer images exist.
        n_panels = len(paths)
        for pos, path in enumerate(paths):
            panel = fig.add_subplot(1, n_panels, pos + 1)
            # Read the pixels inside the context manager so the file is closed right away.
            with Image.open(path) as img:
                crop_img = np.asarray(img_transform(img))
            panel.axis('off')
            panel.imshow(crop_img)

        fig.subplots_adjust(wspace=0, hspace=0)
        fig.savefig(save_path, bbox_inches='tight')
    finally:
        # Always release the figure, also when loading or saving fails.
        fig.clf()
        plt.close(fig)

In [12]:
# Render the image strip (8 images) for the Broden concepts.
# NOTE(review): the `break` stops after the first concept, so only one strip is
# produced here — remove it to process all of embedding_concepts.
for coi in tqdm(embedding_concepts):
    save_broden_concept_imgs_to_drive(coi, 8)
    break

HBox(children=(FloatProgress(value=0.0, max=302.0), HTML(value='')))

This function saves the images for the new GloVe concepts

In [18]:
def save_glove_neighbor_imgs_to_drive(coi, num_imgs):
    """Save a strip of the images most similar to a new GloVe concept.

    The `num_imgs` images with the highest cosine similarity to the concept
    (rows of `cossim_latent`) are centre-cropped to 400 px, ordered by
    ascending 'Average' score from `son_info` and drawn side by side. The
    figure is saved under ``../ICPR_Paper/concept_images/<num_imgs>/<coi>.png``.
    Nothing is rendered when that path already exists.

    NOTE(review): `viewImages` reads the strip for the concept '.' from
    '..png.png', which suggests the saved file gets an extra extension for that
    name; if so, the existence check below never matches for '.' and the strip
    is re-rendered on every call — confirm.

    Parameters
    ----------
    coi : str
        Concept of interest; looked up with ``glove_neighbors.index(coi)``.
    num_imgs : int
        Number of most similar images to show.

    Raises
    ------
    ValueError
        Propagated from ``glove_neighbors.index`` when `coi` is unknown
        (assuming `glove_neighbors` is a list).
    """
    img_dir = '../ICPR_Paper/concept_images/' + str(num_imgs) + '/'
    # makedirs also creates missing parent folders, which os.mkdir does not.
    os.makedirs(img_dir, exist_ok=True)

    save_path = img_dir + coi + '.png'
    if os.path.exists(save_path):
        return

    # similarity of every transformed image to the concept of interest
    c_neigh = cossim_latent[glove_neighbors.index(coi)]

    # positions of the most similar images, mapped to their son_info index
    nearest = c_neigh.argsort()[::-1][:num_imgs]
    son_ixs = [transformed_ixs[j] for j in nearest]

    paths = ['../son_images/images/' + str(son_info.loc[i, 'ID']) + '.jpg' for i in son_ixs]
    scores = [son_info.loc[i, 'Average'] for i in son_ixs]

    # sort the images by ascending scenicness score
    paths = [paths[i] for i in np.asarray(scores).argsort()]

    img_transform = transforms.Compose([transforms.CenterCrop(400)])
    fig = plt.figure(figsize=(15, 15))
    try:
        # One panel per available image; this equals num_imgs unless fewer images exist.
        n_panels = len(paths)
        for pos, path in enumerate(paths):
            panel = fig.add_subplot(1, n_panels, pos + 1)
            # Read the pixels inside the context manager so the file is closed right away.
            with Image.open(path) as img:
                crop_img = np.asarray(img_transform(img))
            panel.axis('off')
            panel.imshow(crop_img)

        fig.subplots_adjust(wspace=0, hspace=0)
        fig.savefig(save_path, bbox_inches='tight')
    finally:
        # Always release the figure, also when loading or saving fails.
        fig.clf()
        plt.close(fig)

In [19]:
# Render the image strip (8 images) for the new GloVe concepts.
# NOTE(review): the `break` stops after the first concept, so only one strip is
# produced here — remove it to process all of glove_neighbors.
for coi in tqdm(glove_neighbors):
    save_glove_neighbor_imgs_to_drive(coi, 8)
    break

HBox(children=(FloatProgress(value=0.0, max=2246.0), HTML(value='')))

In [20]:
def viewImages(coi, num_imgs):
    """Stack the image strip of a Broden concept above those of its GloVe neighbours.

    The strip of `coi` and of every neighbouring GloVe concept listed for it in
    `cdf` is generated if needed, then all strips are drawn below each other,
    each titled with the concept name and its tau value. The figure is written
    to ``../ICPR_Paper/concept_and_neighbors/<num_imgs>/<coi>.png``.

    Parameters
    ----------
    coi : str
        Broden (training) concept of interest.
    num_imgs : int
        Number of images per strip.
    """
    # make sure the strip for the Broden concept itself exists
    save_broden_concept_imgs_to_drive(coi, num_imgs)

    out_dir = '../ICPR_Paper/concept_and_neighbors/' + str(num_imgs) + '/'
    # makedirs also creates missing parent folders, which os.mkdir does not.
    os.makedirs(out_dir, exist_ok=True)
    save_path = out_dir + coi + '.png'

    strip_dir = '../ICPR_Paper/concept_images/' + str(num_imgs) + '/'

    # rows of the overview table that belong to this training concept
    coi_rows = cdf['Training Concept'] == coi

    # neighbouring GloVe concepts; missing values are skipped because they cannot form a file name
    neighbor_concepts = list(cdf.loc[coi_rows, 'GloVe Concept'].dropna().values)

    # tau of the concept of interest itself (previously always read for 'embankment')
    training_tau = cdf.loc[coi_rows, 'Training tau'].values
    paths = [strip_dir + coi + '.png']
    correlations = [round(training_tau[0], 3) if len(training_tau) else float('nan')]

    # for each neighbouring concept, render its strip and remember path and tau
    for c in neighbor_concepts:
        save_glove_neighbor_imgs_to_drive(c, num_imgs)
        if c == '.':
            # the strip for the concept '.' is read from this doubled extension
            concept_img = strip_dir + '..png.png'
        else:
            concept_img = strip_dir + c + '.png'
        paths.append(concept_img)
        correlations.append(round(cdf.loc[cdf['GloVe Concept'] == c, 'GloVe tau'].values[0], 3))

    # one row for the Broden concept followed by one row per neighbour
    titles = [coi + ' (Broden concept)'] + neighbor_concepts

    fig = plt.figure(figsize=(15, 15))
    try:
        for row, (path, title, tau) in enumerate(zip(paths, titles, correlations)):
            panel = fig.add_subplot(len(paths), 1, row + 1)
            panel.axis('off')
            panel.set_title(title + ' ' + str(tau))
            # draw inside the context manager so the file is closed afterwards
            with Image.open(path) as img:
                panel.imshow(img)

        fig.subplots_adjust(wspace=0, hspace=0.4)
        fig.savefig(save_path, bbox_inches='tight')
    finally:
        # Always release the figure, also when loading or saving fails.
        fig.clf()
        plt.close(fig)

In [21]:
# Single example: build the overview figure for 'pasture' with 4 images per strip.
viewImages('pasture', 4)

In [22]:
# Build the overview figure (4 images per strip) for every Broden concept.
for c in tqdm(embedding_concepts):
    viewImages(c, 4)

HBox(children=(FloatProgress(value=0.0, max=302.0), HTML(value='')))




---

### Check the neighbors for concept "bass"

In [13]:
# Concept to inspect and its position in glove_neighbors
# (used in the next cell to select its row of new_concept_activations).
coi = 'bass'
cidx = glove_neighbors.index(coi)

1119


In [40]:
# Cosine similarity between the selected new concept vector (row cidx, reshaped to 1 x d)
# and the first len(embedding_concepts) rows of z2.T, i.e. the original concept vectors.
c_neighbors = cosine_similarity(new_concept_activations[cidx, :].reshape(1,-1), z2.T[:len(embedding_concepts), :])

In [43]:
# Indices of the 10 highest similarities, highest first (argsort is ascending, [::-1] reverses it).
top10 = c_neighbors.flatten().argsort()[::-1][:10]

In [47]:
# Print the names of the selected original concepts, in the order stored in top10.
for concept_ix in top10:
    print(embedding_concepts[concept_ix])

guitar
keyboard
lake
neck
alcove
sand
pitch
metal
bridge
sea


---

In [6]:
# Load the stored latent concept scores.
# presumably one column per GloVe neighbour (later cells index columns by neighbour position) — verify
latent_concept_score = np.load('../data/latent_concept_score.npy')

In [8]:
# Wrap the score array in a DataFrame so that Series.nlargest can be used per column.
df = pd.DataFrame(latent_concept_score)

In [34]:
# Number of highest scores kept per concept; defined once so the array size and
# the selection below cannot drift apart.
# NOTE(review): the result is still named `top5_imgs` although 1375 values are kept.
N_TOP_SCORES = 1375

# Column i receives the N_TOP_SCORES largest scores of column i of df, largest first.
top5_imgs = np.zeros((N_TOP_SCORES, len(glove_neighbors)))

for i in trange(len(glove_neighbors)):
    top5_score = df.iloc[:, i].nlargest(N_TOP_SCORES)
    top5_imgs[:, i] = top5_score
    

HBox(children=(FloatProgress(value=0.0, max=2246.0), HTML(value='')))




In [36]:
# Mean of the kept scores per column, i.e. one value per GloVe neighbour.
mean_scores = top5_imgs.mean(axis=0)

In [46]:
# Indices of the 10 concepts with the LOWEST mean score: argsort is ascending and
# the order is not reversed here.
# NOTE(review): the name suggests the highest means were intended (other cells
# reverse with [::-1]) — confirm before relying on this ranking.
top10 = np.argsort(mean_scores)[:10]

In [49]:
# Print the selected concepts with a 1-based rank.
for rank, concept_ix in enumerate(top10, start=1):
    print(rank, glove_neighbors[concept_ix])

1 beside
2 inside
3 located
4 sitting
5 floor
6 adjacent
7 wearing
8 cloth
9 sit
10 front
