In [9]:
%pylab inline
import os, sys
import numpy as np
import h5py
from tqdm import tqdm

from plotting import plot_similar_images, plot_similar_spectra

# Root of the AstroCLIP data tree. The ASTROCLIP_ROOT environment variable, when
# set to a non-empty value, overrides the default cluster location so the
# notebook can also be run on machines where the data lives elsewhere.
ASTROCLIP_ROOT = (
    os.environ.get("ASTROCLIP_ROOT") or "/mnt/ceph/users/polymathic/astroclip"
)

# Load the galaxy images, spectra and their precomputed embeddings.
# The `[:]` slices copy each dataset out of the file before it is closed.
embedding_loc = f"{ASTROCLIP_ROOT}/datasets/embeded_astroclip.hdf5"
with h5py.File(embedding_loc, "r") as f:
    images = f["image"][:]
    spectra = f["spectrum"][:]
    im_embeddings = f["image_embeddings"][:]
    sp_embeddings = f["spectrum_embeddings"][:]

# Scale every embedding to unit L2 norm along its last axis, so that the dot
# product of two normalized embeddings is their cosine similarity.
image_features_normed = im_embeddings / np.linalg.norm(
    im_embeddings, axis=-1, keepdims=True
)
spectrum_features_normed = sp_embeddings / np.linalg.norm(
    sp_embeddings, axis=-1, keepdims=True
)

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


In [None]:
# Preview a 15 x 15 mosaic of consecutive galaxies, starting at dataset index 500.
# Each panel is titled with its dataset index so interesting objects can be
# picked out as retrieval queries.
plt.figure(figsize=[15, 15])
for panel in range(15 * 15):
    galaxy_index = panel + 500
    plt.subplot(15, 15, panel + 1)
    plt.imshow(images[galaxy_index].T)
    plt.title(galaxy_index)
    plt.axis("off")
plt.subplots_adjust(wspace=0.1, hspace=0.11)

# Plot retrieved galaxy images

In [None]:
# Dataset indices of the galaxies used as retrieval queries
ind_query = [7, 354, 526, 300]

# One entry per query galaxy: for each similarity measure, the images of the
# 8 best-matching galaxies in the dataset
im_sims = []

for ind in ind_query:
    query_image_vec = image_features_normed[ind]
    query_spectrum_vec = spectrum_features_normed[ind]

    # Similarity of the query against every galaxy in the dataset, within each
    # modality and across modalities
    similarities = {
        "sp_sim": query_spectrum_vec @ spectrum_features_normed.T,
        "im_sim": query_image_vec @ image_features_normed.T,
        "x_im_sim": query_image_vec @ spectrum_features_normed.T,
        "x_sp_sim": query_spectrum_vec @ image_features_normed.T,
    }

    # Rank by decreasing similarity and keep the images of the top 8 matches
    im_sims.append(
        {
            name: [images[k] for k in np.argsort(scores)[::-1][:8]]
            for name, scores in similarities.items()
        }
    )

In [None]:
def plot_similar_images(
    query_images: list,
    sims: list,
    similarity_type: str = "im_sim",
    num_retrievals: int = 8,
    save_dir: str = None,
):
    """Plot each query galaxy followed by its retrieved matches, one row per query.

    The first column of every row shows the query image; the following
    ``num_retrievals`` columns show the retrieved images in the order they are
    stored in ``sims``. Every image is transposed (``.T``) before display.

    Args:
        query_images: Query images, one per row of the figure.
        sims: Sequence indexed by query position; ``sims[n][similarity_type]``
            must hold at least ``num_retrievals`` images for query ``n``.
        similarity_type: Key selecting which retrieval list of ``sims[n]`` to
            plot; also used in the name of the saved file.
        num_retrievals: Number of retrieved images drawn per query.
        save_dir: If not None, the figure is written to
            ``<save_dir>/retrieval_<similarity_type>.png``; the directory is
            created when missing.
    """
    n_rows = len(query_images)
    # Keep the 13-column layout for the usual case and widen the grid only
    # when the query plus its retrievals would not fit in one row.
    n_cols = max(13, num_retrievals + 1)

    plt.figure(figsize=[19.4, 6.1])
    for n, img in enumerate(query_images):
        row_start = n * n_cols

        # Column 1: the query galaxy itself
        plt.subplot(n_rows, n_cols, row_start + 1)
        plt.imshow(img.T)
        plt.axis("off")

        # Columns 2 onwards: the retrieved galaxies
        for j in range(num_retrievals):
            plt.subplot(n_rows, n_cols, row_start + j + 2)
            plt.imshow(sims[n][similarity_type][j].T)
            plt.axis("off")
    plt.subplots_adjust(wspace=0.00, hspace=0.01)

    if save_dir is not None:
        # exist_ok avoids the race between checking for and creating the directory
        os.makedirs(save_dir, exist_ok=True)
        plt.savefig(os.path.join(save_dir, f"retrieval_{similarity_type}.png"))

In [None]:
# Image -> image retrieval: galaxies ranked by how close their image embedding
# is to the image embedding of the query
query_galaxy_images = [images[idx] for idx in ind_query]
plot_similar_images(
    query_galaxy_images,
    im_sims,
    similarity_type="im_sim",
    num_retrievals=8,
    save_dir="../outputs/image_retrieval/",
)

In [None]:
# Spectrum -> spectrum retrieval: galaxies ranked by how close their spectrum
# embedding is to the spectrum embedding of the query (shown as images)
query_galaxy_images = [images[idx] for idx in ind_query]
plot_similar_images(
    query_galaxy_images,
    im_sims,
    similarity_type="sp_sim",
    num_retrievals=8,
    save_dir="../outputs/image_retrieval/",
)

In [None]:
# Image -> spectrum cross-modal retrieval: galaxies ranked by how close their
# spectrum embedding is to the image embedding of the query (shown as images)
query_galaxy_images = [images[idx] for idx in ind_query]
plot_similar_images(
    query_galaxy_images,
    im_sims,
    similarity_type="x_im_sim",
    num_retrievals=8,
    save_dir="../outputs/image_retrieval/",
)

In [None]:
# Spectrum -> image cross-modal retrieval: galaxies ranked by how close their
# image embedding is to the spectrum embedding of the query (shown as images)
query_galaxy_images = [images[idx] for idx in ind_query]
plot_similar_images(
    query_galaxy_images,
    im_sims,
    similarity_type="x_sp_sim",
    num_retrievals=8,
    save_dir="../outputs/image_retrieval/",
)

# Plot retrieved galaxy spectra

In [None]:
# Dataset indices of the galaxies used as retrieval queries.
# NOTE: this rebinds `ind_query`; re-run the image retrieval cell before
# re-running the image plots, which read the same name.
ind_query = [7, 77]

# One entry per query galaxy: for each similarity measure, the spectra of the
# 8 best-matching galaxies in the dataset
sp_sims = []

for ind in ind_query:
    query_image_vec = image_features_normed[ind]
    query_spectrum_vec = spectrum_features_normed[ind]

    # Similarity of the query against every galaxy in the dataset, within each
    # modality and across modalities
    similarities = {
        "sp_sim": query_spectrum_vec @ spectrum_features_normed.T,
        "im_sim": query_image_vec @ image_features_normed.T,
        "x_im_sim": query_image_vec @ spectrum_features_normed.T,
        "x_sp_sim": query_spectrum_vec @ image_features_normed.T,
    }

    # Rank by decreasing similarity and keep the spectra of the top 8 matches
    sp_sims.append(
        {
            name: [spectra[k] for k in np.argsort(scores)[::-1][:8]]
            for name, scores in similarities.items()
        }
    )

In [None]:
# Image -> image retrieval: spectra of the galaxies whose image embedding is
# closest to the image embedding of the query
query_galaxy_spectra = [spectra[idx] for idx in ind_query]
query_galaxy_images = [images[idx] for idx in ind_query]
plot_similar_spectra(
    query_galaxy_spectra,
    query_galaxy_images,
    sp_sims,
    similarity_type="im_sim",
    save_dir="./outputs/spectrum_retrieval/",
)

In [None]:
# Spectrum -> spectrum retrieval: spectra of the galaxies whose spectrum
# embedding is closest to the spectrum embedding of the query
query_galaxy_spectra = [spectra[idx] for idx in ind_query]
query_galaxy_images = [images[idx] for idx in ind_query]
plot_similar_spectra(
    query_galaxy_spectra,
    query_galaxy_images,
    sp_sims,
    similarity_type="sp_sim",
    save_dir="./outputs/spectrum_retrieval/",
)

In [None]:
# Image -> spectrum cross-modal retrieval: spectra of the galaxies whose
# spectrum embedding is closest to the image embedding of the query
query_galaxy_spectra = [spectra[idx] for idx in ind_query]
query_galaxy_images = [images[idx] for idx in ind_query]
plot_similar_spectra(
    query_galaxy_spectra,
    query_galaxy_images,
    sp_sims,
    similarity_type="x_im_sim",
    save_dir="./outputs/spectrum_retrieval/",
)

In [None]:
# Spectrum -> image cross-modal retrieval: spectra of the galaxies whose
# image embedding is closest to the spectrum embedding of the query
query_galaxy_spectra = [spectra[idx] for idx in ind_query]
query_galaxy_images = [images[idx] for idx in ind_query]
plot_similar_spectra(
    query_galaxy_spectra,
    query_galaxy_images,
    sp_sims,
    similarity_type="x_sp_sim",
    save_dir="./outputs/spectrum_retrieval/",
)