In [1]:
import os
import random 
import pandas as pd
import h5py
import numpy as np
from tqdm import tqdm
from PIL import Image
from matplotlib import pyplot as plt
from quality_classifier.embedding_models import generate_dino_embedding_model, generate_siglip_embedding_model, load_ziped_images
from torchvision import transforms

# Loading Object Information

In [2]:
# Read the vote table from CSV; the cell displays its number of rows.
df_car_quality_votes = pd.read_csv('../data/car_quality_dataset_votes.csv')
df_car_quality_votes.shape[0]

6219

# Generate Embeddings of Multi-View Renderings

In [7]:
def load_extracted_images(object_dir):
    """Load every ``.png`` file directly inside ``object_dir`` as an RGB image.

    Args:
        object_dir: Path of a directory that contains the extracted images.

    Returns:
        A list with one RGB image per ``.png`` file, ordered by file name.
        Entries whose name does not end in ``.png`` are ignored.
    """
    # os.listdir() returns entries in arbitrary order; sorting makes the order
    # of the returned images identical on every run and every machine.
    png_names = sorted(
        file_name for file_name in os.listdir(object_dir) if file_name.endswith(".png")
    )
    object_images = []
    for file_name in png_names:
        # convert() returns a new image, so the handle opened by Image.open()
        # can be closed right away instead of staying open for every file.
        with Image.open(os.path.join(object_dir, file_name)) as image_file:
            object_images.append(image_file.convert("RGB"))
    return object_images

In [None]:
# Collect the renderings of every voted object in a dictionary keyed by uid.
is_ziped = False  # True: renderings are stored as "<uid>.zip"; False: as a "<uid>" directory
# render_base_path = "../02_multi-view-rendering/car_renders_21"
render_base_path = "/mnt/damian/.objaverse/car_renders_new"
all_object_dirs = os.listdir(render_base_path)
available_entries = set(all_object_dirs)  # constant-time membership test for the zip case
car_uids = df_car_quality_votes['uid'].unique()
rendered_image_dict = {}
missing_uids = []
for uid in car_uids:
    if is_ziped:
        filename = uid + ".zip"
        object_dir = os.path.join(render_base_path, filename)
        renderings_found = filename in available_entries
    else:
        object_dir = os.path.join(render_base_path, uid)
        renderings_found = os.path.exists(object_dir)

    if not renderings_found:
        # Skip uids without renderings. Falling through to the assignment
        # would store the previous uid's images under this uid (or raise
        # NameError when the very first uid is missing).
        missing_uids.append(uid)
        continue

    if is_ziped:
        object_images = load_ziped_images(object_dir)
    else:
        object_images = load_extracted_images(object_dir)
    rendered_image_dict[uid] = object_images

print(f"{len(missing_uids)} of {len(car_uids)} uids have no renderings and were skipped")
len(rendered_image_dict)

In [None]:
# Drop every vote whose uid has no entry in rendered_image_dict.
uid_is_rendered = df_car_quality_votes['uid'].isin(rendered_image_dict.keys())
df_car_quality_votes = df_car_quality_votes[uid_is_rendered]
len(df_car_quality_votes)

# Embedding the images using DINOv2 or SigLIP

In [3]:
embedding_model = "DINOv2"  # one of: "DINOv2", "siglip", "colpali"
device = "cuda"

# Build the embedding function for the selected model. use_clip is read again
# further down in this notebook to decide whether the embeddings get reduced.
if embedding_model == "siglip":
    embed = generate_siglip_embedding_model(device=device)
    use_clip = True
elif embedding_model == "colpali":
    # generate_colpali_embedding_model is not part of the import cell, so this
    # branch used to fail with NameError.
    # NOTE(review): assumed to live next to the other two factories in
    # quality_classifier.embedding_models -- confirm.
    from quality_classifier.embedding_models import generate_colpali_embedding_model
    embed = generate_colpali_embedding_model(device=device)
    use_clip = False
elif embedding_model == "DINOv2":
    embed = generate_dino_embedding_model(device=device)
    use_clip = False
else:
    # The output file names are derived from embedding_model, so a misspelled
    # name must not silently fall back to DINOv2.
    raise ValueError(
        f"Unknown embedding_model {embedding_model!r}; "
        "expected 'siglip', 'colpali' or 'DINOv2'"
    )

Go through all images and compute their embeddings using the selected model. The embeddings are stored in a dictionary keyed by the corresponding uid.
> Note that this can take a while.

In [9]:
def generate_embeddings(embedding_fnc, image_dict=None, image_size=(576, 576)):
    """Compute one embedding array per uid.

    Args:
        embedding_fnc: Callable that receives the list of resized images of a
            single uid and returns an object supporting ``.cpu().numpy()``.
        image_dict: Mapping ``uid -> list of images`` (objects providing a
            ``resize`` method). When omitted, the notebook-level
            ``rendered_image_dict`` is used, as before.
        image_size: Size tuple handed to ``resize`` for every image before it
            is embedded. Defaults to the previously hard-coded ``(576, 576)``.

    Returns:
        A dict mapping each uid to the array returned by
        ``embedding_fnc(images).cpu().numpy()``.
    """
    if image_dict is None:
        # Backward-compatible default: read the notebook-level dictionary.
        image_dict = rendered_image_dict

    all_embeddings = {}
    # A list (not a dict view) is handed to tqdm, exactly as before.
    for uid in tqdm(list(image_dict.keys())):
        # image preprocessing: bring every image of this uid to the same size
        images = [image.resize(image_size) for image in image_dict[uid]]
        # generate the embeddings and move them to host memory as NumPy
        outputs = embedding_fnc(images)
        all_embeddings[uid] = outputs.cpu().numpy()
    return all_embeddings

In [None]:
# NOTE(review): these four settings are not read anywhere in this cell; they
# are kept in case other cells rely on them.
batch_size = 200
concat_images = False
only_one_image_per_uid = True
amount_of_embeddings = 1

# first generate the embeddings without color jitter
all_embeddings = generate_embeddings(embed)

# Resolve each uid's vote through a lookup table built in a single pass,
# instead of filtering the whole vote table once per uid. keep='first' picks
# the first row of every uid, which is what `.values[0]` selected before.
first_votes = df_car_quality_votes.drop_duplicates(subset='uid', keep='first')
vote_by_uid = dict(zip(first_votes['uid'], first_votes['vote']))

embedding_dataset = pd.DataFrame(
    [(embedding, vote_by_uid[uid], uid) for uid, embedding in all_embeddings.items()],
    columns=['embedding', 'vote', 'uid'],
)
embedding_dataset.head()

In [None]:
import gc

# Drop the references to the loaded renderings and the embedding function,
# then run a garbage-collection pass.
del rendered_image_dict, embed
gc.collect()

## Saving embeddings
Save the embeddings, votes, and uids of `embedding_dataset` to HDF5 files.

In [None]:
# Stack the per-uid embedding arrays of embedding_dataset into a single NumPy
# array and write it to an HDF5 file named after the selected model.
embeddings_list = embedding_dataset['embedding'].values
embeddings_np = np.stack(embeddings_list)
print(f"embeddings_np.shape: {embeddings_np.shape}")

filename = f"car_model_embedding_{embedding_model}_seq_4.h5"
with h5py.File(filename, 'w') as h5_file:
    h5_file.create_dataset('embedding_dataset', data=embeddings_np)
print(f"Embeddings saved to {filename}")

In [None]:
# Write the votes of embedding_dataset to their own HDF5 file.
votes_list = embedding_dataset['vote'].values
# combine the individual votes into one NumPy array
votes_np = np.stack(votes_list)
print(f"votes_np.shape: {votes_np.shape}")

filename = f"car_model_votes_{embedding_model}_seq_4.h5"
with h5py.File(filename, 'w') as h5_file:
    h5_file.create_dataset('vote_dataset', data=votes_np)
print(f"Votes saved to {filename}")

In [None]:
# save the uids of embedding_dataset as a hdf5 file
filename = 'car_model_uids_' + embedding_model + '_seq_4.h5'
uids_list = embedding_dataset['uid'].to_numpy(dtype=object)
with h5py.File(filename, 'w') as f:
    # h5py cannot derive an HDF5 type from a NumPy object array of Python
    # strings, so the variable-length UTF-8 string dtype is stated explicitly.
    f.create_dataset('uid_dataset', data=uids_list,
                     dtype=h5py.string_dtype(encoding='utf-8'))
print("Uids saved to " + filename)

### Loading the embeddings, fitting a PCA model if needed, and saving the PCA model (pkl) and the reduced embeddings (HDF5)

In [4]:
import pickle
import numpy as np
from sklearn.decomposition import PCA

load_embeddings = True

if load_embeddings:
    # Load the embeddings of the *selected* model. The file name follows the
    # same 'car_model_embedding_<model>_seq_4.h5' scheme used when saving and
    # is expected under ../data/. For "DINOv2" this is the path that was
    # hard-coded here before.
    embeddings_filename = '../data/car_model_embedding_' + embedding_model + '_seq_4.h5'
    with h5py.File(embeddings_filename, 'r') as f:
        embeddings = f['embedding_dataset'][:]
else:
    # Reuse the array that is still in memory from the saving step.
    embeddings = embeddings_np

# Embeddings are only reduced when use_clip is False.
reduce_embeddings = not use_clip
print(embeddings.shape)

(7239, 4, 197376)


In [6]:
# Flatten the last two dimensions of the embeddings into one feature row per entry.
original_shape = embeddings.shape
all_embeddings_new = embeddings.reshape(
    original_shape[0], original_shape[1] * original_shape[2]
)
n_components = original_shape[1] * 768
print(f"compressing the data to shape: {original_shape[0]}, {original_shape[1]}, {768}")

if reduce_embeddings:
    # Fit the PCA on the flattened embeddings, then project them onto it.
    pca = PCA(n_components=n_components, random_state=22).fit(all_embeddings_new)
    all_embeddings_new = pca.transform(all_embeddings_new)
    print(all_embeddings_new.shape)
    print("sum of explained variance ratio: ", sum(pca.explained_variance_ratio_))

    # Persist the fitted PCA model.
    pca_file = f"pca_model_{embedding_model}_seq_4_do_jit_sn_2.pkl"
    with open(pca_file, 'wb') as pkl_file:
        pickle.dump(pca, pkl_file)
    print("saved pca file")

    # Restore the two leading dimensions of the input and store the result.
    new_filename = f"car_model_embedding_{embedding_model}_seq_4_do_jit_sn_2_reduced.h5"
    all_embeddings_new = all_embeddings_new.reshape(original_shape[0], original_shape[1], -1)
    print(f"all_embeddings_new.shape: {all_embeddings_new.shape}")
    with h5py.File(new_filename, 'w') as h5_file:
        h5_file.create_dataset('embedding_dataset', data=all_embeddings_new)

compressing the data to shape: 7239, 4, 768
(7239, 3072)
sum of explained variance ratio:  0.9200398354987556
saved pca file
all_embeddings_new.shape: (7239, 4, 768)
