In [2]:
import zipfile
import os
import json
import os
import re
import sys
import gzip
import json
import pandas as pd
import numpy as np
from autofaiss import build_index
sys.path.append('/viscam/u/iamisaac/clip-retrieval')
from clip_retrieval.clip_inference import main

  from .autonotebook import tqdm as notebook_tqdm


In [24]:

def read_file_names(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        # List all files in the ZIP
        print(f"File Length: {len(zip_ref.infolist())}")
        print("Files in the ZIP:")
        num_read = 0
        for file_info in zip_ref.infolist():
            print(f"- {file_info.filename}")

def read_sequence(zip_path):
    seen = set()
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        # List all files in the ZIP
        print(f"File Length: {len(zip_ref.infolist())}")
        print("Files in the ZIP:")
        num_read = 0
        for file_info in zip_ref.infolist():
            file_name = file_info.filename.split("/")
            file_name = [item for item in file_name if item != ""]
            if len(file_name) >= 2 and not file_name[1][:3].isnumeric():
                print(file_info.filename)



def read_set_lists(zip_path, split_name="val"):
    split_metadata = []
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        # List all files in the ZIP
        print(f"File Length: {len(zip_ref.infolist())}")
        print("Files in the ZIP:")
        set_list_files = []
        num_read = 0
        for file_info in zip_ref.infolist():
            file_name = file_info.filename.split("/")
            if len(file_name) == 3 and file_name[1] == "set_lists" and file_name[2].startswith("set_lists_") and file_name[2].endswith('.json'):
                set_list_files.append(file_info)
        for set_list_file in set_list_files:
            with zip_ref.open(set_list_file) as file:
                print(f"\nContents of {set_list_file.filename}:")
                json_data = json.loads(file.read().decode('utf-8'))
                split_metadata.extend(json_data[split_name])
    return split_metadata

def get_img_dirs(img_root_dir):
    # Regular expression pattern to match directory names
    # with two underscores separated by numbers
    pattern = r'\d+_\d+_\d+'
    paths = []
    for dir_name in os.listdir(img_root_dir):
        full_path = os.path.join(img_root_dir, dir_name)
        if os.path.isdir(full_path) and re.match(pattern, dir_name):
            images_path = os.path.join(full_path, "images")
            paths.append(images_path)
    return paths
    
def run_clip_inference(img_dir, output_path):
    print(f"Running clip inference for {img_dir} saved to {output_path}")
    os.makedirs(output_path, exist_ok=True)
    main.main(
        input_dataset=img_dir,
        output_folder=output_path,
        clip_model="ViT-L/14"
    )

def get_viewpoint_from_annotations(seq_str, frame_number, annotations):
    viewpoint = {}
    for annotation in annotations:
        if annotation["sequence_name"] == seq_str and annotation["frame_number"] == frame_number:
            R = annotation["viewpoint"]["R"]
            R = str([item for sublist in R for item in sublist])
            T = str(annotation["viewpoint"]["T"])
            
            viewpoint["R"] = R
            viewpoint["T"] = T
            viewpoint["focal_length"] = str(annotation["viewpoint"]["focal_length"])
            viewpoint["principal_point"] = str(annotation["viewpoint"]["principal_point"])
            viewpoint["intrinsics_format"] = annotation["viewpoint"]["intrinsics_format"]
            return viewpoint

def merge_embeddings_into_parquet_files(
        src_parquet_file_path,
        src_npy_file_path,
        target_directory,
        all_annotations,
):
    """
    Merges embedding, metdata, and existing parquet file into a single parquet file for a given sequence.
    """

    metadata = pd.read_parquet(src_parquet_file_path)
    metadata_embedding = np.load(src_npy_file_path)
    metadata_embedding = metadata_embedding.tolist()
    annotations = {"sequence_name":[], "frame_number":[], "embedding": [], "image_path":[], "R":[], "T":[], "focal_length":[], "intrinsics_format":[], "principal_point":[]}
    for file_path, embedding in zip(metadata["image_path"], metadata_embedding):
        split_file_path = file_path.split("/")
        frame_num = int(split_file_path[-1][-8:-4])

        seq_name = split_file_path[-3]
        viewpoint = get_viewpoint_from_annotations(seq_name, frame_num, all_annotations)
        if not viewpoint:
            print(f"viewpoint for {seq_name} and {frame_num} not found !!!")
            continue
            
        annotations["sequence_name"].append(seq_name)
        annotations["frame_number"].append(frame_num)
        annotations["embedding"].append(embedding)
        annotations["image_path"].append(file_path)
        
        # Add rotation, position, and camera
        annotations["R"].append(viewpoint["R"])
        annotations["T"].append(viewpoint["T"])
        annotations["focal_length"].append(viewpoint["focal_length"])
        annotations["principal_point"].append(viewpoint["principal_point"])
        annotations["intrinsics_format"].append(viewpoint["intrinsics_format"])
        
    new_metadata = pd.DataFrame(annotations)
    metadata_dst_path = os.path.join(target_directory, f'{seq_name}.parquet')
    print(f"Saving merged parquet file to {metadata_dst_path} with {len(metadata_embedding)} rows")
    os.makedirs(target_directory, exist_ok=True)
    new_metadata.to_parquet(
        metadata_dst_path
    )

def index_co3d(obj_paths, output_path="/viscam/u/iamisaac/sfs/co3d_embeddings/final"):
    """
    obj_paths: A list of paths to individual object directories. ex: ['path/to/co3d/apple', 'path/to/co3d/car']
    
    Expected file structure of each path in obj_paths:
        - path/to/apple/sequence_annotations.jgz
        - path/to/apple/76_7808_16298/images/frame000001.jpg
    """
    parquet_output_path = os.path.join(output_path, "metadata")
    for obj_path in obj_paths:
        """
        Given: 
            obj_path = 'path/to/co3d/apple'
            output_path = "path/to/index"
        
        embedding_output_path = "path/to/index/apple"
        """
        embedding_output_path = os.path.join(output_path, os.path.basename(obj_path))
        img_dirs = get_img_dirs(obj_path)
        num_iter = 0
        annotations_path = os.path.join(obj_path, "frame_annotations.jgz")
        with gzip.open(annotations_path, 'rt', encoding='utf-8') as file:
            annotations = json.load(file)
            for img_dir in img_dirs:
                seq_num = img_dir.split("/")[-2]
                clip_inference_output_path = os.path.join(embedding_output_path, seq_num)
                parquet_file_path = os.path.join(clip_inference_output_path, "metadata", "metadata_0.parquet")
                embedding_file_path = os.path.join(clip_inference_output_path, "img_emb", "img_emb_0.npy")
                
                # Run clip inference and save clip embeddings in ../img_emb/img_emb_0.npy
                run_clip_inference(img_dir, clip_inference_output_path)
                
                # Merge metadata from ../metadata/metadata_0.parquet and clip embeddings from ../img_emb/img_emb_0.npy
                # and save to parquet_output_path with annotations
                merge_embeddings_into_parquet_files(parquet_file_path, embedding_file_path, parquet_output_path, annotations)
                num_iter +=1
                if num_iter == 10:
                    break
    build_index(
        embeddings=os.path.join(output_path, 'metadata'),
        index_path=os.path.join(output_path,'image.index'),
        file_format='parquet',
    )


In [26]:
# zip_path = "/viscam/data/co3d"
# zip_file_name = "keyboard_000.zip"
# # read_file_names(os.path.join(zip_path, zip_file_name))
# read_sequence(os.path.join(zip_path, "keyboard_001.zip"))
obj_paths = ["/viscam/u/iamisaac/sfs/co3d_data/car", "/viscam/data/co3d/apple"]
index_co3d(obj_paths, "/viscam/u/iamisaac/sfs/co3d_embeddings/final4")

Running clip inference for /viscam/u/iamisaac/sfs/co3d_data/car/106_12674_24709/images saved to /viscam/u/iamisaac/sfs/co3d_embeddings/final4/car/106_12674_24709
The number of samples has been estimated to be 202
Starting the worker
dataset is 59
Starting work on task 0
 sample_per_sec 18 ; sample_count 202 {'R': '[0.7513739466667175, 0.5094317197799683, 0.41942405700683594, -0.17679552733898163, -0.4569590985774994, 0.8717406392097473, 0.6357519626617432, -0.7291554808616638, -0.2532818913459778]', 'T': '[0.96485835313797, 1.8514238595962524, 11.39902114868164]', 'focal_length': '[13.506386756896973, 13.506386756896973]', 'principal_point': '[-0.004629629664123058, -0.004629629664123058]', 'intrinsics_format': 'ndc_isotropic'}
{'R': '[0.7476114630699158, 0.5121258497238159, 0.4228525459766388, -0.18012277781963348, -0.4564783275127411, 0.8713112473487854, 0.639244019985199, -0.7275676131248474, -0.24902290105819702]', 'T': '[1.012723684310913, 1.9394097328186035, 10.904176712036133]',

2024-08-24 15:36:38,610 [INFO]: Using 128 omp threads (processes), consider increasing --nb_cores if you have more
2024-08-24 15:36:38,610 [INFO]: Launching the whole pipeline 08/24/2024, 15:36:38
2024-08-24 15:36:38,611 [INFO]: Reading total number of vectors and dimension 08/24/2024, 15:36:38


viewpoint for 338_34901_63372 and 202 not found !!!
Saving merged parquet file to /viscam/u/iamisaac/sfs/co3d_embeddings/final4/metadata/338_34901_63372.parquet with 201 rows


100%|██████████| 20/20 [00:00<00:00, 4710.06it/s]
2024-08-24 15:36:38,640 [INFO]: There are 3609 embeddings of dim 768
2024-08-24 15:36:38,640 [INFO]: >>> Finished "Reading total number of vectors and dimension" in 0.0290 secs
2024-08-24 15:36:38,640 [INFO]: 	Compute estimated construction time of the index 08/24/2024, 15:36:38
2024-08-24 15:36:38,641 [INFO]: 		-> Train: 16.7 minutes
2024-08-24 15:36:38,642 [INFO]: 		-> Add: 0.0 seconds
2024-08-24 15:36:38,642 [INFO]: 		Total: 16.7 minutes
2024-08-24 15:36:38,642 [INFO]: 	>>> Finished "Compute estimated construction time of the index" in 0.0012 secs
2024-08-24 15:36:38,643 [INFO]: 	Checking that your have enough memory available to create the index 08/24/2024, 15:36:38
2024-08-24 15:36:38,643 [INFO]: 12.1MB of memory will be needed to build the index (more might be used if you have more)
2024-08-24 15:36:38,644 [INFO]: 	>>> Finished "Checking that your have enough memory available to create the index" in 0.0008 secs
2024-08-24 15:36:38

In [None]:
"""
    "keyboard_000.zip":
        - keyboard/
        - keyboard/set_lists/
        - keyboard/set_lists/set_lists_fewview_dev.json
        - keyboard/set_lists/set_lists_fewview_test.json
        - keyboard/set_lists/set_lists_fewview_train.json
        - keyboard/set_lists/set_lists_manyview_test_0.json
        - keyboard/sequence_annotations.jgz
        - keyboard/LICENSE
        - keyboard/eval_batches/
        - keyboard/eval_batches/eval_batches_fewview_dev.json
        - keyboard/eval_batches/eval_batches_fewview_test.json
        - keyboard/eval_batches/eval_batches_fewview_train.json
        - keyboard/eval_batches/eval_batches_manyview_test_0.json
        - keyboard/frame_annotations.jgz

    "keyboard_001.zip" ~ "keyboard_008.zip":
        - keyboard/76_7808_16298/depth_masks/frame000001.png
        - keyboard/76_7808_16298/depths/frame000001.jpg.geometric.png
        - keyboard/76_7808_16298/images/frame000001.jpg
        - keyboard/76_7704_16159/masks/frame000001.png  
        - keyboard/76_7704_16159/pointcloud.ply
    
"""

In [15]:
a = [[1], [2, 3]]
print(str(a))
print(list(str(a)))

[[1], [2, 3]]
['[', '[', '1', ']', ',', ' ', '[', '2', ',', ' ', '3', ']', ']']
