# Install the dependencies

python3 -m pip install --break-system-packages --user requests tqdm pygltflib

### Step 1 - Get model sizes & path

Option 1 - Extract manually:
1. Run "git clone https://huggingface.co/datasets/allenai/objaverse" and then abort the command when it starts to download the models.
2. This will create a git repo folder, you then can run "python dump_gitcommits.py > out.txt" to dump the entire commit history
3. Then you call extract_models_from_dump("out.txt") to parse and get all the model paths and their sizes.

Option 2 - Use the pre-extracted json (model_sizes.json.gz)

In [29]:
import json 
import gzip

def extract_models_from_dump(file_path):
    model_sizes = {}
    current_model = None
    with open(file_path, 'r') as file:
        for line in file:
            # Get model path
            if ".glb" in line:
                # Extract model path
                model_path = line.split()[-1].strip()
                model_path = model_path.replace("b/", "")
                current_model = model_path
            # Get current_model size
            elif current_model and "size" in line: 
                
                size = int(line.split()[-1].strip()) 
                model_sizes[current_model] = size 
                current_model = None
    return model_sizes
 
 
 ## Option 1
#model_sizes = extract_models_from_dump("out.txt")  


## Option 2
with gzip.open("model_sizes.json.gz", 'rb') as gzip_file: 
    model_sizes = json.loads(gzip_file.read().decode('utf-8'))
    
print(len(model_sizes))

798759


### Download the meshes as per specified size limit

In [30]:
import os
import requests
from tqdm import tqdm  
from concurrent.futures import ThreadPoolExecutor 

def download_model(model_url, save_path):
    try:
        response = requests.get(model_url)
        if response.status_code == 200:
            with open(save_path, 'wb') as f:
                f.write(response.content)
                #print(f"Downloaded: {save_path}")
        else:
            print(f"Failed to download: {model_url}")
    except Exception as e:
        print(f"Error downloading: {model_url}, {e}")

def download_filtered_models(model_sizes, base_url, save_dir, minKb, maxKb, num_threads = 6, maxDownloadedMeshes = 250000):
    filtered_models = {model_path: size for model_path, size in model_sizes.items() if minKb < size < maxKb * 1024}
    
    downloaded_meshes = 0

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = []
        for model_path, size in filtered_models.items():
            if downloaded_meshes >= maxDownloadedMeshes:
                break

            folder_name = os.path.dirname(model_path)
            sub_folder = os.path.join(save_dir, folder_name)
            os.makedirs(sub_folder, exist_ok=True)
            
            file_name = os.path.basename(model_path)
            save_path = os.path.join(sub_folder, file_name)
            
            if not os.path.exists(save_path):
                model_url = f"{base_url}/{model_path}?download=true"
                futures.append(executor.submit(download_model, model_url, save_path))
                
                downloaded_meshes += 1
                
        for future in tqdm(futures, total=len(futures)):
            future.result()
            
base_url = "https://huggingface.co/datasets/allenai/objaverse/resolve/main"  
save_dir = f'./objaverse' 

os.makedirs(save_dir, exist_ok=True)   
download_filtered_models(model_sizes, base_url, save_dir, minKb = 10, maxKb = 200, num_threads = 6, maxDownloadedMeshes = 10) 

100%|██████████| 10/10 [00:02<00:00,  4.32it/s]


### Download metadata

In [31]:
import os
import requests
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
 
def download_file(url, folder_path, filename):
    url = url + "?download=true"
    print(url)
    try:
        response = requests.get(url, stream=True)
        response.raise_for_status()  # If the response was successful, no Exception will be raised
        with open(os.path.join(folder_path, filename), 'wb') as f:
            f.write(response.content) 
        return True
    except Exception as err:
        print(f"Failed to download {filename}. Error: {err}")
        return False

def download_metadata(base_url, save_dir, num_threads=6):
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = []
        for i in range(1, 161):
            filename = f"000-{i:03d}.json.gz"
            file_url = base_url + filename
            futures.append(executor.submit(download_file, file_url, save_dir, filename))
         
        for future in tqdm(futures, total=len(futures)):
            result = future.result()
            if not result:
                continue
            
base_url = "https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/" 
save_dir = './objaverse/metadata'
os.makedirs(save_dir, exist_ok=True)   

download_metadata(base_url, save_dir)

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-001.json.gz?download=truehttps://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-002.json.gz?download=true

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-003.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-004.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-005.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-006.json.gz?download=true


  1%|          | 1/160 [00:00<01:58,  1.34it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-007.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-008.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-009.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-010.json.gz?download=true


  3%|▎         | 5/160 [00:00<00:22,  6.87it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-011.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-012.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-013.json.gz?download=true


  4%|▍         | 7/160 [00:01<00:28,  5.44it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-014.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-015.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-016.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-017.json.gz?download=true


  7%|▋         | 11/160 [00:01<00:18,  8.10it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-018.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-019.json.gz?download=true


  9%|▉         | 14/160 [00:02<00:17,  8.33it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-020.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-021.json.gz?download=true


 10%|█         | 16/160 [00:02<00:16,  8.69it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-022.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-023.json.gz?download=true


 11%|█▏        | 18/160 [00:02<00:15,  9.28it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-024.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-025.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-026.json.gz?download=true


 15%|█▌        | 24/160 [00:02<00:11, 11.50it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-027.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-028.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-029.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-030.json.gz?download=true


 16%|█▋        | 26/160 [00:03<00:12, 10.80it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-031.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-032.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-033.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-034.json.gz?download=true


 19%|█▉        | 31/160 [00:03<00:12, 10.35it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-035.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-036.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-037.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-038.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-039.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-040.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-041.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-042.json.gz?download=true


 21%|██        | 33/160 [00:04<00:20,  6.11it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-043.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-044.json.gz?download=true


 25%|██▌       | 40/160 [00:04<00:11, 10.23it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-045.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-046.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-047.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-048.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-049.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-050.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-051.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-052.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-053.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-054.js

 34%|███▍      | 55/160 [00:06<00:08, 12.41it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-058.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-059.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-060.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-061.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-062.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-063.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-064.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-065.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-066.json.gz?download=true


 39%|███▉      | 63/160 [00:07<00:08, 11.52it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-067.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-068.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-069.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-070.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-071.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-072.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-073.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-074.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-075.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-076.js

 41%|████▏     | 66/160 [00:08<00:14,  6.64it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-078.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-079.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-080.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-081.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-082.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-083.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-084.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-085.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-086.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-087.js

 46%|████▌     | 73/160 [00:09<00:14,  6.19it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-088.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-089.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-090.json.gz?download=true


 53%|█████▎    | 85/160 [00:09<00:06, 11.73it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-091.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-092.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-093.json.gz?download=true


 56%|█████▋    | 90/160 [00:10<00:05, 11.69it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-094.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-095.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-096.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-097.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-098.json.gz?download=true


 59%|█████▉    | 94/160 [00:10<00:06, 10.10it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-099.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-100.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-101.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-102.json.gz?download=true


 61%|██████    | 97/160 [00:10<00:05, 10.72it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-103.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-104.json.gz?download=true


 62%|██████▎   | 100/160 [00:11<00:05, 10.36it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-105.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-106.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-107.json.gz?download=true


 64%|██████▍   | 102/160 [00:11<00:05, 11.11it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-108.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-109.json.gz?download=true


 65%|██████▌   | 104/160 [00:11<00:05,  9.34it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-110.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-111.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-112.json.gz?download=true


 68%|██████▊   | 108/160 [00:12<00:05,  8.93it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-113.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-114.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-115.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-116.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-117.json.gz?download=true


 69%|██████▉   | 111/160 [00:12<00:05,  8.63it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-118.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-119.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-120.json.gz?download=true


 73%|███████▎  | 117/160 [00:13<00:04, 10.44it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-121.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-122.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-123.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-124.json.gz?download=true


 74%|███████▍  | 119/160 [00:13<00:05,  7.83it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-125.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-126.json.gz?download=true


 76%|███████▌  | 121/160 [00:13<00:04,  8.17it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-127.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-128.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-129.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-130.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-131.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-132.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-133.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-134.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-135.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-136.js

 76%|███████▋  | 122/160 [00:14<00:10,  3.47it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-138.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-139.json.gz?download=true


 84%|████████▍ | 134/160 [00:15<00:02,  9.48it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-140.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-141.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-142.json.gz?download=true


 85%|████████▌ | 136/160 [00:15<00:02,  9.20it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-143.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-144.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-145.json.gz?download=true


 88%|████████▊ | 140/160 [00:15<00:01, 11.75it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-146.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-147.json.gz?download=true


 89%|████████▉ | 142/160 [00:15<00:01, 11.20it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-148.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-149.json.gz?download=true


 91%|█████████▏| 146/160 [00:16<00:01, 11.27it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-150.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-151.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-152.json.gz?download=true


 92%|█████████▎| 148/160 [00:16<00:01, 11.57it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-153.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-154.json.gz?download=true


 94%|█████████▍| 150/160 [00:16<00:01,  9.82it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-155.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-156.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-157.json.gz?download=true


 95%|█████████▌| 152/160 [00:16<00:00,  9.71it/s]

https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-158.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-159.json.gz?download=true
https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-160.json.gz?download=true


100%|██████████| 160/160 [00:18<00:00,  8.62it/s]

Failed to download 000-160.json.gz. Error: 404 Client Error: Not Found for url: https://huggingface.co/datasets/allenai/objaverse/resolve/main/metadata/000-160.json.gz?download=true





### Extract the metadata to a JSON with only the relevant information, e.g the models you downloaded

In [32]:
import os
import glob
import gzip
import json
from pygltflib import GLTF2, BufferFormat

# Load metadata
metadata = {}
filtered_metadata = {}
metadata_path = './objaverse/metadata'
for file_name in os.listdir(metadata_path):
    if file_name.endswith(".gz"):
        file_path = os.path.join(metadata_path, file_name)
        with gzip.open(file_path, 'rt', encoding='utf-8') as f:
            metadata = json.load(f)

input_directory = './objaverse/glbs'
output_directory = './objaverse/gltf_embedded'

os.makedirs(output_directory, exist_ok=True)

existing_models = {}
for file_path in glob.iglob(input_directory + '/**/*', recursive=True):
    if os.path.isfile(file_path):
        file_name, file_extension = os.path.splitext(file_path)
        existing_models[os.path.basename(file_name)] = file_path

        if file_extension.lower() == ".glb" and os.path.basename(file_name) in metadata:
            gltf_embedded = GLTF2().load(file_path) 
            gltf_embedded.convert_buffers(BufferFormat.DATAURI)
            gltf_file_path = os.path.join(output_directory, os.path.basename(file_name) + ".gltf")
            gltf_embedded.save(gltf_file_path)
            data = metadata[os.path.basename(file_name)]
            if data["license"] != "by":
                continue
            filtered_metadata = {
                "@context": {
                    "dc": "http://purl.org/dc/elements/1.1/"
                },
                "@id": data["uid"],
                "dc:title": data["name"],
                "dc:creator": {
                    "@id": data["user"]["uid"],
                    "dc:name": data["user"]["username"]
                },
                "dc:description": data["description"],
                "dc:date": data["createdAt"],
                "dc:identifier": data["uri"],
                "dc:source": data["viewerUrl"],
                "dc:language": "en",
                "dc:rights": data["license"],
                "dc:subject": data["tags"],
                "dc:type": "3D Model",
                "dc:relation": data["user"]["profileUrl"],
                "dc:coverage": "",
                "dc:contributor": []
            }

            with open(gltf_file_path, 'r') as f:
                gltf_json = json.load(f)

            xmp_extension = {
                "KHR_xmp_json_ld": {
                    "packets": [filtered_metadata]
                }
            }

            if 'extensions' in gltf_json['asset']:
                if 'KHR_xmp_json_ld' in gltf_json['asset']['extensions']:
                    gltf_json['asset']['extensions']['KHR_xmp_json_ld']['packets'].append(filtered_metadata)
                else:
                    gltf_json['asset']['extensions'].update(xmp_extension)
            else:
                gltf_json['asset']['extensions'] = xmp_extension

            gltf_json['asset']['extensions']['KHR_xmp_json_ld']['packet'] = len(gltf_json['asset']['extensions']['KHR_xmp_json_ld']['packets']) - 1

            if 'extensionsUsed' in gltf_json:
                if "KHR_xmp_json_ld" not in gltf_json['extensionsUsed']:
                    gltf_json['extensionsUsed'].append("KHR_xmp_json_ld")
            else:
                gltf_json['extensionsUsed'] = ["KHR_xmp_json_ld"]

            with open(gltf_file_path, 'w') as f:
                json.dump(gltf_json, f, indent=4)


python3 -m pip install --break-system-packages --user requests tqdm pygltflib

python3 -m pip install --break-system-packages --user requests tqdm pygltflib

python3 -m pip install --break-system-packages --user requests tqdm pygltflib