# Check whether the downloaded files contain errors or omissions.

#### Check the errors and omissions.

In [None]:
import os
import json
import shutil

# Scan the download tree and record every per-view JSON file that is missing,
# unreadable, not valid JSON, or lacks the "offset" field.
#
# Expected layout (taken from the loops below):
#   <root_directory>/<0..159>/<object dir>/<00000..00039>/<00000..00039>.json
root_directory = 'The Download path.'
errors_and_omissions_files = []

# Iterate through the first-level directories from 0 to 159
for i in range(160):
    subdir_path = os.path.join(root_directory, str(i))
    # isdir() is False for missing paths, so no separate exists() check is needed.
    if not os.path.isdir(subdir_path):
        continue
    # Iterate through directories similar to "10005" as the second-level directories.
    # Sorted so the resulting report is deterministic across runs.
    for subdirectory in sorted(os.listdir(subdir_path)):
        subdirectory_path = os.path.join(subdir_path, subdirectory)
        if not os.path.isdir(subdirectory_path):
            continue
        # Iterate through folders from "00000" to "00039"
        for j in range(40):
            json_filename = f"{j:05}/{j:05}.json"
            json_filepath = os.path.join(subdirectory_path, json_filename)
            try:
                with open(json_filepath, 'r', encoding='utf-8') as json_file:
                    json_data = json.load(json_file)
            except (OSError, ValueError):
                # OSError: file missing or unreadable.
                # ValueError: invalid JSON (json.JSONDecodeError) or invalid
                # UTF-8 (UnicodeDecodeError).
                errors_and_omissions_files.append(json_filepath)
                continue
            # A valid file must be a JSON object that contains the "offset" field.
            if not isinstance(json_data, dict) or "offset" not in json_data:
                errors_and_omissions_files.append(json_filepath)

# Store the list of these files as a JSON file.
# Create the output directory first so the write does not fail on a fresh checkout.
os.makedirs('check_download_json_error', exist_ok=True)
with open('check_download_json_error/download_json_error.json', 'w', encoding='utf-8') as output_file:
    json.dump(errors_and_omissions_files, output_file, indent=4)

#### Option 1: Re-download the errors and omissions files

In [None]:
# Turn the list of broken JSON file paths into the unique set of
# "<first-level dir>/<object dir>" keys that have to be downloaded again.
with open('check_download_json_error/download_json_error.json', 'r', encoding='utf-8') as json_file:
    json_content = json.load(json_file)

# Every reported path ends in
#   <first-level dir>/<object dir>/<view dir>/<view>.json
# so the key is taken relative to the END of the path. Indexing from the front
# (e.g. components 5 and 6) only works when the download root happens to have
# one specific depth.
unique_keys = set()
for json_line in json_content:
    # normpath + os.sep handles mixed separators produced by os.path.join.
    parts = os.path.normpath(json_line).split(os.sep)
    unique_keys.add(parts[-4] + '/' + parts[-3])

# Sorted so the generated file is stable between runs.
json_download_content = sorted(unique_keys)

with open('check_download_json_error/add_download.json', 'w', encoding='utf-8') as output_file:
    json.dump(json_download_content, output_file, indent=4)

After you get 'add_download.json', you can run:
python ./download_gobjaverse_280k.py PATH_TO_DOWNLOAD check_download_json_error/add_download.json 10

#### Option 2: Delete the errors and omissions files and change 'download_280k_lvis.json'.

In [None]:
# Remove every download directory listed in 'add_download.json'.

with open('check_download_json_error/add_download.json', 'r') as listing:
    path_list = json.load(listing)

current_directory = 'The Download path.'

for entry in path_list:
    full_path = os.path.join(current_directory, entry)
    try:
        # isdir() is already False for paths that do not exist.
        if os.path.isdir(full_path):
            shutil.rmtree(full_path)
    except Exception as e:
        # Best effort: report the failure and continue with the next entry.
        print(f"Can not delete {full_path}: {str(e)}")

In [None]:
# Write a copy of 'download_280k_lvis.json' without the entries listed in
# 'add_download.json' (the objects whose downloads were broken or incomplete).

with open('annotations/download_280k_lvis.json', 'r', encoding='utf-8') as json_file:
    ori_full_list = json.load(json_file)

with open('check_download_json_error/add_download.json', 'r', encoding='utf-8') as json_file:
    error_list = json.load(json_file)

# Membership tests against a set are O(1); testing against the list would rescan
# it for every entry of the full list.
# Assumes the entries are strings (hashable), as written by the Option 1 cell.
error_set = set(error_list)
filtered_list = [x for x in ori_full_list if x not in error_set]

with open('annotations/download_280k_lvis_wo_error.json', 'w', encoding='utf-8') as output_file:
    json.dump(filtered_list, output_file, indent=4)

Alternatively, you can directly download the error-free JSON file: download_280k_lvis_wo_error.json

In [None]:
from huggingface_hub import hf_hub_download

# Fetch 'download_280k_lvis_wo_error.json' from the "alexzyqi/GPT4Point"
# dataset repository and place it under 'annotations/'.
# NOTE(review): the earlier comment here referred to
# 'gobjaverse_280k_index_to_objaverse.json' (described as officially provided);
# this call downloads a different file - confirm which one was intended.
hf_hub_download(
    repo_id="alexzyqi/GPT4Point",
    filename='download_280k_lvis_wo_error.json',
    repo_type="dataset",
    local_dir='annotations/',
)
