In [2]:
!pip install requests tqdm trimesh thingi10k numpy-stl numpy



In [None]:
import os
import requests
import zipfile
import shutil
import trimesh
import thingi10k
import numpy as np
from stl import mesh
from tqdm import tqdm
from urllib.parse import urlencode
import warnings

In [2]:
import os
import requests
import zipfile
import shutil
import trimesh
import numpy as np
import thingi10k
from stl import mesh
from tqdm import tqdm
import warnings
from urllib.parse import urlencode

class DatasetsManager:
    """
    Download several 3D datasets and convert their meshes to STL under one root folder.

    Layout produced (relative to ``root_dir``):
        thingi10k/models/<file_id>.stl
        ModelNet40/<category>/<name>.stl
        abc_dataset/models/<name>.stl
        custom_dataset/<contents of the downloaded Yandex.Disk archives>

    Every ``prepare_*`` method can be called on its own and skips work that is
    already present on disk.
    """

    # (connect, read) timeouts in seconds, so a stalled server cannot hang the notebook.
    REQUEST_TIMEOUT = (10, 60)
    # The ABC step is treated as done once the output folder holds more files than this.
    ABC_MIN_PROCESSED_FILES = 1000

    def __init__(self, root_dir="data"):
        """
        Remember the per-dataset folders and create ``root_dir`` if it is missing.

        Args:
            root_dir: Folder under which all datasets are stored.
        """
        self.root_dir = root_dir
        self.thingi10k_dir = os.path.join(root_dir, "thingi10k")
        self.modelnet_dir = os.path.join(root_dir, "ModelNet40")
        self.custom_data_dir = os.path.join(root_dir, "custom_dataset")
        self.abc_dir = os.path.join(root_dir, "abc_dataset")

        os.makedirs(self.root_dir, exist_ok=True)
        print(f"DatasetsManager initialized. All data will be stored in '{self.root_dir}'")

    def prepare_all_datasets(self):
        """Run the four preparation steps in order: Thingi10k, ModelNet40, ABC, custom."""
        print("\n--- Starting all dataset preparation processes ---")
        self.prepare_thingi10k()
        self.prepare_modelnet40()
        self.prepare_abc_dataset()
        self.prepare_custom_dataset()
        print("\n--- All dataset preparation processes are complete! ---")

    def prepare_thingi10k(self):
        """[1/4] Convert every Thingi10k entry to ``thingi10k/models/<file_id>.stl``.

        Entries whose STL already exists are skipped, so the step can be resumed.
        A failure on a single entry is reported and does not stop the loop.
        """
        print("\n[1/4] Processing Thingi10k...")
        models_out_dir = os.path.join(self.thingi10k_dir, "models")
        os.makedirs(models_out_dir, exist_ok=True)

        warnings.filterwarnings("ignore", category=UserWarning, module="thingi10k")
        try:
            thingi10k.init()
        except Exception as e:
            print(f"Could not initialize Thingi10k dataset. Maybe servers are down? Error: {e}")
            return

        for entry in tqdm(thingi10k.dataset(), desc="Converting Thingi10k"):
            file_id = entry["file_id"]
            output_filepath = os.path.join(models_out_dir, f"{file_id}.stl")
            if os.path.exists(output_filepath):
                continue

            try:
                with np.load(entry["file_path"]) as data:
                    vertices = np.asarray(data["vertices"], dtype=np.float64)
                    facets = np.asarray(data["facets"], dtype=np.int32)

                # Nothing to write for a mesh without vertices or faces.
                if vertices.shape[0] < 3 or facets.shape[0] == 0:
                    continue

                # Indexing vertices by facets gathers the corner coordinates of every face
                # (assumes facets holds per-face vertex indices -- TODO confirm shape (n, 3)).
                mesh_data = vertices[facets]
                stl_mesh = mesh.Mesh(np.zeros(mesh_data.shape[0], dtype=mesh.Mesh.dtype))
                stl_mesh.vectors = mesh_data
                stl_mesh.save(output_filepath)
            except Exception as e:
                print(f"Skipping Thingi10k file_id {file_id}. Error: {e}")
        print("Thingi10k preparation complete.")

    def prepare_modelnet40(self):
        """[2/4] Download ModelNet40 and flatten it to ``ModelNet40/<category>/<name>.stl``.

        Each category's ``train`` and ``test`` folders are converted from .off to
        .stl and then deleted, so the original split is not kept on disk.

        Raises:
            requests.exceptions.RequestException: if the archive download fails.
        """
        print("\n[2/4] Processing ModelNet40...")
        url = "http://modelnet.cs.princeton.edu/ModelNet40.zip"
        zip_path = os.path.join(self.root_dir, "ModelNet40.zip")

        if self._modelnet_is_processed():
            print("ModelNet40 appears to be processed already. Skipping.")
            return

        if not os.path.exists(self.modelnet_dir):
            self._download_and_extract(url, zip_path, self.root_dir)

        # Materialise the listing so tqdm knows the total and the scandir handle is released.
        categories = sorted(os.scandir(self.modelnet_dir), key=lambda d: d.name)
        for category_d in tqdm(categories, desc="Processing ModelNet40 categories"):
            if not category_d.is_dir() or category_d.name.startswith('__'):
                continue

            for subfolder in ["train", "test"]:
                subfolder_path = os.path.join(category_d.path, subfolder)
                if not os.path.exists(subfolder_path):
                    continue

                for off_file in sorted(os.scandir(subfolder_path), key=lambda d: d.name):
                    if not off_file.name.endswith(".off"):
                        continue
                    stl_filepath = os.path.join(category_d.path, self._stl_name(off_file.name))
                    if os.path.exists(stl_filepath):
                        continue
                    self._convert_to_stl(off_file.path, stl_filepath)
                # The split folder is removed even if some files failed to convert.
                shutil.rmtree(subfolder_path)
        print("ModelNet40 preparation complete.")

    def prepare_abc_dataset(self):
        """[3/4] Download the ABC archive and convert its '512' .obj meshes to .stl.

        Files from ``100k/train/512`` and ``100k/test/512`` are written to
        ``abc_dataset/models``; the extracted ``100k`` folder is deleted afterwards.

        Raises:
            requests.exceptions.RequestException: if the archive download fails.
        """
        print("\n[3/4] Processing ABC Dataset...")
        url = "https://archive.nyu.edu/retrieve/120666/abc_full_100k_v00.zip"
        zip_path = os.path.join(self.root_dir, "abc_full_100k_v00.zip")

        # Folder the code expects the archive to unpack into, directly under root_dir.
        extracted_path = os.path.join(self.root_dir, "100k")

        models_out_dir = os.path.join(self.abc_dir, "models")
        os.makedirs(models_out_dir, exist_ok=True)

        if len(os.listdir(models_out_dir)) > self.ABC_MIN_PROCESSED_FILES:
            print("ABC Dataset appears to be processed already. Skipping.")
            return

        if not os.path.exists(extracted_path):
            self._download_and_extract(
                url, zip_path, self.root_dir, note=" (this may take a while)"
            )

        dirs_to_process = [
            os.path.join(extracted_path, "train", "512"),
            os.path.join(extracted_path, "test", "512")
        ]

        for source_dir in tqdm(dirs_to_process, desc="Processing ABC splits"):
            if not os.path.exists(source_dir):
                print(f"Warning: Source directory not found: {source_dir}")
                continue

            obj_entries = sorted(os.scandir(source_dir), key=lambda d: d.name)
            for obj_file in tqdm(obj_entries, desc=f"Converting from {os.path.basename(source_dir)}", leave=False):
                if not obj_file.name.endswith(".obj"):
                    continue

                stl_filepath = os.path.join(models_out_dir, self._stl_name(obj_file.name))
                if os.path.exists(stl_filepath):
                    continue

                self._convert_to_stl(obj_file.path, stl_filepath)

        # The archive may unpack to a different layout; only delete what actually exists.
        if os.path.isdir(extracted_path):
            print(f"Cleaning up original extracted folder: {extracted_path}")
            shutil.rmtree(extracted_path)
        else:
            print(f"Warning: Expected extracted folder not found, nothing to clean up: {extracted_path}")
        print("ABC Dataset preparation complete.")

    def prepare_custom_dataset(self):
        """[4/4] Download the custom train/test archives from Yandex.Disk into ``custom_dataset``.

        The step is skipped when the target folder already contains anything.
        Failed archives are reported rather than raised.
        """
        print("\n[4/4] Processing Custom Dataset from Yandex.Disk...")
        os.makedirs(self.custom_data_dir, exist_ok=True)

        if len(os.listdir(self.custom_data_dir)) > 0:
            print("Custom dataset appears to be prepared. Skipping.")
            return

        files_to_download = {
            "train_data": "https://disk.yandex.ru/d/RRXJu9ZtEmSXzQ",
            "test_data": "https://disk.yandex.ru/d/TmbB7BsGzg1dQQ",
        }
        failed = []
        for key, val in files_to_download.items():
            if not self.download_and_unzip_yandex_disk(val, self.custom_data_dir):
                failed.append(key)

        if failed:
            # A partially filled folder would be treated as "prepared" on the next run.
            print(
                f"Custom dataset preparation finished with errors; failed archives: "
                f"{', '.join(failed)}. Delete '{self.custom_data_dir}' to retry."
            )
            return

        print("Custom dataset preparation complete.")

    def _modelnet_is_processed(self):
        """Return True when ModelNet40 exists, is non-empty and no category has 'train'/'test' left."""
        if not os.path.isdir(self.modelnet_dir):
            return False
        entries = list(os.scandir(self.modelnet_dir))
        if not entries:
            return False
        for item in entries:
            if item.is_dir() and {"train", "test"} & set(os.listdir(item.path)):
                return False
        return True

    @staticmethod
    def _stl_name(filename):
        """Return ``filename`` with its extension (only the last suffix) replaced by '.stl'."""
        return os.path.splitext(filename)[0] + ".stl"

    @staticmethod
    def _convert_to_stl(source_path, stl_path):
        """Load a mesh with trimesh and export it to ``stl_path``; return True on success.

        Errors are printed, not raised. A partially written output file is removed
        so a later run does not mistake it for a finished conversion.
        """
        try:
            loaded = trimesh.load_mesh(source_path)
            loaded.export(stl_path)
        except Exception as e:
            print(f"Failed to process {source_path}: {e}")
            if os.path.exists(stl_path):
                os.remove(stl_path)
            return False
        return True

    def _download_and_extract(self, url, zip_path, extract_to, note=""):
        """Download a zip to ``zip_path``, extract it into ``extract_to`` and delete the zip.

        The zip is deleted even when extraction fails, so no broken archive is left behind.

        Raises:
            requests.exceptions.RequestException: if the download fails.
            zipfile.BadZipFile: if the downloaded file is not a valid zip archive.
        """
        self._download_file(url, zip_path)
        try:
            print(f"Extracting {zip_path}{note}...")
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(extract_to)
        finally:
            if os.path.exists(zip_path):
                os.remove(zip_path)

    def _download_file(self, url, filename):
        """Stream ``url`` to ``filename`` with a progress bar.

        Data is written to ``filename + '.part'`` and renamed only after the whole
        body has arrived, so an interrupted download never leaves a truncated file
        under the final name.

        Raises:
            requests.exceptions.RequestException: re-raised after reporting the error.
        """
        print(f"Downloading {url} to {filename}...")
        part_path = filename + ".part"
        try:
            with requests.get(url, stream=True, timeout=self.REQUEST_TIMEOUT) as r:
                r.raise_for_status()
                # A missing content-length header yields 0 and is passed to tqdm as-is.
                total_size = int(r.headers.get('content-length', 0))
                with open(part_path, 'wb') as f, tqdm(
                    total=total_size, unit='iB', unit_scale=True, desc=os.path.basename(filename)
                ) as bar:
                    for chunk in r.iter_content(chunk_size=8192):
                        size = f.write(chunk)
                        bar.update(size)
            os.replace(part_path, filename)
        except requests.exceptions.RequestException as e:
            print(f"Error downloading file: {e}")
            raise
        finally:
            # After a successful rename the .part file no longer exists.
            if os.path.exists(part_path):
                os.remove(part_path)

    def download_and_unzip_yandex_disk(self, public_url, extract_to_folder):
        """Resolve a public Yandex.Disk link, download the archive and extract it.

        Args:
            public_url: Public share link (``https://disk.yandex.ru/d/...``).
            extract_to_folder: Folder the archive is extracted into.

        Returns:
            True if the archive was downloaded and extracted, False otherwise.
            Errors are printed, never raised.
        """
        print(f"Starting to process Yandex.Disk link: {public_url}")
        base_api_url = "https://cloud-api.yandex.net/v1/disk/public/resources/download?"
        api_url = base_api_url + urlencode(dict(public_key=public_url))
        zip_filename = os.path.join(self.root_dir, "temp_yandex_download.zip")
        try:
            response = requests.get(api_url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            download_url = response.json().get("href")
            if not download_url:
                print(f"Error: Could not get a direct download link for {public_url}")
                return False

            self._download_and_extract(download_url, zip_filename, extract_to_folder)
            return True
        except requests.exceptions.RequestException as e:
            print(f"A network or API error occurred: {e}")
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
        return False

# Example usage: create a manager rooted at "my_3d_datasets" and run only the ABC step.
# Uncomment prepare_all_datasets() to process every dataset instead.
if __name__ == "__main__":
    manager = DatasetsManager(root_dir="my_3d_datasets")
    # manager.prepare_all_datasets()
    manager.prepare_abc_dataset()

DatasetsManager initialized. All data will be stored in 'my_3d_datasets'

[3/4] Processing ABC Dataset...


Processing ABC splits: 100%|██████████| 2/2 [03:26<00:00, 103.49s/it]


Cleaning up original extracted folder: my_3d_datasets/100k
ABC Dataset preparation complete.


2) ObjectNet3D скачивается по этим ссылкам:
модели: wget ftp://cs.stanford.edu/cs/cvgl/ObjectNet3D/ObjectNet3D_cads.zip
изображения: wget ftp://cs.stanford.edu/cs/cvgl/ObjectNet3D/ObjectNet3D_images.zip (большой архив)
аннотации: wget ftp://cs.stanford.edu/cs/cvgl/ObjectNet3D/ObjectNet3D_annotations.zip (для того чтобы сопоставлять фото и CAD-модели)
Этот архив устроен гораздо сложнее.

Архив с моделями выглядит так: ObjectNet3D/CAD/off/[категория]/[номер, например 01].obj. В папке категории находятся и другие .obj-файлы, но мне нужно брать только те, которые называются [номер].obj (например, 03.obj). Там есть и файлы вида 03_lalala_bebebe.obj — такие файлы обрабатывать не нужно.
Архив с моделями выглядит так: ObjectNet3D/CAD/off/[категория]/[номер, например 01].obj
