In [None]:
# Mount Google Drive at /content/drive (Colab-only API). Later cells read the
# training images from, and save the trained weights to, paths under this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install torch torchvision trimesh==3.9.36 plyfile sentence-transformers scikit-image numpy scipy

Collecting trimesh==3.9.36
  Downloading trimesh-3.9.36-py3-none-any.whl.metadata (17 kB)
Collecting plyfile
  Downloading plyfile-1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvi

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sentence_transformers import SentenceTransformer
from trimesh.voxel import VoxelGrid
from skimage.measure import marching_cubes
from PIL import Image
import os
from scipy.ndimage import zoom

In [None]:
class TextTo3DModel(nn.Module):
    """MLP that maps a text embedding to a dense cubic grid of values in (0, 1).

    Args:
        embedding_dim: Width of the input text embedding (default 384).
        grid_size: Edge length of the cubic output grid (default 64).
        hidden_dims: Widths of the hidden ``Linear`` + ``ReLU`` stages
            (default ``(1024, 2048)``).

    With the defaults the layer layout, and therefore the ``state_dict`` keys
    (``text_to_sdf.0``, ``.2``, ``.4``), are the same as the fixed-size
    version of this model. Note that the default output layer alone holds
    2048 * 64**3 = 536,870,912 weights (about 2 GiB in float32).
    """

    def __init__(self, embedding_dim=384, grid_size=64, hidden_dims=(1024, 2048)):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.grid_size = grid_size

        layers = []
        in_features = embedding_dim
        for width in hidden_dims:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        # One output per voxel; reshaped to a cube in forward().
        layers.append(nn.Linear(in_features, grid_size ** 3))
        self.text_to_sdf = nn.Sequential(*layers)

    def forward(self, text_embedding):
        """Return a ``(batch, grid_size, grid_size, grid_size)`` tensor.

        Args:
            text_embedding: Float tensor of shape ``(batch, embedding_dim)``.
                A single un-batched ``(embedding_dim,)`` vector is also
                accepted and treated as a batch of one.

        Returns:
            Sigmoid-activated grid values, one cube per batch element.
        """
        if text_embedding.dim() == 1:
            # Promote a lone embedding to a batch of one instead of
            # mis-reading its feature dimension as the batch size.
            text_embedding = text_embedding.unsqueeze(0)
        batch_size = text_embedding.size(0)
        side = self.grid_size
        sdf_grid = self.text_to_sdf(text_embedding)
        sdf_grid = sdf_grid.view(batch_size, side, side, side)
        return torch.sigmoid(sdf_grid)
def _monument_prompt(folder_name):
    """Build the training prompt for one monument folder name."""
    display_name = folder_name.replace('_', ' ').title()
    # "Humayun_s Tomb" becomes "Humayun S Tomb" after the steps above; restore
    # the possessive *after* title-casing so the "s" is not re-capitalised and
    # no stray space is left before the apostrophe.
    display_name = display_name.replace(' S Tomb', '’s Tomb')
    return f"{display_name} with detailed architecture and cultural features"


def load_heritage_dataset(data_folder, monument_classes=None, image_size=(224, 224)):
    """Load monument images and build one text prompt per image.

    Each class is expected to live in ``data_folder/<class name>``. Files
    ending in .png/.jpg/.jpeg (case-insensitive) are opened, converted to
    RGB and resized to ``image_size``. Unreadable files and missing class
    folders are reported with ``print`` and skipped.

    Args:
        data_folder: Directory that contains one sub-folder per monument.
        monument_classes: Optional iterable of sub-folder names to load.
            Defaults to the built-in list of 24 monuments.
        image_size: ``(width, height)`` passed to ``Image.resize``.

    Returns:
        ``(images, prompts)``: a list of RGB ``numpy`` arrays and a parallel
        list of prompt strings. Every image of a monument gets the same
        prompt. Files are visited in sorted order so the result is
        deterministic across runs.
    """
    if monument_classes is None:
        monument_classes = [
            "victoria memorial",
            "tanjavur temple",
            "tajmahal",
            "Sun Temple Konark",
            "qutub_minar",
            "mysore_palace",
            "lotus_temple",
            "khajuraho",
            "jamali_kamali_tomb",
            "iron_pillar",
            "india gate pics",
            "Humayun_s Tomb",
            "hawa mahal pics",
            "golden temple",
            "Gateway of India",
            "Fatehpur Sikri",
            "Ellora Caves",
            "Chhota Imambara",
            "charminar",
            "Charar-E-Sharif",
            "basilica of bom jesus",
            "alai_minar",
            "alai_darwaza",
            "Ajanta Caves"
        ]

    images = []
    prompts = []
    classes_found = 0

    for monument in monument_classes:
        folder_path = os.path.join(data_folder, monument)
        if not os.path.isdir(folder_path):
            print(f"Warning: Folder {folder_path} not found, skipping.")
            continue

        classes_found += 1
        # The prompt depends only on the class, so build it once per folder.
        prompt = _monument_prompt(monument)

        # os.listdir order is arbitrary; sort for a reproducible dataset order.
        for filename in sorted(os.listdir(folder_path)):
            if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue
            img_path = os.path.join(folder_path, filename)
            try:
                # The context manager closes the underlying file handle even
                # when decoding fails part-way through.
                with Image.open(img_path) as img:
                    rgb = img.convert('RGB').resize(image_size, Image.LANCZOS)
                pixels = np.array(rgb)
            except Exception as e:
                # Best-effort loading: one corrupt file must not abort the run.
                print(f"Error loading {img_path}: {e}, skipping.")
                continue
            images.append(pixels)
            prompts.append(prompt)

    # Report the folders actually found rather than the number requested.
    print(f"Loaded {len(images)} images from {classes_found} classes.")
    return images, prompts

In [None]:
def image_to_sdf(image, grid_size=64):
    """Turn an RGB image into a crude cubic training target.

    The image is averaged over its colour channels, thresholded at 128 and
    resized with nearest-neighbour sampling to ``grid_size x grid_size``. That
    2-D mask is then repeated unchanged along the depth axis. Cells whose
    pixel was brighter than 128 get the value 0.5; all other cells get 1.0.
    Despite the name, this is a two-level occupancy encoding, not a true
    signed distance field.

    Args:
        image: Array of shape ``(height, width, channels)``.
        grid_size: Edge length of the output cube (default 64). The resize
            factors are derived from the image's own height and width, so
            inputs of any size are resampled over their full extent.

    Returns:
        ``torch.float32`` tensor of shape ``(grid_size, grid_size, grid_size)``.
    """
    gray = np.mean(image, axis=2).astype(np.float32)
    bright = (gray > 128).astype(np.uint8)

    # Every depth slice is identical, so threshold and resize once in 2-D and
    # replicate afterwards instead of resampling a full 3-D volume.
    height, width = bright.shape
    occupied = zoom(bright, (grid_size / height, grid_size / width), order=0)

    # The mask is already a dense 0/1 grid, so no voxel-library round trip is
    # needed; a single vectorised select replaces the per-voxel Python loop.
    sdf_slice = np.where(occupied > 0, 0.5, 1.0).astype(np.float32)
    sdf = np.repeat(sdf_slice[:, :, np.newaxis], grid_size, axis=2)
    return torch.tensor(sdf, dtype=torch.float32)


# --- Data preparation and training setup -----------------------------------
# Seed the RNGs so weight initialisation and DataLoader shuffling are
# repeatable from a fresh kernel.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

data_folder = "/content/drive/MyDrive/train"

images, prompts = load_heritage_dataset(data_folder)
if not images:
    # Fail here with a clear message instead of inside torch.stack below.
    raise RuntimeError(
        f"No training images were loaded from {data_folder}; "
        "check that Drive is mounted and the class folders exist."
    )

# Encode one embedding per prompt. The model's default input width (384) is
# presumably the embedding size of this encoder - confirm if the encoder is
# swapped.
text_encoder = SentenceTransformer('all-MiniLM-L6-v2')
text_embeddings = text_encoder.encode(prompts, show_progress_bar=True)

# One 64x64x64 float32 target per image (1 MiB each), all held in memory.
target_sdfs = torch.stack([image_to_sdf(img) for img in images])

# Pair each embedding with its target; the default collate function batches
# the pairs into tensors.
dataset = list(zip(text_embeddings, target_sdfs))
dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TextTo3DModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

criterion = nn.MSELoss()

Loaded 3081 images from 24 classes.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/97 [00:00<?, ?it/s]

In [None]:

# --- Training loop ----------------------------------------------------------
num_epochs = 50
model_save_path = "/content/drive/MyDrive/train/heritage_3d_model.pth"

if len(dataloader) == 0:
    # Avoid a ZeroDivisionError when averaging the loss over no batches.
    raise RuntimeError("dataloader is empty; there is nothing to train on.")

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    samples_seen = 0
    for batch_text_embeddings, batch_target_sdfs in dataloader:
        batch_text_embeddings = batch_text_embeddings.to(device).float()
        batch_target_sdfs = batch_target_sdfs.to(device)

        optimizer.zero_grad()
        output_sdfs = model(batch_text_embeddings)
        loss = criterion(output_sdfs, batch_target_sdfs)
        loss.backward()
        optimizer.step()

        # criterion returns a per-batch mean; weight it by the batch size so
        # the shorter final batch does not count as much as a full one.
        batch_len = batch_text_embeddings.size(0)
        total_loss += loss.item() * batch_len
        samples_seen += batch_len

    avg_loss = total_loss / samples_seen
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# Only the weights are saved; rebuild TextTo3DModel with the same sizes before
# calling load_state_dict.
torch.save(model.state_dict(), model_save_path)
print("Model training completed and saved as heritage_3d_model.pth")

Epoch [1/50], Loss: 0.0682
Epoch [2/50], Loss: 0.0567
Epoch [3/50], Loss: 0.0566
Epoch [4/50], Loss: 0.0564
Epoch [5/50], Loss: 0.0560
Epoch [6/50], Loss: 0.0558
Epoch [7/50], Loss: 0.0558
Epoch [8/50], Loss: 0.0557
Epoch [9/50], Loss: 0.0556
Epoch [10/50], Loss: 0.0555
Epoch [11/50], Loss: 0.0554
Epoch [12/50], Loss: 0.0553
Epoch [13/50], Loss: 0.0553
Epoch [14/50], Loss: 0.0553
Epoch [15/50], Loss: 0.0552
Epoch [16/50], Loss: 0.0552
Epoch [17/50], Loss: 0.0552
Epoch [18/50], Loss: 0.0552
Epoch [19/50], Loss: 0.0552
Epoch [20/50], Loss: 0.0551
Epoch [21/50], Loss: 0.0552
Epoch [22/50], Loss: 0.0551
Epoch [23/50], Loss: 0.0551
Epoch [24/50], Loss: 0.0551
Epoch [25/50], Loss: 0.0551
Epoch [26/50], Loss: 0.0551
Epoch [27/50], Loss: 0.0550
Epoch [28/50], Loss: 0.0550
Epoch [29/50], Loss: 0.0550
Epoch [30/50], Loss: 0.0550
Epoch [31/50], Loss: 0.0550
Epoch [32/50], Loss: 0.0550
Epoch [33/50], Loss: 0.0550
Epoch [34/50], Loss: 0.0551
Epoch [35/50], Loss: 0.0551
Epoch [36/50], Loss: 0.0550
E