In [3]:
import bpy
import trimesh
import glob , os , shutil
import torch
import numpy as np
from plyfile import PlyData
import cv2
import argparse
import time
import PIL

## Run in blender

In [8]:
import bpy
import glob
local_path = "/Users/pataranansethpakdee/Documents/GitHub/"
vrm_filepath = local_path+"3D-Waifu-Model-Generator/Datasets/3D_VRMModel/"
processed_filepath = local_path + "3D-Waifu-Model-Generator/Datasets/3D_ProcessedModel/"
image_filepath = local_path + "3D-Waifu-Model-Generator/Datasets/2D_Image/"
vrm_files = glob.glob(f'{vrm_filepath}*')

def purge_orphans():
    bpy.ops.outliner.orphans_purge(do_local_ids=True, do_linked_ids=True, do_recursive=True)

def clean_scene():
    scene = bpy.context.scene
    bpy.data.scenes.new("Scene")
    bpy.data.scenes.remove(scene, do_unlink=True)
    purge_orphans()
    
def set_new_camera():
    scene = bpy.context.scene
    cam_data = bpy.data.cameras.new(name="Camera")
    cam = bpy.data.objects.new(name="Camera", object_data=cam_data)
    scene.collection.objects.link(cam)
    scene.camera = cam
    cam.location = (0, -5.3, 0.8)
    cam.rotation_euler = (1.5708, 0, 0) 

def new_scene():
    clean_scene()
    set_new_camera()

count = 0

for file in vrm_files:
    
    file_name = str(count).zfill(4)

    clean_scene()
    set_new_camera()
    bpy.ops.import_scene.vrm(filepath=file)
    scene = bpy.context.scene


    # Render settings
    scene.render.filepath = f'{image_filepath}{file_name}.png'
    scene.render.image_settings.file_format = 'PNG'
    scene.render.resolution_x = 1920
    scene.render.resolution_y = 1080

    # Render the image
    bpy.ops.render.render(write_still=True)

    #bpy.ops.export_scene.gltf(filepath=f"{processed_filepath}{file_name}.glb")
    bpy.ops.export_scene.gltf(filepath=f"{processed_filepath}{file_name}.glb", 
                         export_format='GLB', 
                         export_image_format='JPEG',
                         export_image_add_webp=True,
                         export_image_webp_fallback=True,
                         export_texcoords=True,
                         export_normals=True,
                         export_materials='EXPORT',
                         export_vertex_color='MATERIAL',
                         export_all_vertex_colors=True)
    count+=1



# Convert glb to pointcloud (ply)

Run this in Window 

In [9]:
#Convert .ply to point cloud
ply_path = "Datasets/3D_ProcessedModel"
ply_files = glob.glob(r"Datasets/3D_ProcessedModel/*")

for file in ply_files:
    file_name = (file.split('/')[-1]).split(".")[0]
    path = f"{ply_path}/{file_name}.glb"
    scene = trimesh.load(file)

    # Traverse all geometries (meshes) in the scene
    point_clouds = []
    for name, mesh in scene.geometry.items():
        vertices = mesh.vertices
        colors = mesh.visual.to_color().vertex_colors  # Optional: may be texture-based
        
        # Sample points from the mesh
        num_samples = 5000  # Adjust this number for more points
        if len(vertices) > num_samples:
            indices = np.random.choice(len(vertices), num_samples, replace=False)
            sampled_vertices = vertices[indices]
            sampled_colors = colors[indices]
        else:
            sampled_vertices = vertices
            sampled_colors = colors

        # Combine vertices and colors into a point cloud
        point_cloud_with_color = [(*v, *c[:3]) for v, c in zip(sampled_vertices, sampled_colors)]
        point_clouds.append(point_cloud_with_color)
    all_points = [point for pc in point_clouds for point in pc]
    points = np.array([p[:3] for p in all_points])
    colors = np.array([p[3:] for p in all_points]) / 255.0  # Normalize color values

    # Create Open3D point cloud
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(colors)
    o3d.io.write_point_cloud(f"Processed_Data/3D_PointCloud/{file_name}.ply", pcd)
#o3d.io.write_point_cloud(f"Processed_Data/3D_PointCloud/{file_name}.pts",pcd)




# DO NOT RUN

In [10]:
file_names = glob.glob("Processed_Data/3D_PointCloud/*")
img_path = "Processed_Data/360deg_img/"
comfy_path = "C:/Users/patar/Downloads/ComfyUI_windows_portable_nvidia/ComfyUI_windows_portable/ComfyUI/output"
names = []
for file_name in file_names:
    folder_name = file_name.split("\\")[-1].split(".")[0]
    names.append(folder_name)
    if(not os.path.exists(img_path+folder_name)):
        os.mkdir(img_path+folder_name)
comfy_files = glob.glob(comfy_path+"/*")
for name in names:
    for comfy_file in comfy_files:
        if(name==comfy_file.split("\\")[-1].split('.')[0]):
            shutil.move(comfy_file,img_path+name)
    

# Feature Extraction (SuperPoint)

In [11]:

def load_images(image_paths):
    images = []
    for path in image_paths:
        img = cv2.imread(path)
        images.append(img)
    return np.array(images)

In [12]:
!git clone https://github.com/magicleap/SuperPointPretrainedNetwork
!cd SuperPointPretrainedNetwork


Cloning into 'SuperPointPretrainedNetwork'...


In [13]:
class SuperPointNet(torch.nn.Module):
  """ Pytorch definition of SuperPoint Network. """
  def __init__(self):
    super(SuperPointNet, self).__init__()
    self.relu = torch.nn.ReLU(inplace=True)
    self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2)
    c1, c2, c3, c4, c5, d1 = 64, 64, 128, 128, 256, 256
    # Shared Encoder.
    self.conv1a = torch.nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1)
    self.conv1b = torch.nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1)
    self.conv2a = torch.nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1)
    self.conv2b = torch.nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1)
    self.conv3a = torch.nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1)
    self.conv3b = torch.nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1)
    self.conv4a = torch.nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1)
    self.conv4b = torch.nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1)
    # Detector Head.
    self.convPa = torch.nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
    self.convPb = torch.nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0)
    # Descriptor Head.
    self.convDa = torch.nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
    self.convDb = torch.nn.Conv2d(c5, d1, kernel_size=1, stride=1, padding=0)

  def forward(self, x):
    """ Forward pass that jointly computes unprocessed point and descriptor
    tensors.
    Input
      x: Image pytorch tensor shaped N x 1 x H x W.
    Output
      semi: Output point pytorch tensor shaped N x 65 x H/8 x W/8.
      desc: Output descriptor pytorch tensor shaped N x 256 x H/8 x W/8.
    """
    # Shared Encoder.
    x = self.relu(self.conv1a(x))
    x = self.relu(self.conv1b(x))
    x = self.pool(x)
    x = self.relu(self.conv2a(x))
    x = self.relu(self.conv2b(x))
    x = self.pool(x)
    x = self.relu(self.conv3a(x))
    x = self.relu(self.conv3b(x))
    x = self.pool(x)
    x = self.relu(self.conv4a(x))
    x = self.relu(self.conv4b(x))
    # Detector Head.
    cPa = self.relu(self.convPa(x))
    semi = self.convPb(cPa)
    # Descriptor Head.
    cDa = self.relu(self.convDa(x))
    desc = self.convDb(cDa)
    dn = torch.norm(desc, p=2, dim=1) # Compute the norm.
    desc = desc.div(torch.unsqueeze(dn, 1)) # Divide by norm to normalize.
    return semi, desc
class SuperPointFrontend(object):
  """ Wrapper around pytorch net to help with pre and post image processing. """
  def __init__(self, weights_path, nms_dist, conf_thresh, nn_thresh,
               cuda=False):
    self.name = 'SuperPoint'
    self.cuda = cuda
    self.nms_dist = nms_dist
    self.conf_thresh = conf_thresh
    self.nn_thresh = nn_thresh # L2 descriptor distance for good match.
    self.cell = 8 # Size of each output cell. Keep this fixed.
    self.border_remove = 4 # Remove points this close to the border.

    # Load the network in inference mode.
    self.net = SuperPointNet()
    if cuda:
      # Train on GPU, deploy on GPU.
      self.net.load_state_dict(torch.load(weights_path))
      self.net = self.net.cuda()
    else:
      # Train on GPU, deploy on CPU.
      self.net.load_state_dict(torch.load(weights_path,
                               map_location=lambda storage, loc: storage))
    self.net.eval()

  def nms_fast(self, in_corners, H, W, dist_thresh):
   
    grid = np.zeros((H, W)).astype(int) # Track NMS data.
    inds = np.zeros((H, W)).astype(int) # Store indices of points.
    # Sort by confidence and round to nearest int.
    inds1 = np.argsort(-in_corners[2,:])
    corners = in_corners[:,inds1]
    rcorners = corners[:2,:].round().astype(int) # Rounded corners.
    # Check for edge case of 0 or 1 corners.
    if rcorners.shape[1] == 0:
      return np.zeros((3,0)).astype(int), np.zeros(0).astype(int)
    if rcorners.shape[1] == 1:
      out = np.vstack((rcorners, in_corners[2])).reshape(3,1)
      return out, np.zeros((1)).astype(int)
    # Initialize the grid.
    for i, rc in enumerate(rcorners.T):
      grid[rcorners[1,i], rcorners[0,i]] = 1
      inds[rcorners[1,i], rcorners[0,i]] = i
    # Pad the border of the grid, so that we can NMS points near the border.
    pad = dist_thresh
    grid = np.pad(grid, ((pad,pad), (pad,pad)), mode='constant')
    # Iterate through points, highest to lowest conf, suppress neighborhood.
    count = 0
    for i, rc in enumerate(rcorners.T):
      # Account for top and left padding.
      pt = (rc[0]+pad, rc[1]+pad)
      if grid[pt[1], pt[0]] == 1: # If not yet suppressed.
        grid[pt[1]-pad:pt[1]+pad+1, pt[0]-pad:pt[0]+pad+1] = 0
        grid[pt[1], pt[0]] = -1
        count += 1
    # Get all surviving -1's and return sorted array of remaining corners.
    keepy, keepx = np.where(grid==-1)
    keepy, keepx = keepy - pad, keepx - pad
    inds_keep = inds[keepy, keepx]
    out = corners[:, inds_keep]
    values = out[-1, :]
    inds2 = np.argsort(-values)
    out = out[:, inds2]
    out_inds = inds1[inds_keep[inds2]]
    return out, out_inds

  def run(self, img):
    assert img.ndim == 2, 'Image must be grayscale.'
    assert img.dtype == np.float32, 'Image must be float32.'
    H, W = img.shape[0], img.shape[1]
    inp = img.copy()
    inp = (inp.reshape(1, H, W))
    inp = torch.from_numpy(inp)
    inp = torch.autograd.Variable(inp).view(1, 1, H, W)
    if self.cuda:
      inp = inp.cuda()
    # Forward pass of network.
    outs = self.net.forward(inp)
    semi, coarse_desc = outs[0], outs[1]
    # Convert pytorch -> numpy.
    semi = semi.data.cpu().numpy().squeeze()
    # --- Process points.
    dense = np.exp(semi) # Softmax.
    dense = dense / (np.sum(dense, axis=0)+.00001) # Should sum to 1.
    # Remove dustbin.
    nodust = dense[:-1, :, :]
    # Reshape to get full resolution heatmap.
    Hc = int(H / self.cell)
    Wc = int(W / self.cell)
    nodust = nodust.transpose(1, 2, 0)
    heatmap = np.reshape(nodust, [Hc, Wc, self.cell, self.cell])
    heatmap = np.transpose(heatmap, [0, 2, 1, 3])
    heatmap = np.reshape(heatmap, [Hc*self.cell, Wc*self.cell])
    xs, ys = np.where(heatmap >= self.conf_thresh) # Confidence threshold.
    if len(xs) == 0:
      return np.zeros((3, 0)), None, None
    pts = np.zeros((3, len(xs))) # Populate point data sized 3xN.
    pts[0, :] = ys
    pts[1, :] = xs
    pts[2, :] = heatmap[xs, ys]
    pts, _ = self.nms_fast(pts, H, W, dist_thresh=self.nms_dist) # Apply NMS.
    inds = np.argsort(pts[2,:])
    pts = pts[:,inds[::-1]] # Sort by confidence.
    # Remove points along border.
    bord = self.border_remove
    toremoveW = np.logical_or(pts[0, :] < bord, pts[0, :] >= (W-bord))
    toremoveH = np.logical_or(pts[1, :] < bord, pts[1, :] >= (H-bord))
    toremove = np.logical_or(toremoveW, toremoveH)
    pts = pts[:, ~toremove]
    # --- Process descriptor.
    D = coarse_desc.shape[1]
    if pts.shape[1] == 0:
      desc = np.zeros((D, 0))
    else:
      # Interpolate into descriptor map using 2D point locations.
      samp_pts = torch.from_numpy(pts[:2, :].copy())
      samp_pts[0, :] = (samp_pts[0, :] / (float(W)/2.)) - 1.
      samp_pts[1, :] = (samp_pts[1, :] / (float(H)/2.)) - 1.
      samp_pts = samp_pts.transpose(0, 1).contiguous()
      samp_pts = samp_pts.view(1, 1, -1, 2)
      samp_pts = samp_pts.float()
      if self.cuda:
        samp_pts = samp_pts.cuda()
      desc = torch.nn.functional.grid_sample(coarse_desc, samp_pts)
      desc = desc.data.cpu().numpy().reshape(D, -1)
      desc /= np.linalg.norm(desc, axis=0)[np.newaxis, :]
    return pts, desc, heatmap
class PointTracker(object):
  """ Class to manage a fixed memory of points and descriptors that enables
  sparse optical flow point tracking.

  Internally, the tracker stores a 'tracks' matrix sized M x (2+L), of M
  tracks with maximum length L, where each row corresponds to:
  row_m = [track_id_m, avg_desc_score_m, point_id_0_m, ..., point_id_L-1_m].
  """

  def __init__(self, max_length, nn_thresh):
    if max_length < 2:
      raise ValueError('max_length must be greater than or equal to 2.')
    self.maxl = max_length
    self.nn_thresh = nn_thresh
    self.all_pts = []
    for n in range(self.maxl):
      self.all_pts.append(np.zeros((2, 0)))
    self.last_desc = None
    self.tracks = np.zeros((0, self.maxl+2))
    self.track_count = 0
    self.max_score = 9999

  def nn_match_two_way(self, desc1, desc2, nn_thresh):
    """
    Performs two-way nearest neighbor matching of two sets of descriptors, such
    that the NN match from descriptor A->B must equal the NN match from B->A.

    Inputs:
      desc1 - NxM numpy matrix of N corresponding M-dimensional descriptors.
      desc2 - NxM numpy matrix of N corresponding M-dimensional descriptors.
      nn_thresh - Optional descriptor distance below which is a good match.

    Returns:
      matches - 3xL numpy array, of L matches, where L <= N and each column i is
                a match of two descriptors, d_i in image 1 and d_j' in image 2:
                [d_i index, d_j' index, match_score]^T
    """
    assert desc1.shape[0] == desc2.shape[0]
    if desc1.shape[1] == 0 or desc2.shape[1] == 0:
      return np.zeros((3, 0))
    if nn_thresh < 0.0:
      raise ValueError('\'nn_thresh\' should be non-negative')
    # Compute L2 distance. Easy since vectors are unit normalized.
    dmat = np.dot(desc1.T, desc2)
    dmat = np.sqrt(2-2*np.clip(dmat, -1, 1))
    # Get NN indices and scores.
    idx = np.argmin(dmat, axis=1)
    scores = dmat[np.arange(dmat.shape[0]), idx]
    # Threshold the NN matches.
    keep = scores < nn_thresh
    # Check if nearest neighbor goes both directions and keep those.
    idx2 = np.argmin(dmat, axis=0)
    keep_bi = np.arange(len(idx)) == idx2[idx]
    keep = np.logical_and(keep, keep_bi)
    idx = idx[keep]
    scores = scores[keep]
    # Get the surviving point indices.
    m_idx1 = np.arange(desc1.shape[1])[keep]
    m_idx2 = idx
    # Populate the final 3xN match data structure.
    matches = np.zeros((3, int(keep.sum())))
    matches[0, :] = m_idx1
    matches[1, :] = m_idx2
    matches[2, :] = scores
    return matches

  def get_offsets(self):
    """ Iterate through list of points and accumulate an offset value. Used to
    index the global point IDs into the list of points.

    Returns
      offsets - N length array with integer offset locations.
    """
    # Compute id offsets.
    offsets = []
    offsets.append(0)
    for i in range(len(self.all_pts)-1): # Skip last camera size, not needed.
      offsets.append(self.all_pts[i].shape[1])
    offsets = np.array(offsets)
    offsets = np.cumsum(offsets)
    return offsets

  def update(self, pts, desc):
    """ Add a new set of point and descriptor observations to the tracker.

    Inputs
      pts - 3xN numpy array of 2D point observations.
      desc - DxN numpy array of corresponding D dimensional descriptors.
    """
    if pts is None or desc is None:
      print('PointTracker: Warning, no points were added to tracker.')
      return
    assert pts.shape[1] == desc.shape[1]
    # Initialize last_desc.
    if self.last_desc is None:
      self.last_desc = np.zeros((desc.shape[0], 0))
    # Remove oldest points, store its size to update ids later.
    remove_size = self.all_pts[0].shape[1]
    self.all_pts.pop(0)
    self.all_pts.append(pts)
    # Remove oldest point in track.
    self.tracks = np.delete(self.tracks, 2, axis=1)
    # Update track offsets.
    for i in range(2, self.tracks.shape[1]):
      self.tracks[:, i] -= remove_size
    self.tracks[:, 2:][self.tracks[:, 2:] < -1] = -1
    offsets = self.get_offsets()
    # Add a new -1 column.
    self.tracks = np.hstack((self.tracks, -1*np.ones((self.tracks.shape[0], 1))))
    # Try to append to existing tracks.
    matched = np.zeros((pts.shape[1])).astype(bool)
    matches = self.nn_match_two_way(self.last_desc, desc, self.nn_thresh)
    for match in matches.T:
      # Add a new point to it's matched track.
      id1 = int(match[0]) + offsets[-2]
      id2 = int(match[1]) + offsets[-1]
      found = np.argwhere(self.tracks[:, -2] == id1)
      if found.shape[0] > 0:
        matched[int(match[1])] = True
        row = int(found)
        self.tracks[row, -1] = id2
        if self.tracks[row, 1] == self.max_score:
          # Initialize track score.
          self.tracks[row, 1] = match[2]
        else:
          # Update track score with running average.
          # NOTE(dd): this running average can contain scores from old matches
          #           not contained in last max_length track points.
          track_len = (self.tracks[row, 2:] != -1).sum() - 1.
          frac = 1. / float(track_len)
          self.tracks[row, 1] = (1.-frac)*self.tracks[row, 1] + frac*match[2]
    # Add unmatched tracks.
    new_ids = np.arange(pts.shape[1]) + offsets[-1]
    new_ids = new_ids[~matched]
    new_tracks = -1*np.ones((new_ids.shape[0], self.maxl + 2))
    new_tracks[:, -1] = new_ids
    new_num = new_ids.shape[0]
    new_trackids = self.track_count + np.arange(new_num)
    new_tracks[:, 0] = new_trackids
    new_tracks[:, 1] = self.max_score*np.ones(new_ids.shape[0])
    self.tracks = np.vstack((self.tracks, new_tracks))
    self.track_count += new_num # Update the track count.
    # Remove empty tracks.
    keep_rows = np.any(self.tracks[:, 2:] >= 0, axis=1)
    self.tracks = self.tracks[keep_rows, :]
    # Store the last descriptors.
    self.last_desc = desc.copy()
    return

  def get_tracks(self, min_length):
    """ Retrieve point tracks of a given minimum length.
    Input
      min_length - integer >= 1 with minimum track length
    Output
      returned_tracks - M x (2+L) sized matrix storing track indices, where
        M is the number of tracks and L is the maximum track length.
    """
    if min_length < 1:
      raise ValueError('\'min_length\' too small.')
    valid = np.ones((self.tracks.shape[0])).astype(bool)
    good_len = np.sum(self.tracks[:, 2:] != -1, axis=1) >= min_length
    # Remove tracks which do not have an observation in most recent frame.
    not_headless = (self.tracks[:, -1] != -1)
    keepers = np.logical_and.reduce((valid, good_len, not_headless))
    returned_tracks = self.tracks[keepers, :].copy()
    return returned_tracks

  def draw_tracks(self, out, tracks):
    """ Visualize tracks all overlayed on a single image.
    Inputs
      out - numpy uint8 image sized HxWx3 upon which tracks are overlayed.
      tracks - M x (2+L) sized matrix storing track info.
    """
    # Store the number of points per camera.
    pts_mem = self.all_pts
    N = len(pts_mem) # Number of cameras/images.
    # Get offset ids needed to reference into pts_mem.
    offsets = self.get_offsets()
    # Width of track and point circles to be drawn.
    stroke = 1
    # Iterate through each track and draw it.
    for track in tracks:
      clr = myjet[int(np.clip(np.floor(track[1]*10), 0, 9)), :]*255
      for i in range(N-1):
        if track[i+2] == -1 or track[i+3] == -1:
          continue
        offset1 = offsets[i]
        offset2 = offsets[i+1]
        idx1 = int(track[i+2]-offset1)
        idx2 = int(track[i+3]-offset2)
        pt1 = pts_mem[i][:2, idx1]
        pt2 = pts_mem[i+1][:2, idx2]
        p1 = (int(round(pt1[0])), int(round(pt1[1])))
        p2 = (int(round(pt2[0])), int(round(pt2[1])))
        cv2.line(out, p1, p2, clr, thickness=stroke, lineType=16)
        # Draw end points of each track.
        if i == N-2:
          clr2 = (255, 0, 0)
          cv2.circle(out, p2, stroke, clr2, -1, lineType=16)
class VideoStreamer(object):
  """ Class to help process image streams. Three types of possible inputs:"
    1.) USB Webcam.
    2.) A directory of images (files in directory matching 'img_glob').
    3.) A video file, such as an .mp4 or .avi file.
  """
  def __init__(self, basedir, height, width):
    self.cap = []
    self.camera = False
    self.listing = []
    self.sizer = [height, width]
    self.i = 0

    self.maxlen = 1000000
    search = os.path.join(basedir, '*')
    self.listing = glob.glob(search)
    self.listing.sort()
    self.listing = self.listing[::1]
    self.maxlen = len(self.listing)
    if self.maxlen == 0:
      raise IOError('No images were found (maybe bad \'--img_glob\' parameter?)')

  def read_image(self, impath, img_size):
    grayim = cv2.imread(impath, 0)
    if grayim is None:
      raise Exception('Error reading image %s' % impath)
    # Image is resized via opencv.
    interp = cv2.INTER_AREA
    grayim = cv2.resize(grayim, (img_size[1], img_size[0]), interpolation=interp)
    grayim = (grayim.astype('float32') / 255.)
    return grayim

  def next_frame(self):
    """ Return the next frame, and increment internal counter.
    Returns
       image: Next H x W image.
       status: True or False depending whether image was loaded.
    """
    if self.i == self.maxlen:
      return (None, False)
    if self.camera:
      ret, input_image = self.cap.read()
      if ret is False:
        print('VideoStreamer: Cannot get image from camera (maybe bad --camid?)')
        return (None, False)
      if self.video_file:
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.listing[self.i])
      input_image = cv2.resize(input_image, (self.sizer[1], self.sizer[0]),
                               interpolation=cv2.INTER_AREA)
      input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2GRAY)
      input_image = input_image.astype('float')/255.0
    else:
      image_file = self.listing[self.i]
      input_image = self.read_image(image_file, self.sizer)
    # Increment internal counter.
    self.i = self.i + 1
    input_image = input_image.astype('float32')
    return (input_image, True)




  # Parse command line arguments.
myjet = np.array([[0.        , 0.        , 0.5       ],
                  [0.        , 0.        , 0.99910873],
                  [0.        , 0.37843137, 1.        ],
                  [0.        , 0.83333333, 1.        ],
                  [0.30044276, 1.        , 0.66729918],
                  [0.66729918, 1.        , 0.30044276],
                  [1.        , 0.90123457, 0.        ],
                  [1.        , 0.48002905, 0.        ],
                  [0.99910873, 0.07334786, 0.        ],
                  [0.5       , 0.        , 0.        ]])

In [14]:
input_path = 'Processed_Data/360deg_img'
output_path = 'Processed_Data/Feature Map'
input_dirs = glob.glob(input_path+'/*')
img_w = 512
img_h = 512
weight_path = 'SuperPointPretrainedNetwork/superpoint_v1.pth'
dist = 4
conf_thresh=0.015
nn_thresh = 0.7
GPU = False
fe = SuperPointFrontend(weights_path=weight_path,
                        nms_dist=dist,
                        conf_thresh=conf_thresh,
                        nn_thresh=nn_thresh,
                        cuda=GPU)
tracker = PointTracker(5, nn_thresh=fe.nn_thresh)

def ExtractFeature(input_dirs,output_dirs):
    vs = VideoStreamer(input_dirs, 512, 512)

    win = 'SuperPoint Tracker'
    # Font parameters for visualizaton.
    font = cv2.FONT_HERSHEY_DUPLEX
    font_clr = (255, 255, 255)
    font_pt = (4, 12)
    font_sc = 0.4

    if not os.path.exists(output_dirs):
        os.makedirs(output_dirs)
    while True:

        start = time.time()

        # Get a new image.
        img, status = vs.next_frame()
        if status is False:
            break

        # Get points and descriptors.
        pts, desc, heatmap = fe.run(img)

        # Add points and descriptors to the tracker.
        tracker.update(pts, desc)

        # Get tracks for points which were match successfully across all frames.
        tracks = tracker.get_tracks(2)

        # Primary output - Show point tracks overlayed on top of input image.
        out1 = (np.dstack((img, img, img)) * 255.).astype('uint8')
        tracks[:, 1] /= float(fe.nn_thresh) # Normalize track scores to [0,1].
        tracker.draw_tracks(out1, tracks)
        if True:
            cv2.putText(out1, 'Point Tracks', font_pt, font, font_sc, font_clr, lineType=16)

        # Extra output -- Show current point detections.
        out2 = (np.dstack((img, img, img)) * 255.).astype('uint8')
        for pt in pts.T:
            pt1 = (int(round(pt[0])), int(round(pt[1])))
            cv2.circle(out2, pt1, 1, (0, 255, 0), -1, lineType=16)
        cv2.putText(out2, 'Raw Point Detections', font_pt, font, font_sc, font_clr, lineType=16)

        # Extra output -- Show the point confidence heatmap.
        if heatmap is not None:
            min_conf = 0.001
            heatmap[heatmap < min_conf] = min_conf
            heatmap = -np.log(heatmap)
            heatmap = (heatmap - heatmap.min()) / (heatmap.max() - heatmap.min() + .00001)
            out3 = myjet[np.round(np.clip(heatmap*10, 0, 9)).astype('int'), :]
            out3 = (out3*255).astype('uint8')
        else:
            out3 = np.zeros_like(out2)
        cv2.putText(out3, 'Raw Point Confidences', font_pt, font, font_sc, font_clr, lineType=16)

        # Resize final output.
        if False:
            out = np.hstack((out1, out2, out3))
            out = cv2.resize(out, (3*2*512, 2*512))
        else:
            out = cv2.resize(out2, (2*512, 2*512))

        # Optionally write images to disk.
        if True:
            out_file = os.path.join(output_dirs, 'frame_%05d.png' % vs.i)
            cv2.imwrite(out_file, out)


    print(f'==> Finshed {output_dirs}.')

  self.net.load_state_dict(torch.load(weights_path,


In [15]:
for dirPath in input_dirs:
    folderName = dirPath.split("/")[-1]
    outputPath = os.path.join(output_path,folderName)
    ExtractFeature(dirPath,outputPath)

  row = int(found)


==> Finshed Processed_Data/Feature Map\360deg_img\0000.
==> Finshed Processed_Data/Feature Map\360deg_img\0001.


In [16]:
import cv2
import numpy as np
import os

def match_images_across_multiple(img_folder,outputFolder, fe, nn_thresh=0.7, save_matches=False):
    # Get all image paths from the folder
    img_paths = sorted(glob.glob((img_folder+'/*')))
    num_images = len(img_paths)
    assert num_images >= 2, "Need at least two images for matching."

    all_matches = []  # To store matches across all images
    if not os.path.exists(outputFolder):
        os.makedirs(outputFolder)
    # Iterate over image pairs (i, i+1)
    for i in range(num_images - 1):

        # Read consecutive images
        img1 = cv2.imread(img_paths[i], cv2.IMREAD_GRAYSCALE)
        img2 = cv2.imread(img_paths[i+1], cv2.IMREAD_GRAYSCALE)

        img1 = img1.astype(np.float32) / 255.0
        img2 = img2.astype(np.float32) / 255.0

        # Extract keypoints and descriptors using SuperPoint
        pts1, desc1, _ = fe.run(img1)
        pts2, desc2, _ = fe.run(img2)

        # Match descriptors between img1 and img2
        good_matches = feature_match(desc1, desc2, nn_thresh)
        
        # Visualize matches if desired
        if save_matches:
            visualize_and_save(img1, img2, pts1, pts2, good_matches, i,outputFolder)
        match_data = [(m.queryIdx, m.trainIdx, m.distance) for m in good_matches]

        # Save matches for this image pair as a .npy file
        match_file = os.path.join(outputFolder, f'matches_{i}_{i + 1}.npy')
        np.save(match_file, match_data)

        all_matches.append(good_matches)

    return all_matches

def feature_match(desc1, desc2, nn_thresh=0.7):
    """Performs nearest neighbor matching between two descriptors."""
    bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)
    desc1 = desc1.T
    desc2 = desc2.T
    matches = bf.match(desc1, desc2)
    
    # Filter good matches
    good_matches = [m for m in matches if m.distance < nn_thresh]
    return good_matches

def visualize_and_save(img1, img2, pts1, pts2, matches, img_idx,outputFolder):
    """
    Visualize and save the matches between two images.

    Args:
        img1: First image (float32 grayscale).
        img2: Second image (float32 grayscale).
        pts1: Keypoints from the first image.
        pts2: Keypoints from the second image.
        matches: Good matches between descriptors of the two images.
        img_idx: Index of the image pair being processed.
    """
    # Convert images back to uint8 for visualization
    img1_u8 = (img1 * 255).astype(np.uint8)
    img2_u8 = (img2 * 255).astype(np.uint8)

    # Convert keypoints for visualization
    kp1 = [cv2.KeyPoint(pt[0], pt[1], 1) for pt in pts1.T]
    kp2 = [cv2.KeyPoint(pt[0], pt[1], 1) for pt in pts2.T]

    # Draw matches
    out_img = cv2.drawMatches(img1_u8, kp1, img2_u8, kp2, matches, None)

    # Save output image
    out_file = f'{outputFolder}/matches_{img_idx}_{img_idx+1}.png'
    cv2.imwrite(out_file, out_img)


# Define parameters and run matching across 21 images
img_folder = glob.glob('Processed_Data/360deg_img/*')
weight_path = 'SuperPointPretrainedNetwork/superpoint_v1.pth'
dist = 4
conf_thresh = 0.015
nn_thresh = 0.7
GPU = False

# Initialize SuperPointFrontend
fe = SuperPointFrontend(weights_path=weight_path,
                        nms_dist=dist,
                        conf_thresh=conf_thresh,
                        nn_thresh=nn_thresh,
                        cuda=GPU)

# Run matching across 21 image

  self.net.load_state_dict(torch.load(weights_path,


In [17]:

for folder in img_folder:
    name = folder.split('/')[-1]
    outputFolder = os.path.join('Processed_Data/Feature Match',name)
    matched = match_images_across_multiple(folder, outputFolder, fe, nn_thresh, save_matches=False)


In [18]:
len(matched[0])

92

# SfM

In [19]:
import pycolmap
import os

def run_sfm_with_colmap(image_dir, output_dir, ply_point_cloud=None):
    # Initialize COLMAP's SfM
    recon = pycolmap.Reconstruction()

    # Step 1: Feature extraction
    pycolmap.extract_features(image_path=image_dir, database_path=f'{output_dir}/database.db')

    # Step 2: Feature matching
    pycolmap.match_sequential(database_path=f'{output_dir}/database.db')

    # Step 3: Run SfM pipeline
    recon.sparse_reconstruction(database_path=f'{output_dir}/database.db', 
                                image_dir=image_dir,
                                output_dir=output_dir)
    
    # Step 4: Optionally load your training point cloud
    if ply_point_cloud:
        recon.import_points(ply_point_cloud)

    # Step 5: Export the final point cloud
    recon.export_ply(os.path.join(output_dir, 'dense_point_cloud.ply'))

    return recon

# Set directories
image_dir = "Processed_Data/360deg_img"
output_dir = "SfM_Output"
ply_point_cloud = "path_to_your_point_cloud.ply"  # Optional, if used

# Run SfM pipeline
sfm_result = run_sfm_with_colmap('Processed_Data/360deg_img/0000', 'Processed_Data', 'Processed_Data/3D_PointCloud/0000.ply')
print("SfM completed. Dense point cloud saved to", os.path.join(output_dir, 'dense_point_cloud.ply'))



ModuleNotFoundError: No module named 'pycolmap'

# MVS

In [4]:
import torch
import glob
import os
import numpy as np
from plyfile import PlyData
import PIL
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import open3d as o3d
import lightning as L
from lightning.fabric import Fabric
from torch.amp import autocast, GradScaler
from functools import partial
from torch.utils.checkpoint import checkpoint

# Set device and configure PyTorch settings
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.set_float32_matmul_precision('high')
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = True

# Directory paths
image_dir = "/content/3D-Waifu-Model-Generator/Processed_Data/360deg_img"
point_cloud_dir = "/content/3D-Waifu-Model-Generator/Processed_Data/3D_PointCloud"

class OptimizedMVSNet(nn.Module):
    def __init__(self):
        super(OptimizedMVSNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.gn1 = nn.GroupNorm(4, 32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.gn2 = nn.GroupNorm(8, 64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.gn3 = nn.GroupNorm(16, 128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.gn4 = nn.GroupNorm(32, 256)

        # Depth prediction layer
        self.depth_pred = nn.Conv2d(256, 1, kernel_size=3, padding=1)

    def forward(self, x):
        batch_size, num_images, height, width, channels = x.size()
        x = x.view(batch_size * num_images, channels, height, width)

        # Apply convolutional layers with in-place ReLU, GroupNorm, and gradient checkpointing
        x = F.relu(checkpoint(self._forward_block, self.conv1, self.gn1, x), inplace=True)
        x = F.relu(checkpoint(self._forward_block, self.conv2, self.gn2, x), inplace=True)
        x = F.relu(checkpoint(self._forward_block, self.conv3, self.gn3, x), inplace=True)
        x = F.relu(checkpoint(self._forward_block, self.conv4, self.gn4, x), inplace=True)

        depth = self.depth_pred(x)
        depth = depth.view(batch_size, num_images, 1, height, width)
        return depth

    def _forward_block(self, conv, norm, x):
        return norm(conv(x))

def depth_loss(predicted_depth, ground_truth_depth):
    return F.mse_loss(predicted_depth, ground_truth_depth, reduction='mean')

def train_mvsnet(dataloader, point_cloud_dir, num_epochs=10, lr=5e-5):
    fabric = Fabric(accelerator='cuda', devices=1, precision='bf16-true')
    fabric.launch()
    
    with fabric.init_module():
        model = OptimizedMVSNet().to(torch.bfloat16)  # Use bfloat16 for memory efficiency
    
    optimizer = optim.AdamW(model.parameters(), lr=lr)  # AdamW is more memory efficient
    num_steps = num_epochs * len(dataloader)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_steps)
    
    model, optimizer = fabric.setup(model, optimizer)
    dataloader = fabric.setup_dataloaders(dataloader)

    for epoch in range(num_epochs):
        model.train()
        for batch in dataloader:
            images, ground_truth_depth = batch
            images = images.to(device)
            ground_truth_depth = ground_truth_depth.to(device)

            # Adjust the ground_truth_depth to match the model output shape
            #ground_truth_depth = ground_truth_depth.unsqueeze(1).expand(-1, 21, -1, -1)  # Shape: [batch_size, 21, 1, height, width]

            # Mixed-precision forward pass
            with autocast(device_type='cuda', dtype=torch.bfloat16):  # Ensure bfloat16 mixed precision
                predicted_depth = model(images)
                loss = depth_loss(predicted_depth, ground_truth_depth)

            # Backward pass
            optimizer.zero_grad()
            fabric.backward(loss)
            optimizer.step()
            scheduler.step()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss:.4f}')

    print("Training complete!")
    torch.save(model.state_dict(), '/content/3D-Waifu-Model-Generator/optimized_mvsnet.pth')
    print("Model saved as 'optimized_mvsnet.pth'")
    return model


def create_depth_map_from_point_cloud_orthographic(point_cloud, image_shape):
    points = np.asarray(point_cloud.points)

    # Normalize the points to fit the image space
    x_min, x_max = points[:, 0].min(), points[:, 0].max()
    y_min, y_max = points[:, 1].min(), points[:, 1].max()
    z_min, z_max = points[:, 2].min(), points[:, 2].max()

    # Create an empty depth map
    depth_map = np.full(image_shape, np.inf)

    # Convert point cloud coordinates to image coordinates
    height, width = image_shape
    x_image = np.clip(((points[:, 0] - x_min) / (x_max - x_min) * width).astype(int), 0, width - 1)
    y_image = np.clip(((points[:, 1] - y_min) / (y_max - y_min) * height).astype(int), 0, height - 1)

    # Assign Z values to pixels
    for i in range(points.shape[0]):
        depth_value = points[i, 2]
        if depth_map[y_image[i], x_image[i]] > depth_value:
            depth_map[y_image[i], x_image[i]] = depth_value

    depth_map[depth_map == np.inf] = 0
    return torch.tensor(depth_map, dtype=torch.float32)

def load_point_cloud_to_depth_map(point_cloud_dir, image_shape):
    point_cloud_files = [os.path.join(point_cloud_dir, f).replace("\\", "/") for f in os.listdir(point_cloud_dir) if f.endswith('.ply')]
    depth_maps = []

    for pc_file in point_cloud_files:
        pcd = o3d.io.read_point_cloud(pc_file)
        
        # Generate the depth map
        depth_map = create_depth_map_from_point_cloud_orthographic(pcd, image_shape)

        # Resize depth map to match the model output
        depth_map_tensor = depth_map.unsqueeze(0).unsqueeze(0)  # [1, 1, H, W] shape
        resized_depth_map = F.interpolate(depth_map_tensor, size=(576, 576), mode='bilinear', align_corners=False)
        
        depth_maps.append(resized_depth_map.squeeze(0))  # Remove the batch dimension

    return torch.stack(depth_maps)  # Stack all depth maps into a tensor

def create_dataloader(image_dir, point_cloud_dir, batch_size=1):
    images = load_images(image_dir)
    ground_truth_depth_maps = load_point_cloud_to_depth_map(point_cloud_dir, images[0].shape)
    dataset = torch.utils.data.TensorDataset(torch.tensor(images, dtype=torch.float32), ground_truth_depth_maps)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    return dataloader

def load_images(image_paths):
    output = []
    path = glob.glob(f"{image_paths}/*")
    for dir in path:
        images = []
        for path in glob.glob(f"{dir}/*"):
            img = PIL.Image.open(path)
            images.append(np.array(img))
        output.append(images)
    return np.array(output)

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [5]:
import PIL.Image


def load_images(image_paths):
    output = []
    path = glob.glob(f"{image_paths}/*")
    for dir in path:
        images = []
        for path in glob.glob(f"{dir}/*"):
            path = str(path).replace("\\",'/')
            #print(path)
            img = PIL.Image.open(path)
            images.append(img)
        output.append(images)
        print(np.array(output).shape)
    return np.array(output)

In [6]:

image_dir = "Processed_Data/360deg_img"
point_cloud_dir = "Processed_Data/3D_PointCloud"

images = load_images(image_dir)
ground_truth_depth_maps = load_point_cloud_to_depth_map(point_cloud_dir, (576, 576))

dataset = torch.utils.data.TensorDataset(torch.tensor(images, dtype=torch.float32), ground_truth_depth_maps)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)

(1, 21, 576, 576, 3)
(2, 21, 576, 576, 3)


In [None]:
import torch
import open3d as o3d


# Set directories
image_dir = "Processed_Data/360deg_img"
point_cloud_dir = "Training_Point_Clouds"
output_dir = ""

# Run MVS pipeline
model = train_mvsnet(dataloader, point_cloud_dir)


In [24]:
import torch
import glob
import os
import numpy as np
from plyfile import PlyData
import PIL
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import open3d as o3d
import lightning as L
from lightning.fabric import Fabric
from torch.amp import autocast, GradScaler
from functools import partial
from torch.utils.checkpoint import checkpoint

# Set device and configure PyTorch settings
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.set_float32_matmul_precision('high')
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = True

# Directory paths
image_dir = "/content/3D-Waifu-Model-Generator/Processed_Data/360deg_img"
point_cloud_dir = "/content/3D-Waifu-Model-Generator/Processed_Data/3D_PointCloud"

class PointCloudMVSNet(nn.Module):
    def __init__(self):
        super(PointCloudMVSNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.gn1 = nn.GroupNorm(4, 32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.gn2 = nn.GroupNorm(8, 64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.gn3 = nn.GroupNorm(16, 128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.gn4 = nn.GroupNorm(32, 256)

        # Prediction layers
        self.coord_pred = nn.Conv2d(256, 3, kernel_size=3, padding=1)  # Predict x, y, z coordinates
        self.rgb_pred = nn.Conv2d(256, 3, kernel_size=3, padding=1)    # Predict RGB values

    def forward(self, x):
        batch_size, num_images, height, width, channels = x.size()
        x = x.view(batch_size * num_images, channels, height, width)

        # Apply convolutional layers
        x = F.relu(checkpoint(self._forward_block, self.conv1, self.gn1, x), inplace=True)
        x = F.relu(checkpoint(self._forward_block, self.conv2, self.gn2, x), inplace=True)
        x = F.relu(checkpoint(self._forward_block, self.conv3, self.gn3, x), inplace=True)
        x = F.relu(checkpoint(self._forward_block, self.conv4, self.gn4, x), inplace=True)

        # Predict 3D coordinates and RGB values
        coords = self.coord_pred(x)  # [batch_size * num_images, 3, height, width]
        rgb = self.rgb_pred(x)       # [batch_size * num_images, 3, height, width]

        # Reshape to get final shape as [batch_size, num_images, height * width, 6]
        coords = coords.view(batch_size, num_images, 3, height * width)  # Flatten spatial dimensions
        rgb = rgb.view(batch_size, num_images, 3, height * width)        # Flatten spatial dimensions
        
        # Combine coordinates and RGB
        point_cloud = torch.cat([coords, rgb], dim=2)  # [batch_size, num_images, 6, height * width]
        
        return point_cloud.permute(0, 2, 1, 3).contiguous().view(batch_size, 6, -1)  # [batch_size, 6, num_points]
    
    def _forward_block(self, conv, norm, x):
        return norm(conv(x))

def point_cloud_loss(predicted_coords, predicted_rgb, ground_truth_coords, ground_truth_rgb):
    coord_loss = F.mse_loss(predicted_coords, ground_truth_coords, reduction='mean')  # Loss for coordinates
    rgb_loss = F.mse_loss(predicted_rgb, ground_truth_rgb, reduction='mean')  # Loss for RGB values
    return coord_loss + rgb_loss

def train_mvsnet(dataloader, point_cloud_dir, num_epochs=10, lr=0.001):
    fabric = Fabric(accelerator='cuda', devices=1, precision='bf16-true')
    fabric.launch()
    
    with fabric.init_module():
        model = PointCloudMVSNet().to(torch.bfloat16)  # Use bfloat16 for memory efficiency
    
    optimizer = optim.AdamW(model.parameters(), lr=lr)  
    num_steps = num_epochs * len(dataloader)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_steps)
    
    model, optimizer = fabric.setup(model, optimizer)
    dataloader = fabric.setup_dataloaders(dataloader)

    for epoch in range(num_epochs):
        model.train()
        for batch in dataloader:
            images, ground_truth = batch
            ground_truth_coords, ground_truth_rgb = ground_truth[:, :, :3], ground_truth[:, :, 3:]
            images = images.to(device)

            # Mixed-precision forward pass
            with autocast(device_type='cuda', dtype=torch.bfloat16):
                predicted = model(images)
                predicted_coords = predicted[:, :, :3]
                predicted_rgb = predicted[:, :, 3:]
                ground_truth_coords = ground_truth_coords.to(device)
                ground_truth_rgb = ground_truth_rgb.to(device)

                loss = point_cloud_loss(predicted_coords, predicted_rgb, ground_truth_coords, ground_truth_rgb)

            # Backward pass
            optimizer.zero_grad()
            fabric.backward(loss)
            optimizer.step()
            scheduler.step()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss:.4f}')

    print("Training complete!")
    torch.save(model.state_dict(), '/content/3D-Waifu-Model-Generator/point_cloud_mvsnet.pth')
    print("Model saved as 'point_cloud_mvsnet.pth'")
    return model

def load_point_cloud_with_rgb(point_cloud_dir, image_shape):
    point_cloud_files = [os.path.join(point_cloud_dir, f).replace("\\", "/") for f in os.listdir(point_cloud_dir) if f.endswith('.ply')]
    point_clouds = []

    for pc_file in point_cloud_files:
        pcd = o3d.io.read_point_cloud(pc_file)
        points = np.asarray(pcd.points)
        colors = np.asarray(pcd.colors)  # Extract RGB values

        point_cloud = np.concatenate([points, colors], axis=1)  # Concatenate [x, y, z] with [r, g, b]
        point_clouds.append(point_cloud)

    return torch.tensor(point_clouds, dtype=torch.float32)
def generate_point_cloud_from_model(model, images):
    model.eval()
    with torch.no_grad():
        predicted_point_cloud = model(images.to(device))
        # predicted_point_cloud will have shape [batch_size, num_points, 6]
        return predicted_point_cloud


def create_dataloader(image_dir, point_cloud_dir, batch_size=1):
    images = load_images(image_dir)
    pcds = load_point_cloud_with_rgb(point_cloud_dir, images[0].shape)
    dataset = torch.utils.data.TensorDataset(torch.tensor(images, dtype=torch.float32), ground_truth_depth_maps)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    return dataloader

def load_images(image_paths):
    output = []
    path = glob.glob(f"{image_paths}/*")
    for dir in path:
        images = []
        for path in glob.glob(f"{dir}/*"):
            img = PIL.Image.open(path)
            images.append(np.array(img))
        output.append(images)
    return np.array(output)


In [31]:
images = load_images("Processed_Data/360deg_img")
#pcds = load_point_cloud_with_rgb("Processed_Data\3D_PointCloud", images[0].shape)

In [30]:
images

array([[[[[ 0,  0,  0],
          [ 0,  0,  0],
          [ 0,  0,  0],
          ...,
          [ 0,  0,  0],
          [ 2,  0,  3],
          [ 0,  1,  0]],

         [[ 0,  0,  0],
          [ 0,  0,  0],
          [ 0,  0,  0],
          ...,
          [ 0,  0,  0],
          [ 0,  0,  0],
          [ 0,  0,  0]],

         [[ 0,  0,  0],
          [ 0,  0,  0],
          [ 0,  0,  1],
          ...,
          [ 0,  0,  0],
          [ 0,  0,  1],
          [ 0,  0,  0]],

         ...,

         [[ 0,  0,  0],
          [ 0,  0,  2],
          [ 0,  0,  0],
          ...,
          [ 0,  0,  0],
          [ 0,  0,  1],
          [ 0,  0,  0]],

         [[ 0,  0,  0],
          [ 2,  0,  0],
          [ 0,  0,  0],
          ...,
          [ 1,  0,  0],
          [ 0,  0,  1],
          [ 0,  0,  0]],

         [[ 0,  1,  1],
          [ 0,  0,  3],
          [ 0,  0,  1],
          ...,
          [ 2,  0,  2],
          [ 0,  0,  2],
          [ 5,  0,  3]]],


        [[[ 0,  0

In [25]:
dataloader = create_dataloader("Processed_Data\360deg_img","")
model = train_mvsnet(dataloader,point_cloud_dir)

IndexError: index 0 is out of bounds for axis 0 with size 0

# Visualize

In [7]:
model = OptimizedMVSNet()
model.load_state_dict(torch.load('optimized_mvsnet.pth'))

  model.load_state_dict(torch.load('optimized_mvsnet.pth'))


<All keys matched successfully>

In [8]:
image_dir = "Processed_Data/360deg_img"
point_cloud_dir = "Processed_Data/3D_PointCloud"

In [9]:
for a , b in dataloader:
    print(a.shape)

torch.Size([1, 21, 576, 576, 3])
torch.Size([1, 21, 576, 576, 3])


In [10]:
model.eval()
with torch.no_grad():
    predicted_depth = model(a)

  return fn(*args, **kwargs)


In [12]:
c=predicted_depth.squeeze(0)

In [19]:
for depth_map in predicted_depth.squeeze(0)

SyntaxError: expected ':' (2960779117.py, line 1)

In [21]:
 len([)

21

In [23]:
import open3d as o3d
import numpy as np

# Function to create a point cloud from a depth map
def create_point_cloud(depth_map):
    height, width = depth_map.shape
    x, y = np.meshgrid(np.arange(width), np.arange(height))

    # Convert pixel coordinates to camera coordinates
    z = depth_map.flatten()  # Depth values
    x = x.flatten()  # X pixel coordinates
    y = y.flatten()  # Y pixel coordinates

    # Stack the coordinates (assuming a scale factor of 1 for simplicity)
    points = np.vstack((x, y, z)).T
    valid_points = points[depth_map.flatten() > 0]  # Filter out points with zero depth
    return o3d.geometry.PointCloud(o3d.utility.Vector3dVector(valid_points))

# Load your depth maps
depth_map_files = [depth_map.squeeze(0).numpy() for depth_map in predicted_depth.squeeze(0)] # Replace with your depth map file paths

# Create point clouds
point_clouds = []
for depth_map in depth_map_files:
    point_cloud = create_point_cloud(depth_map)
    
    # Optionally apply a transformation if needed (for example, if you have known camera poses)
    # point_cloud.transform(transformation_matrix)  # Define your transformation matrix if available
    
    point_clouds.append(point_cloud)
    break

# Merge point clouds into a single point cloud
merged_point_cloud = o3d.geometry.PointCloud()
for pc in point_clouds:
    merged_point_cloud += pc

# Optional: Estimate normals for better visualization
merged_point_cloud.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))

# Optional: Create a mesh using Poisson reconstruction
mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(merged_point_cloud, depth=9)

# Visualize the merged point cloud or mesh
o3d.visualization.draw_geometries([merged_point_cloud])
# o3d.visualization.draw_geometries([mesh])

# Save the merged point cloud or mesh
o3d.io.write_point_cloud("merged_point_cloud.ply", merged_point_cloud)
o3d.io.write_triangle_mesh("reconstructed_mesh.ply", mesh)


True