In [11]:
import numpy as np
import glob
import cv2
import os
import matplotlib.pyplot as plt
import imageio as iio
import math
import os, sys, argparse
import inspect
import shutil

In [12]:
class ColorPalette:
    """Palette of 3-channel colors: 22 fixed entries plus seeded random ones on demand.

    Note: constructing a palette re-seeds NumPy's global random generator with 2,
    so the random extension of the palette is the same on every construction.
    """

    def __init__(self, numColors):
        """Build the palette.

        Args:
            numColors: Requested number of colors. The palette always holds at least
                the 22 fixed colors; when more are requested the remainder is filled
                with random colors (each channel drawn from [0, 255)).
        """
        np.random.seed(2)

        self.colorMap = np.array([[255, 0, 0],
                                  [0, 255, 0],
                                  [0, 0, 255],
                                  [80, 128, 255],
                                  [255, 230, 180],
                                  [255, 0, 255],
                                  [0, 255, 255],
                                  [100, 0, 0],
                                  [0, 100, 0],
                                  [255, 255, 0],
                                  [50, 150, 0],
                                  [200, 255, 255],
                                  [255, 200, 255],
                                  [128, 128, 80],
                                  [0, 50, 128],
                                  [0, 100, 100],
                                  [0, 255, 128],
                                  [0, 128, 255],
                                  [255, 0, 128],
                                  [128, 0, 255],
                                  [255, 128, 0],
                                  [128, 255, 0],
        ])

        numMissing = numColors - self.colorMap.shape[0]
        if numMissing > 0:
            extraColors = np.random.randint(255, size=(numMissing, 3))
            self.colorMap = np.concatenate([self.colorMap, extraColors], axis=0)

    def getColorMap(self):
        """Return the (numColors, 3) palette array (the internal array, not a copy)."""
        return self.colorMap

    def getColor(self, index):
        """Return the color stored at `index`, or a random color when `index` is past the end.

        Args:
            index: Palette position.

        Returns:
            Array of 3 integers.
        """
        # Fixed: the original read the undefined bare name `colorMap` here,
        # which raised NameError on every call.
        if index >= self.colorMap.shape[0]:
            return np.random.randint(255, size=(3))
        return self.colorMap[index]

In [13]:
def drawDepthImage(depth):
    """Turn a depth array into an inverted 8-bit grayscale image.

    Depth values are scaled so that 5 maps to 255, clipped to [0, 255],
    truncated to uint8 and inverted, so small depths come out bright and
    depths of 5 or more come out as 0.

    Args:
        depth: Numeric NumPy array of depth values.

    Returns:
        uint8 array with the same shape as `depth`.
    """
    scaled = depth / 5 * 255
    clipped = np.clip(scaled, 0, 255)
    quantized = clipped.astype(np.uint8)
    return 255 - quantized

In [14]:
def drawSegmentationImage(segmentations, randomColor=None, numColors=22, blackIndex=-1):
    """Colorize a segmentation using a ColorPalette.

    Args:
        segmentations: Either a 2-D array of integer labels, or a 3-D array of
            per-pixel scores whose last axis is reduced with argmax to get labels.
        randomColor: Accepted for compatibility but ignored; the palette is always
            rebuilt from ColorPalette.
        numColors: Minimum palette size. It is enlarged so that it holds at least
            (largest label + 2) entries for 2-D input, (channels + 2) entries for
            other input, and (blackIndex + 1) entries.
        blackIndex: When >= 0, that palette entry is set to 0 (black).

    Returns:
        Array of shape (height, width, 3) holding the palette color of each pixel.
        Negative labels index the palette from its end, as in normal NumPy indexing.
    """
    if segmentations.ndim == 2:
        labelCapacity = segmentations.max() + 2
    else:
        labelCapacity = segmentations.shape[2] + 2
    numColors = max(numColors, labelCapacity, blackIndex + 1)

    randomColor = ColorPalette(numColors).getColorMap()
    if blackIndex >= 0:
        randomColor[blackIndex] = 0

    height, width = segmentations.shape[0], segmentations.shape[1]

    # Score volumes are collapsed to the best-scoring channel per pixel.
    labels = np.argmax(segmentations, 2) if segmentations.ndim == 3 else segmentations
    flatLabels = labels.astype(int).reshape(-1)
    return randomColor[flatLabels].reshape((height, width, 3))

In [15]:
def ensure_min_rows(plane, num_rows=20):
    """Pad with zero rows, or truncate, so that `plane` has exactly `num_rows` rows.

    Despite the name, the result never has more than `num_rows` rows: shorter
    inputs are padded at the end with all-zero rows, longer inputs keep only
    their first `num_rows` rows.

    Args:
        plane: NumPy array whose first axis indexes rows.
        num_rows: Target number of rows (default 20, the previously hard-coded value).

    Returns:
        Array with `num_rows` rows and the same trailing shape and dtype as `plane`.
        The input object itself is returned when it already has `num_rows` rows.
    """
    current_rows = plane.shape[0]

    if current_rows < num_rows:
        # Pad with the input's own dtype so the result dtype does not depend on
        # whether padding happened (the original promoted to float64 only here).
        padding = np.zeros((num_rows - current_rows,) + plane.shape[1:], dtype=plane.dtype)
        return np.concatenate([plane, padding], axis=0)
    if current_rows > num_rows:
        return plane[:num_rows]
    return plane

In [16]:
##https://github.com/art-programmer/PlaneNet/blob/ccc4423d278388d01cb3300be992b951b90acc7a/pytorch/datasets/scannet_scene.py
## This class handles one scene of the ScanNet dataset and provides an interface for dataloaders.
class ScanNetScene():
    """One scene on disk, exposed through an index-based interface for dataloaders.

    Expected layout below `scenePath` (taken from the paths this class builds):
        <scene_id>.txt                    "key = value" scene description
        frames/color/<frame>.jpg          color frames
        frames/depth/<frame>.png          raw depth maps
        frames/pose/<frame>.txt           16 floats forming a 4x4 pose matrix
        annotation/planes.npy             plane parameters, one row per plane
        annotation/segmentation/<frame>.png   plane ids encoded in the 3 channels
    """

    # (width, height) passed to cv2.resize for every returned image/depth/segmentation.
    outputSize = (256, 192)
    # A plane is dropped when it covers less than this fraction of the frame.
    minPlaneAreaRatio = 0.02
    # Id decoded from a pixel whose three channels are all 255
    # (16777215 // 100 - 1); such pixels are never treated as a plane.
    invalidSegment = 167771

    def __init__(self, scenePath, scene_id):
        """Parse the scene description, list the color frames and load the planes.

        Args:
            scenePath: Directory of the scene.
            scene_id: Base name (without '.txt') of the scene description file.

        Raises:
            AttributeError: if the description file has no colorWidth/colorHeight entry.
        """
        # Slots 0-3: fx_color, fy_color, mx_color, my_color; 4-5: color width/height.
        # Slots 6-9 are never written and stay 0.
        self.metadata = np.zeros(10)

        intrinsicSlots = {'fx_color': 0, 'fy_color': 1, 'mx_color': 2, 'my_color': 3}
        integerFields = {
            'colorWidth': 'colorWidth',
            'colorHeight': 'colorHeight',
            'depthWidth': 'depthWidth',
            'depthHeight': 'depthHeight',
            'numDepthFrames': 'numImages',
        }
        with open(scenePath + '/' + scene_id + '.txt') as f:
            for line in f:
                tokens = line.split()
                # Entries are read as "<key> <separator> <value>". Blank or short
                # lines used to raise IndexError; they are skipped now.
                if len(tokens) < 3:
                    continue
                key, value = tokens[0], tokens[2]
                if key in intrinsicSlots:
                    self.metadata[intrinsicSlots[key]] = float(value)
                elif key in integerFields:
                    setattr(self, integerFields[key], int(value))

        # Raw depth values are divided by this factor when frames are loaded.
        self.depthShift = 1000.0
        # glob returns files in arbitrary order; sort so indices are reproducible.
        # (Lexicographic order: '10.jpg' sorts before '2.jpg'.)
        self.imagePaths = sorted(glob.glob(scenePath + '/frames/color/*.jpg'))

        self.metadata[4] = self.colorWidth
        self.metadata[5] = self.colorHeight
        self.planes = np.load(scenePath + '/annotation/planes.npy')

    @staticmethod
    def _readImage(path, flags=None):
        """Read an image with OpenCV, raising FileNotFoundError when it cannot be loaded."""
        data = cv2.imread(path) if flags is None else cv2.imread(path, flags)
        if data is None:
            raise FileNotFoundError('Could not read image: ' + path)
        return data

    @staticmethod
    def _loadExtrinsics(posePath):
        """Load a 4x4 pose file, invert it and re-order its rows.

        After inversion, row 1 is replaced by row 2 and row 2 by the negated
        former row 1. NOTE(review): presumably an axis-convention change matching
        how the planes are stored; confirm against the annotation format.
        """
        with open(posePath, 'r') as f:
            values = [float(value) for line in f for value in line.split()]
        extrinsics = np.linalg.inv(np.array(values).reshape((4, 4)))

        formerRow1 = extrinsics[1].copy()
        extrinsics[1] = extrinsics[2]
        extrinsics[2] = -formerRow1
        return extrinsics

    def getItemCached(self, imageIndex):
        """Load a frame from paths of the form '.../annotation_new/...segmentation.png'.

        NOTE(review): __init__ fills `imagePaths` with 'frames/color/*.jpg' files, for
        which none of the string replacements below match, so this method looks like
        a leftover for a different on-disk layout. Confirm before relying on it.

        Returns:
            [image, planes, segmentation, depth, metadata]
        """
        segmentationPath = self.imagePaths[imageIndex]
        imagePath = segmentationPath.replace('annotation_new/', 'frames/').replace('segmentation.png', 'color.jpg')
        image = self._readImage(imagePath)
        # Fixed: the original divided by metadata[6], which is never set (always 0).
        depth = self._readImage(imagePath.replace('color.jpg', 'depth.pgm'), -1).astype(np.float32) / self.depthShift

        segmentation = self._readImage(segmentationPath, -1).astype(np.int32)
        planes = np.load(segmentationPath.replace('segmentation.png', 'planes.npy'))

        return [image, planes, segmentation, depth, self.metadata]

    def transformPlanes(self, transformation, planes):
        """Apply a 4x4 homogeneous transformation to planes.

        Each plane is a 3-vector (normal scaled by offset). The method transforms
        the plane vector itself, used as a point, and a second reference point one
        unit closer to the origin along the same direction, then rebuilds the
        plane from the transformed pair.

        Args:
            transformation: (4, 4) matrix.
            planes: (N, 3) array.

        Returns:
            (N, 3) array of transformed planes (unit normal times signed offset).
        """
        def applyTransformation(points):
            homogeneous = np.concatenate([points, np.ones((planes.shape[0], 1))], axis=-1)
            moved = np.transpose(np.matmul(transformation, np.transpose(homogeneous)))
            return moved[:, :3] / moved[:, 3:4]

        planeOffsets = np.linalg.norm(planes, axis=-1, keepdims=True)

        newCenters = applyTransformation(planes)
        # The 1e-4 floor avoids dividing by zero for all-zero planes.
        refPoints = planes - planes / np.maximum(planeOffsets, 1e-4)
        newRefPoints = applyTransformation(refPoints)

        planeNormals = newRefPoints - newCenters
        planeNormals /= np.linalg.norm(planeNormals, axis=-1, keepdims=True)
        planeOffsets = np.sum(newCenters * planeNormals, axis=-1, keepdims=True)
        return planeNormals * planeOffsets

    def __getitem__(self, imageIndex):
        """Load one frame together with its visible planes.

        Args:
            imageIndex: Position in `self.imagePaths`.

        Returns:
            [image, planes, segmentation, depth, metadata] where
            - image: color frame resized to `outputSize`;
            - planes: (K, 3) planes transformed with the frame's extrinsics, largest
              first, or an empty array when no plane is large enough;
            - segmentation: int32 map resized to `outputSize`; pixel value k refers
              to planes[k], -1 means no plane;
            - depth: float32 raw depth divided by `depthShift`, resized to `outputSize`;
            - metadata: the scene-level metadata vector.

        Raises:
            FileNotFoundError: if the color, depth or segmentation image cannot be read.
        """
        imagePath = self.imagePaths[imageIndex]

        # Derive sibling files from the path components. The original used
        # str.replace('color', ...), which also rewrote any other "color"
        # substring occurring in the path.
        colorDir, fileName = os.path.split(imagePath)
        framesDir = os.path.dirname(colorDir)
        sceneDir = os.path.dirname(framesDir)
        frameName = os.path.splitext(fileName)[0]
        segmentationPath = os.path.join(sceneDir, 'annotation', 'segmentation', frameName + '.png')
        depthPath = os.path.join(framesDir, 'depth', frameName + '.png')
        posePath = os.path.join(framesDir, 'pose', frameName + '.txt')

        image = self._readImage(imagePath)
        depth = self._readImage(depthPath, -1).astype(np.float32) / self.depthShift
        extrinsics = self._loadExtrinsics(posePath)

        # Decode the 24-bit value stored in the three channels (channel 2 is the
        # most significant byte), then map it to a plane index.
        segmentation = self._readImage(segmentationPath, -1).astype(np.int32)
        segmentation = segmentation[:, :, 2] * 256 * 256 + segmentation[:, :, 1] * 256 + segmentation[:, :, 0]
        # Fixed: floor division. Under Python 3 the original `/` produced float ids.
        segmentation = segmentation // 100 - 1

        segments, counts = np.unique(segmentation, return_counts=True)
        numScenePlanes = self.planes.shape[0]
        # Keep only ids that refer to an existing plane, largest area first.
        segmentList = [
            (segment, count)
            for segment, count in zip(segments.tolist(), counts.tolist())
            if segment not in (-1, self.invalidSegment) and segment < numScenePlanes
        ]
        segmentList.sort(key=lambda item: -item[1])

        minPixels = (segmentation.shape[0] * segmentation.shape[1]) * self.minPlaneAreaRatio
        newPlanes = []
        newSegmentation = np.full(segmentation.shape, fill_value=-1, dtype=np.int32)
        for oriIndex, count in segmentList:
            if count < minPixels:
                continue
            # The new id is the row the plane gets in `newPlanes`.
            newSegmentation[segmentation == oriIndex] = len(newPlanes)
            newPlanes.append(self.planes[oriIndex])
        newPlanes = np.array(newPlanes)

        if len(newPlanes) > 0:
            newPlanes = self.transformPlanes(extrinsics, newPlanes)

        image = cv2.resize(image, self.outputSize)
        depth = cv2.resize(depth, self.outputSize)
        # Nearest-neighbour keeps plane ids intact instead of blending them.
        newSegmentation = cv2.resize(newSegmentation, self.outputSize, interpolation=cv2.INTER_NEAREST)

        return [image, newPlanes, newSegmentation, depth, self.metadata]

In [17]:
# Convert every frame of the selected scenes into one .npz training sample.
path_scenes = '/home/steve/Documents/ml3d/Project/Data/scans'
path_output = '/home/steve/Documents/ml3d/npz_okay'
# Samples are named num_s * sample_id_stride + i so that ids of different scenes
# cannot collide as long as a scene has fewer frames than the stride.
sample_id_stride = 2000000
# (width, height) of the semantic map; same size ScanNetScene.__getitem__ resizes to.
output_size = (256, 192)

scenes = ['0000_00', '0111_01', '0234_00', '0368_00', '0515_02', '0005_00', '0033_00', '0068_00',
          '0152_01', '0177_00', '0289_00', '0341_00', '0476_00', '0610_01', '0679_00']

os.makedirs(path_output, exist_ok=True)

for num_s, scene_id in enumerate(scenes):
    scene_name = 'scene' + scene_id
    scene_dir = path_scenes + '/' + scene_name
    sn = ScanNetScene(scene_dir, scene_name)

    path_image = scene_dir + '/frames/color'
    path_depth = scene_dir + '/frames/depth'
    path_semantics = scene_dir + '/' + scene_name + '_2d-label/label'

    # Every color frame needs a matching depth frame.
    assert len(os.listdir(path_image)) == len(os.listdir(path_depth)), \
        'color/depth frame count mismatch in ' + scene_name

    for i, image_path in enumerate(sn.imagePaths):
        image, plane, segmentation, depth, _ = sn[i]
        # Frames without any sufficiently large plane produce no sample.
        if plane.shape[0] == 0:
            continue
        num_planes = np.array([plane.shape[0]])
        plane = ensure_min_rows(plane)

        # Fixed: the semantic label must come from the same frame as the image.
        # `i` is only the position in sn.imagePaths, not the frame number, so the
        # label file name is derived from the image file name instead of str(i).
        frame_id = os.path.splitext(os.path.basename(image_path))[0]
        semantic_path = path_semantics + '/' + frame_id + '.png'
        semantic = np.array(cv2.resize(iio.v3.imread(semantic_path), output_size, interpolation=cv2.INTER_NEAREST))

        np.savez(
            path_output + '/' + str(num_s * sample_id_stride + i) + '.npz',
            image=image,
            plane=plane,
            depth=depth[..., np.newaxis],
            semantics=semantic,
            segmentation=segmentation[..., np.newaxis],
            num_planes=num_planes,
        )


KeyboardInterrupt: 

In [2]:
# Create the train and val directories and fill them with the .npz files
# listed (one file name per line) in train.txt and val.txt.

source_directory="/home/steve/Documents/ml3d/npz_okay/"
train_txt="/home/steve/Documents/ml3d/Project/NPZ/train.txt"
val_txt="/home/steve/Documents/ml3d/Project/NPZ/val.txt"

train_destination="/home/steve/Documents/ml3d/Project/PlanarReconstruction_ML3D/processed_data/train"
val_destination="/home/steve/Documents/ml3d/Project/PlanarReconstruction_ML3D/processed_data/val"


def _copy_listed_files(list_path, source_dir, destination_dir):
    """Copy every file named in `list_path` from `source_dir` into `destination_dir`.

    Args:
        list_path: Text file with one file name per line.
        source_dir: Directory the listed names are relative to.
        destination_dir: Existing directory receiving the copies.
    """
    with open(list_path, 'r') as file:
        for line in file:
            filename = line.strip()
            # A blank line would resolve to source_dir itself and make
            # shutil.copy fail on a directory, so skip it.
            if not filename:
                continue
            shutil.copy(os.path.join(source_dir, filename), destination_dir)


# Create the destination directories
os.makedirs(train_destination, exist_ok=True)
os.makedirs(val_destination, exist_ok=True)

_copy_listed_files(train_txt, source_directory, train_destination)
_copy_listed_files(val_txt, source_directory, val_destination)