# Applying OpenSfM to find the depth for points in an image

OpenSfM is able to fully construct a point cloud for a collection of images that captures 3D information on the scene. We want to leverage this point cloud to create a 3D parametric representation. As such, we need to be able to go from detected wireframe features to 3D points in the point cloud.

The general idea will be to attempt to project out from a detected line endpoint/junction in an image to an approximate location in the depth map found by OpenSfM. We can attempt to "interpolate" depth by using the depth values of points that project close to the given point, and assuming they should be coplanar or just averaging the depths.

We should investigate how to access the resulting `merged.ply` point cloud data as well as the camera poses in order to accomplish the above.

## Follow the resources in SfM.ipynb for setup and running the OpenSfM pipeline

For this, using the OpenSfM `opensfm_run_all` executable should be good enough to generate your data.

In [1]:
import sys, os
import utils
import numpy as np
import yaml, json
import cv2
from plyfile import PlyData

sys.path.append('../OpenSfM')
from opensfm import features, config
from sfm import utils as sfm_util

from scipy.spatial.transform import Rotation

In [21]:
# Calibration parameters created using the steps in BasicOpenCV.ipynb
# NOTE: Camera calibration parameters will not work with video frames!
intrinsic_mat = np.load(utils.data("numpy/intrinsic_mat.npy"))
distortion_mat = np.load(utils.data("numpy/distortion_mat.npy"))
# Average the two focal lengths to get a best guess focal length
f = (intrinsic_mat[0, 0] + intrinsic_mat[1, 1]) / 2.0
# k1 and k2 are first two parameters of the distortion matrix
k1 = distortion_mat[0, 0]
k2 = distortion_mat[0, 1]

In [28]:
print("{}\n{}\n{}".format(f, k1, k2))

3308.435718742333
0.30446738772195164
-2.1296322572508846


In [13]:
# IMPORTANT: Set the project directory path here
project_dir = utils.data("door_closed/")


conf = config.load_config(os.path.join(project_dir, "config.yaml"))
depthmaps_dir = os.path.join(project_dir, "undistorted/depthmaps")
with open(os.path.join(project_dir, "reports/reconstruction.json")) as f:
    reconstruction_report = json.load(f)
image_dir = os.path.join(project_dir, "images")

# The merged.ply contains the depth information for points in the images (probably redundant with reconstruction.meshed.json)
numpy_merged_points = os.path.join(project_dir, "merged_points.npy")
try:
    points = np.load(numpy_merged_points)
except FileNotFoundError:
    merged_ply = PlyData.read(os.path.join(depthmaps_dir, "merged.ply"))
    element = merged_ply.elements[0]
    points = np.vstack((element.data['x'], element.data['y'], element.data['z'])).transpose()
    np.save(numpy_merged_points, points)
    print("Created numpy file for merged.ply points")


# reconstruction.meshed.json contain the rotations and translations of each camera, along with the mesh points in the image
with open(os.path.join(project_dir, "reconstruction.meshed.json")) as f:
    reconstruction_meshed = json.load(f)

In [14]:
print(reconstruction_report["not_reconstructed_images"])

other_list = []

for imname in os.listdir(image_dir):
    print("Processing {}...".format(imname))

    prunedname = imname + ".pruned.npz"
    try:
        pruned = np.load(os.path.join(depthmaps_dir, prunedname))
    except FileNotFoundError:
        print("Skipping {}: No depthmap found".format(imname))
        continue
    #im = cv2.imread(os.path.join(image_dir, imnamejpg))
    points = pruned["points"]
    if points.shape[0] == 0:
        print("Skipping {}: No points in pruned mesh".format(imname))
        continue
    other_list.append(imname)

['img_25.png', 'img_43.png', 'img_59.png', 'img_53.png', 'img_52.png', 'img_46.png', 'img_24.png']
Processing img_23.png...
Skipping img_23.png: No points in pruned mesh
Processing img_14.png...
Processing img_31.png...
Skipping img_31.png: No depthmap found
Processing img_55.png...
Skipping img_55.png: No depthmap found
Processing img_44.png...
Skipping img_44.png: No depthmap found
Processing img_59.png...
Skipping img_59.png: No depthmap found
Processing img_63.png...
Processing img_20.png...
Processing img_13.png...
Processing img_61.png...
Processing img_52.png...
Skipping img_52.png: No depthmap found
Processing img_49.png...
Skipping img_49.png: No depthmap found
Processing img_60.png...
Processing img_22.png...
Processing img_57.png...
Skipping img_57.png: No depthmap found
Processing img_28.png...
Skipping img_28.png: No depthmap found
Processing img_12.png...
Processing img_26.png...
Skipping img_26.png: No depthmap found
Processing img_54.png...
Skipping img_54.png: No depth

In [15]:
from wireframe import Wireframe

# Make sure to put your pretrained model data in the data directory!
config_file = utils.data("wireframe.yaml")
model_file = utils.data("pretrained_lcnn.pth.tar")

w = Wireframe(config_file, model_file, "")

if not w.setup():
    print("An error occured trying to setup the wireframe: {}".format(w.error))

CUDA is not available


In [16]:
def process_image(imname, info, camera, debug=True):
    """
    Serves to run on the 3D information given by the reconstruction meshed json file.
    
    Arguments:
    imname -- string image name to process (original file found in images dir)
    info -- dictionary retrieved from reconstructed_meshed
    """
    print("Processing {}...".format(imname))
    impath = str(os.path.join(image_dir, imname))
    points = np.array(info["vertices"])
    rotation = Rotation.from_rotvec(info["rotation"]).as_matrix()
    translation = np.array(info["translation"])
    if debug:
        print("Rotation:\n{}\nTranslation:\n{}".format(rotation,translation))
        print("Depth map consists of {} points".format(points.shape[0]))
    
    rec = w.parse(impath)
    nlines, nscores = rec.postprocess(threshold=0.9)
    if debug:
        print("Wireframe found {} lines with score passing threshold".format(nlines.shape[0]))
        
    
    

In [18]:
camera = reconstruction_meshed[0]['cameras']
for r in reconstruction_meshed:
    print(r['cameras'])
    continue
    for imname in r['shots'].keys():
        process_image(imname, r['shots'][imname], camera)

{'v2 unknown unknown 1920 1080 perspective 0': {'projection_type': 'perspective', 'width': 1920, 'height': 1080, 'focal': 0.9117969061343857, 'k1': -0.00010384566236784214, 'k2': -0.037270710547293515}}
{'v2 unknown unknown 1920 1080 perspective 0': {'projection_type': 'perspective', 'width': 1920, 'height': 1080, 'focal': 0.8685991044967001, 'k1': -0.024741197839431205, 'k2': -0.01730096812568517}}
{'v2 unknown unknown 1920 1080 perspective 0': {'projection_type': 'perspective', 'width': 1920, 'height': 1080, 'focal': 0.8498940797661843, 'k1': 0.0003137101231893825, 'k2': -0.0026578494644543685}}
