In [None]:
import re
from collections import defaultdict
from io import BytesIO
from sys import stdout
from xml.etree.ElementTree import ElementTree, Element, SubElement, dump
from xml.dom.minidom import parse

import numpy as np
import pandas as pd
from scipy.spatial import KDTree


def add_array_set(l, a, id_a):
    """
    Insert array ``a`` into the list ``l`` unless a numerically equal array
    (as judged by ``np.allclose``) is already stored there.

    Returns the position of the first stored match if there is one;
    otherwise ``a`` is appended to ``l`` in place and ``id_a`` is returned
    unchanged.
    """
    # lazily look for the first stored array that matches `a`
    hit = next((pos for pos, stored in enumerate(l) if np.allclose(a, stored)), None)
    if hit is None:
        # nothing equal yet: keep the new array, caller-supplied id is its id
        l.append(a)
        return id_a
    return hit

In [None]:
# collect list of pixel coordinates per pair:
# res maps (vid_a, vid_b) -> list of (pixel_coords_a, pixel_coords_b) np-arrays
res = {}

# accumulate manual click output from multiple files
content = ''
files = (
    '/Volumes/davidh-ssd/manual_ips_angle1_v1.txt',
    '/Volumes/davidh-ssd/manual_ips_angle2.txt',
    '/Volumes/davidh-ssd/manual_ips_mview.txt',
)
for file in files:
    with open(file, 'r') as fd:
        content += '\n' + fd.read()

# split on pair header : !vid_a-vid_b
pair_cts = content.split('\n!')
# raw string: avoids invalid-escape warnings, pattern itself is unchanged
p_header = re.compile(r'!*([0-9]+)-([0-9]+)')


def _parse_coords(text):
    """Turn a '(x, y, ...)' coordinate string into a float np-array."""
    return np.array([float(v) for v in text.strip('()').split(',')])


for pair_ct in pair_cts:

    pair_ct_stripped = pair_ct.strip()

    # ignore empty or commented-out content
    if pair_ct_stripped == '' or pair_ct_stripped.startswith('#'):
        continue

    # get vid-pair from the first line of the chunk
    header = pair_ct_stripped.split('\n', 1)[0]
    m_header = p_header.match(header.strip())
    if m_header is None:
        # report and skip instead of failing with an opaque AttributeError
        print('Error parsing pair header: {!r}'.format(header))
        continue
    vid_a, vid_b = m_header.groups()

    res_pair = []
    # per-view click patterns; groups are (global coords, pixel coords)
    pa = re.compile(r'.*?tpId=0 setupId={}--- global: (\(.*?\))--- pixel: (\(.*?\)).*?'.format(vid_a))
    pb = re.compile(r'.*?\n?tpId=0 setupId={}--- global: (\(.*?\))--- pixel: (\(.*?\)).*?'.format(vid_b))

    # split on --- lines, the part before the first one is the header
    lines = pair_ct.split('---\n')[1:]

    # go over pairs of file chunks (a trailing unpaired chunk is dropped by zip)
    for line_a, line_b in zip(*[iter(lines)]*2):

        # parse global, pixel coords
        m_a = pa.match(line_a.strip())
        m_b = pb.match(line_b.strip())
        if m_a is None or m_b is None:
            print('Error parsing {} on:\n{}\n{}'.format((vid_a, vid_b), line_a, line_b))
            continue

        # only the pixel coordinates are used downstream
        _, pxa = m_a.groups()
        _, pxb = m_b.groups()

        # add as pair of np-arrays
        res_pair.append((_parse_coords(pxa), _parse_coords(pxb)))

    # add all for pair
    # NB: a pair that occurs more than once in the input replaces the earlier entry
    res[(int(vid_a), int(vid_b))] = res_pair

In [None]:
# parameters for outlier removal
# roughly based on http://docs.pointclouds.org/1.7.1/classpcl_1_1_statistical_outlier_removal.html#details
sd_mult_t = 3.0 # how many sds pixel-click pairs may deviate from mean until they are removed
p_estim = 0.8 # quantile of pixel-click pairs to use for mean, sd estimation

# format string for MVR interest point files
ip_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/tpId_0_viewSetupId_{}.beads.ip.txt'

# vid -> (locations, KDTree); a view can take part in several pairs,
# so its interest point file is read and indexed only once
ip_cache = {}


def _load_ips(vid):
    """Return (locations array, KDTree over it) for the interest points of view `vid`."""
    if vid not in ip_cache:
        df = pd.read_csv(ip_fstring.format(vid), sep='\t')
        locs = np.array(df[['x', 'y', 'z']])
        ip_cache[vid] = (locs, KDTree(locs))
    return ip_cache[vid]


def _inlier_indices(deviations, quantile, sd_mult):
    """
    Return the set of indices of `deviations` that are not outliers.

    Mean and sd are estimated per axis from the `quantile` fraction of
    deviations with the smallest norm; a deviation is kept if it lies
    within `sd_mult` sds of that mean on every axis.
    """
    if len(deviations) == 0:
        return set()
    # use at least one sample, otherwise mean/sd of an empty array are NaN
    n_estim = max(1, int(len(deviations) * quantile))
    core = np.array(sorted(deviations, key=np.linalg.norm)[:n_estim])
    mu = np.mean(core, axis=0)
    sd = np.std(core, axis=0)
    # <= (not <): with sd == 0 on an axis, points exactly at the mean must survive
    return {idx for idx, d in enumerate(deviations)
            if np.all(np.abs(mu - d) <= sd * sd_mult)}


res_ip = {}
for (vid_a, vid_b), click_pairs in res.items():

    # kdtrees for real ips
    locs_a, kd_a = _load_ips(vid_a)
    locs_b, kd_b = _load_ips(vid_b)

    res_pair = [] # pixel point pairs from IPs
    manual_pair = [] # pixel point pairs from clicks
    # find closest neighbours
    # NB: no distance threshold here, bad matches are removed by the
    # statistical outlier removal below
    for point_a, point_b in click_pairs:
        _, idx_a = kd_a.query(point_a)
        _, idx_b = kd_b.query(point_b)
        res_pair.append((locs_a[idx_a], locs_b[idx_b]))
        manual_pair.append((point_a, point_b))

    # filter outliers: offsets between matched IP and clicked coordinate, per view
    ds_a = [ip[0] - click[0] for ip, click in zip(res_pair, manual_pair)]
    ds_b = [ip[1] - click[1] for ip, click in zip(res_pair, manual_pair)]

    # good points for both views
    idxes_good = (_inlier_indices(ds_a, p_estim, sd_mult_t)
                  & _inlier_indices(ds_b, p_estim, sd_mult_t))

    # keep filtered IP coordinate pairs
    res_ip[(vid_a, vid_b)] = [rp for idx, rp in enumerate(res_pair) if idx in idxes_good]

# quick check: how many pairs remain?
for k, v in res_ip.items():
    print(k, len(v))

In [None]:
# create BigStitcher interest point files
# and xml-chunk to be added to dataset.xml

# where to put ip files
ip_out_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/tpId_0_viewSetupId_{}.manual.ip.txt'
ip_out_corr_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/tpId_0_viewSetupId_{}.manual.corr.txt'

# xml-interestpoint description
rel_ip_out_fstring = 'interestpoints/tpId_0_viewSetupId_{}.manual'

# collect ips for views
res_singlevid = defaultdict(list) # vid -> unique ip coordinates, list index is the ip id
res_corr = defaultdict(list) # save correspondence ids
ids = defaultdict(int) # running counts
for (vid_a, vid_b), ips in res_ip.items():
    for (ip_a, ip_b) in ips:
        # add_array_set returns the running count only if the ip was newly added
        id_ma = add_array_set(res_singlevid[vid_a], ip_a, ids[vid_a])
        if id_ma == ids[vid_a]: # new ip
            ids[vid_a] += 1

        id_mb = add_array_set(res_singlevid[vid_b], ip_b, ids[vid_b])
        if id_mb == ids[vid_b]: # new ip
            ids[vid_b] += 1

        # remember correspondences (stored in both directions)
        res_corr[vid_a].append((id_ma, (vid_b, id_mb)))
        res_corr[vid_b].append((id_mb, (vid_a, id_ma)))

# NB: the per-view lists are deliberately NOT re-sorted (e.g. by z,y,x):
# the ids stored in res_corr are list positions, sorting would mess up
# the id numbering from above

# write ip file
for vid in res_singlevid.keys():
    df = pd.DataFrame(np.array(res_singlevid[vid]), columns=['x', 'y', 'z'])
    # integer ids = list positions; avoids np.int, which was removed from NumPy
    df.insert(0, 'id', np.arange(len(df)))
    df.to_csv(ip_out_fstring.format(vid), index=False, sep='\t')

    # dump correspondences
    with open(ip_out_corr_fstring.format(vid), 'w') as fd:
        fd.write('id\tcorresponding_timepoint_id\tcorresponding_viewsetup_id\tcorresponding_label\tcorresponding_id\n')
        for (id_a, (vid_b, id_b)) in res_corr[vid]:
            fd.write(f'{id_a}\t0\t{vid_b}\tmanual\t{id_b}\n')


# create XML for new ips
xml = Element('ViewInterestPoints')
for vid in res_singlevid.keys():
    ip_file_el = SubElement(xml, 'ViewInterestPointsFile',
                            timepoint='0', setup=str(vid), label='manual', params='manually picked')
    ip_file_el.text = rel_ip_out_fstring.format(vid)

# for pretty print: write to in-memory file, prettyprint with minidom
io = BytesIO()
ElementTree(xml).write(io)
io.seek(0)
print(parse(io).toprettyxml())

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

# manual inspection in 3d plots:
# scatter the offset (IP of first view minus IP of second view)
# of every kept correspondence of one view pair

pair_idx = 1 # which entry of res_ip (in insertion order) to inspect
pair_key = list(res_ip.keys())[pair_idx]
pairs = res_ip[pair_key]

xs, ys, zs = [], [], []
for l1, l2 in pairs:
    d = l1 - l2
    xs.append(d[0])
    ys.append(d[1])
    zs.append(d[2])

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(xs, ys, zs)
# label the figure so it can be read without the surrounding code
ax.set_xlabel('dx')
ax.set_ylabel('dy')
ax.set_zlabel('dz')
ax.set_title('IP offsets for view pair {}'.format(pair_key))
plt.show()