In [None]:
import re
from collections import defaultdict
from io import BytesIO
from itertools import combinations, product
from sys import stdout
from xml.dom.minidom import parse
from xml.etree.ElementTree import ElementTree, Element, SubElement, dump

import numpy as np
import pandas as pd
from scipy.spatial import KDTree


def add_array_set(l, a, id_a):
    """
    Insert array ``a`` into the list ``l`` unless a numerically close array is already stored.

    Returns the position of the first element of ``l`` for which ``np.allclose(a, element)``
    holds. If there is no such element, ``a`` is appended to ``l`` in place and the
    caller-supplied ``id_a`` is returned unchanged.
    """
    known_pos = next((pos for pos, known in enumerate(l) if np.allclose(a, known)), None)
    if known_pos is not None:
        return known_pos
    l.append(a)
    return id_a

# OLD click parsing, use code below instead

In [None]:
# collect list of pixel coordinates per pair: (vid_a, vid_b) -> [(pixel_a, pixel_b), ...]
res = {}

# accumulate manual click output from multiple files
content = ''
files = (
    '/Volumes/davidh-ssd/manual_ips_angle1_v1.txt',
    '/Volumes/davidh-ssd/manual_ips_angle2.txt',
    '/Volumes/davidh-ssd/manual_ips_mview.txt',
)
for file in files:
    with open(file, 'r') as fd:
        content += '\n' + fd.read()

# split on pair header : !vid_a-vid_b
pair_cts = content.split('\n!')
p_header = re.compile(r'!*([0-9]+)-([0-9]+)')

# click-line templates, filled with the setup id of the respective view.
# Raw strings: \( and \) must reach the regex engine as-is; in a normal string
# literal they are invalid escape sequences and trigger a warning.
click_template_a = r'.*?tpId=0 setupId={}--- global: (\(.*?\))--- pixel: (\(.*?\)).*?'
click_template_b = r'.*?\n?tpId=0 setupId={}--- global: (\(.*?\))--- pixel: (\(.*?\)).*?'

for pair_ct in pair_cts:

    # ignore empty or commented-out content
    if pair_ct.strip() == '' or pair_ct.strip().startswith('#'):
        continue

    # get vid-pair from the first line of the chunk
    header = pair_ct.strip().split('\n', 1)[0]
    vid_a, vid_b = p_header.match(header.strip()).groups()

    res_pair = []
    pa = re.compile(click_template_a.format(vid_a))
    pb = re.compile(click_template_b.format(vid_b))

    # split on --- lines; the first piece is the header and is dropped
    lines = pair_ct.split('---\n')[1:]

    # go over pairs of file chunks (first click in view a, second click in view b)
    for line_a, line_b in zip(lines[0::2], lines[1::2]):

        # parse global, pixel coords
        match_a = pa.match(line_a.strip())
        match_b = pb.match(line_b.strip())
        if match_a is None or match_b is None:
            # report the unparsable click pair and carry on with the next one
            print('Error parsing {} on:\n{}\n{}'.format((vid_a, vid_b), line_a, line_b))
            continue
        gla, pxa = match_a.groups()
        glb, pxb = match_b.groups()

        # add as pair of np-arrays (only the pixel coordinates are kept)
        res_pair.append((np.array([float(c) for c in pxa.strip('()').split(',')]),
                         np.array([float(c) for c in pxb.strip('()').split(',')])))

    # add all for pair
    res[(int(vid_a), int(vid_b))] = res_pair

# 1a: Parse click log: Between images

In [None]:


def _apply_last_view_transform(view_registration, point):
    """
    Map ``point`` (x, y, z) through the last ``ViewTransform`` of a ViewRegistration element.

    The whitespace-separated numbers of the transform's second child element are
    read row-major as the upper 3x4 part of a homogeneous 4x4 matrix.
    """
    vt = view_registration.findall('./ViewTransform')[-1]
    affine = np.zeros((4, 4))
    # NOTE(review): assumes the second child holds the 12 affine coefficients - confirm against the XML
    affine[:3] += np.array(list(vt)[1].text.split()).astype(float).reshape((3, 4))
    affine[3, 3] = 1.0
    return affine.dot(np.array(list(point) + [1]))[:3]


def parse_click_log(file, is_split=False, use_raw_split=False, split_dataset=None, fold_split=8):
    """
    Parse a manual click log into pairs of pixel coordinates per view pair.

    The log consists of blocks introduced by ``!vid_a-vid_b`` headers; within a
    block, clicks are separated by ``---`` lines and come in (view a, view b)
    pairs. Lines starting with ``#`` and empty lines are ignored.

    Parameters
    ----------
    file : path of the click log.
    is_split : the setup ids in the log belong to split views; the view id of a
        line is then ``setup_id // fold_split``.
    use_raw_split : keep every split view of a click (keys are integer setup ids).
        Otherwise only the first matching line of each click is used and the key
        is the pair of ids from the block header.
    split_dataset : dataset XML with the ViewRegistrations of the split views.
        Only read when ``is_split and not use_raw_split``; in that mode the
        pixel coordinates are mapped through the last ViewTransform of the
        split view.
    fold_split : number of split views per original view.

    Returns
    -------
    ``defaultdict(list)`` mapping ``(vid_a, vid_b)`` to a list of
    ``(point_a, point_b)`` numpy arrays. An empty dict is returned when grouped
    split views are requested without ``split_dataset``.
    NOTE(review): with ``use_raw_split=False`` the key ids are the header
    *strings*, with ``use_raw_split=True`` they are ints - kept as in the
    original, callers format them into file names.

    Raises
    ------
    ValueError if a block header or a click line cannot be parsed.
    """
    group_views = is_split and not use_raw_split

    # we have a split dataset, want to use grouped views, but have not provided a dataset file
    if group_views and split_dataset is None:
        return {}

    # registrations are only needed to transform split-view coordinates
    if group_views:
        root = ElementTree().parse(split_dataset)
        vrs = root.findall('./ViewRegistrations/ViewRegistration')

    p_click = re.compile(r'.*?tpId=0 setupId=([0-9]+?)--- global: (\(.*?\))--- pixel: (\(.*?\)).*?')
    p_header = re.compile(r'([0-9]+)-([0-9]+)')

    # integer-dividing a setup id by this gives the id of the view it belongs to
    group_size = fold_split if is_split else 1

    def click_locations(chunk, vid):
        """Return (setup_id, pixel) for every line of ``chunk`` that belongs to view ``vid``."""
        locs = []
        for raw_line in chunk.split('\n'):
            m = p_click.match(raw_line.strip())
            if m is None:
                raise ValueError('Cannot parse click line in {}: {!r}'.format(file, raw_line))
            setup_id, _, px = m.groups()
            if int(setup_id) // group_size == int(vid):
                locs.append((int(setup_id), np.array([float(c) for c in px.strip('()').split(',')])))
        return locs

    res_pair = defaultdict(list)

    with open(file, 'r') as fd:
        ct = fd.read()

    # remove comments and empty lines
    ct = '\n'.join(l for l in ct.split('\n') if l.strip() != '' and not l.strip().startswith('#'))

    # split on !headers
    blocks = [b for b in ct.split('!') if b.strip() != '']

    for block in blocks:

        # partition instead of split: a header without any clicks yields an empty click part
        header, _, clicks = block.partition('\n')
        m_header = p_header.match(header.strip())
        if m_header is None:
            raise ValueError('Cannot parse block header in {}: {!r}'.format(file, header))
        vid_a, vid_b = m_header.groups()

        clicks = [c.strip() for c in clicks.split('---\n') if c.strip() != '']

        # consecutive chunks form one correspondence: click in view a, click in view b
        for click_a, click_b in zip(clicks[0::2], clicks[1::2]):

            locs_a = click_locations(click_a, vid_a)
            locs_b = click_locations(click_b, vid_b)

            if not use_raw_split:
                # one location per view is enough when views are grouped
                locs_a = locs_a[:1]
                locs_b = locs_b[:1]

                if is_split:
                    # apply the view transformation of the split view the click was made in
                    locs_a = [(vi, _apply_last_view_transform(vrs[vi], l)) for vi, l in locs_a]
                    locs_b = [(vi, _apply_last_view_transform(vrs[vi], l)) for vi, l in locs_b]

                # label with the ids from the header
                # TODO: change vid to group
                locs_a = [(vid_a, l) for _, l in locs_a]
                locs_b = [(vid_b, l) for _, l in locs_b]

            for ((vida, loc1), (vidb, loc2)) in product(locs_a, locs_b):
                res_pair[(vida, vidb)].append((loc1, loc2))
    return res_pair

# quick single-file run of the parser (result is not kept)
# alternative input: '/Users/david/Desktop/split-manual-ips-angle1.txt'
file = '/Volumes/davidh-ssd/manual_ips_angle1_v1.txt'
parse_click_log(
    file,
    is_split=False,
    use_raw_split=False,
    split_dataset='/Volumes/davidh-ssd/BS_TEST/dataset_prealign.split.xml',
)

# click logs recorded on the split dataset ...
split_click_logs = (
    '/Users/david/Desktop/split-manual-ips-angle1.txt',
    '/Users/david/Desktop/split-manual-ips-angle2.txt',
    '/Users/david/Desktop/split-manual-ips-mview.txt',
)
# ... and on the original, non-split dataset
plain_click_logs = (
    '/Volumes/davidh-ssd/manual_ips_angle1_v1.txt',
    '/Volumes/davidh-ssd/manual_ips_angle2.txt',
    '/Volumes/davidh-ssd/manual_ips_mview.txt',
)

# one keyword-argument dict per parse_click_log call
tasks = [
    {
        'file': log_path,
        'is_split': True,
        'use_raw_split': False,
        'split_dataset': '/Volumes/davidh-ssd/BS_TEST/dataset_prealign.split.xml',
    }
    for log_path in split_click_logs
] + [
    {
        'file': log_path,
        'is_split': False,
        'use_raw_split': False,
        'split_dataset': None,
    }
    for log_path in plain_click_logs
]

# merge the point pairs of all logs per view pair,
# keeping each list ordered by the first coordinate of the first point
res = defaultdict(list)
for task in tasks:
    for pair_key, point_pairs in parse_click_log(**task).items():
        res[pair_key] = sorted(res[pair_key] + list(point_pairs), key=lambda pair: pair[0][0])

res

# 1b: Parse click log (within split images)

In [None]:
def parse_within_view_clicks(file, fold_split=8):
    """
    Parse a click log into point pairs between neighbouring split views of the same view.

    The log consists of blocks introduced by ``!vid-vid`` headers; within a block,
    clicks are separated by ``---`` lines. Each click lists one line per split
    view it was seen in. Lines starting with ``#`` and empty lines are ignored.

    Parameters
    ----------
    file : path of the click log.
    fold_split : number of split views per original view; a line belongs to the
        block's view if ``setup_id // fold_split`` equals the header id.

    Returns
    -------
    ``defaultdict(list)`` mapping ``(setup_id_a, setup_id_b)`` (ints) to a list
    of ``(pixel_a, pixel_b)`` numpy arrays. Only pairs whose setup ids differ
    in exactly one of the three lowest bits (xor of 1, 2 or 4) are kept.

    Raises
    ------
    ValueError if a block header or a click line cannot be parsed.
    """
    # patterns are local: the function must not depend on names another cell may or may not define
    p_click = re.compile(r'.*?tpId=0 setupId=([0-9]+?)--- global: (\(.*?\))--- pixel: (\(.*?\)).*?')
    p_header = re.compile(r'([0-9]+)-([0-9]+)')

    # xor values of setup ids that count as neighbours
    # NB: only works for fold_split = 8
    neighbour_xors = (1, 2, 4)

    with open(file, 'r') as fd:
        ct = fd.read()

    # remove comments and empty lines
    ct = '\n'.join(l for l in ct.split('\n') if l.strip() != '' and not l.strip().startswith('#'))

    # split on !headers
    blocks = [b for b in ct.split('!') if b.strip() != '']

    res_pair = defaultdict(list)

    for block in blocks:

        # partition instead of split: a header without any clicks yields an empty click part
        header, _, clicks = block.partition('\n')
        m_header = p_header.match(header.strip())
        if m_header is None:
            raise ValueError('Cannot parse block header in {}: {!r}'.format(file, header))
        # the second id of the header is the one used for filtering
        _, vid = m_header.groups()

        clicks = [c.strip() for c in clicks.split('---\n') if c.strip() != '']

        for click in clicks:

            # (setup id, pixel coordinates) for every line of this click that belongs to the view
            locs = []
            for raw_line in click.split('\n'):
                m = p_click.match(raw_line.strip())
                if m is None:
                    raise ValueError('Cannot parse click line in {}: {!r}'.format(file, raw_line))
                setup_id, _, px = m.groups()
                if int(setup_id) // fold_split == int(vid):
                    locs.append((int(setup_id), np.array([float(c) for c in px.strip('()').split(',')])))

            for ((vida, loc1), (vidb, loc2)) in combinations(locs, 2):
                if (vida ^ vidb) in neighbour_xors:
                    res_pair[(vida, vidb)].append((loc1, loc2))
    return res_pair

# click logs with correspondences inside split views
within_view_logs = (
    '/Users/david/Desktop/ips_within_split_angle1.txt',
    '/Users/david/Desktop/ips_within_split_angle2.txt',
)

# one keyword-argument dict per parse_within_view_clicks call
tasks = [{'file': log_path} for log_path in within_view_logs]

# merge the point pairs of all logs per view pair,
# keeping each list ordered by the first coordinate of the first point
res = defaultdict(list)
for task in tasks:
    for pair_key, point_pairs in parse_within_view_clicks(**task).items():
        res[pair_key] = sorted(res[pair_key] + list(point_pairs), key=lambda pair: pair[0][0])

res

# 2: Map manual clicks to closest IP in dataset

In [None]:
# parameters for outlier removal
# roughly based on http://docs.pointclouds.org/1.7.1/classpcl_1_1_statistical_outlier_removal.html#details
sd_mult_t = 3.0 # how many sds pixel-click pairs may deviate from mean until they are removed
p_estim = 0.8 # quantile of pixel-click pairs to use for mean, sd estimation

# format string for MVR interest point files 
#ip_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/tpId_0_viewSetupId_{}.beads.ip.txt'
ip_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/new_tpId_0_viewSetupId_{}.beads.ip.txt'


def _inlier_indices(offsets, p_estim, sd_mult_t):
    """
    Return the set of indices of ``offsets`` that are not statistical outliers.

    Mean and standard deviation are estimated per axis from the ``p_estim``
    fraction of offsets with the smallest norm (at least one offset is always
    used, so a single click does not produce NaN estimates). An offset is kept
    if it lies within ``sd_mult_t`` standard deviations of the mean on every
    axis; the bound is inclusive so that a zero standard deviation keeps the
    points that sit exactly on the mean.
    """
    if not offsets:
        return set()
    n_estim = max(1, int(len(offsets) * p_estim))
    closest = np.array(sorted(offsets, key=np.linalg.norm)[:n_estim])
    mu = np.mean(closest, axis=0)
    sd = np.std(closest, axis=0)
    return {idx for idx, d in enumerate(offsets) if np.all(np.abs(mu - d) <= sd * sd_mult_t)}


# (vid_a, vid_b) -> list of (ip_a, ip_b) interest point coordinates that survived filtering
res_ip = {}
for (vid_a, vid_b), manual_pair in res.items():

    # build kdtrees for real ips
    df_a = pd.read_csv(ip_fstring.format(vid_a), sep='\t')
    locs_a = np.array(df_a[['x', 'y', 'z']])
    kd_a = KDTree(locs_a)
    df_b = pd.read_csv(ip_fstring.format(vid_b), sep='\t')
    locs_b = np.array(df_b[['x', 'y', 'z']])
    kd_b = KDTree(locs_b)

    # find closest neighbours: replace every clicked point by the nearest detected interest point
    # NB: no distance threshold here, bad matches are removed by the statistical filter below
    res_pair = []
    for point_a, point_b in manual_pair:
        _, idx_a = kd_a.query(point_a)
        _, idx_b = kd_b.query(point_b)
        res_pair.append((locs_a[idx_a], locs_b[idx_b]))

    # filter outliers on the IP-minus-click offsets, separately for each view
    offsets_a = [ip_a - click_a for (ip_a, _), (click_a, _) in zip(res_pair, manual_pair)]
    offsets_b = [ip_b - click_b for (_, ip_b), (_, click_b) in zip(res_pair, manual_pair)]

    # good points for both views
    idxes_good = _inlier_indices(offsets_a, p_estim, sd_mult_t) & _inlier_indices(offsets_b, p_estim, sd_mult_t)

    # keep filtered IP coordinate pairs
    res_ip[(vid_a, vid_b)] = [rp for idx, rp in enumerate(res_pair) if idx in idxes_good]

# quick check: how many pairs remain? ("!!!" marks view pairs with fewer than 4 point pairs)
for k, v in res_ip.items():
    print(k, len(v), "!!!" if len(v) < 4 else "")
    
#res_ip[(6,7)]

## 3a: Creating Interest Point and Correspondence files (from scratch)

Use this to create new IPs for BigStitcher (e.g. after manually selecting them in image-pairs)

In [None]:
# create BigStitcher interest point files
# and xml-chunk to be added to dataset.xml

# where to put ip files
ip_out_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/tpId_0_viewSetupId_{}.manual.ip.txt'
ip_out_corr_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/tpId_0_viewSetupId_{}.manual.corr.txt'

# xml-interestpoint description
rel_ip_out_fstring = 'interestpoints/tpId_0_viewSetupId_{}.manual'

# collect ips for views
res_singlevid = defaultdict(list) # view id -> list of unique interest point coordinates
res_corr = defaultdict(list) # view id -> [(own ip id, (other view id, other ip id)), ...]
ids = defaultdict(int) # running counts: next free ip id per view
for (vid_a, vid_b), ips in res_ip.items():
    for (ip_a, ip_b) in ips:
        # add_array_set returns the running count exactly when the point was not known yet
        id_ma = add_array_set(res_singlevid[vid_a], ip_a, ids[vid_a])
        new_a = id_ma == ids[vid_a]
        if new_a:
            ids[vid_a] += 1

        id_mb = add_array_set(res_singlevid[vid_b], ip_b, ids[vid_b])
        new_b = id_mb == ids[vid_b]
        if new_b:
            ids[vid_b] += 1

        # we already have both IPs, but are they correspondences yet?
        # The partner view has to match as well: the same pair of ip ids may
        # already be linked between vid_a and a *different* view.
        already_linked = (not new_a and not new_b) and any(
            id_ma1 == id_ma and str(vid_b1) == str(vid_b) and id_mb1 == id_mb
            for (id_ma1, (vid_b1, id_mb1)) in res_corr[vid_a]
        )

        # remember correspondences (in both directions)
        if not already_linked:
            res_corr[vid_a].append((id_ma, (vid_b, id_mb)))
            res_corr[vid_b].append((id_mb, (vid_a, id_ma)))

## 3b: Creating Interest Point and Correspondence files (merge into existing)

Use this to merge 'within-image' correspondences we got after splitting

In [None]:
import os

# create BigStitcher interest point files
# and xml-chunk to be added to dataset.xml

# where to put ip files
ip_out_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/new_tpId_0_viewSetupId_{}.manual.ip.txt'
ip_out_corr_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/new_tpId_0_viewSetupId_{}.manual.corr.txt'

# xml-interestpoint description
rel_ip_out_fstring = 'interestpoints/tpId_0_viewSetupId_{}.manual'

# collect ips for views
res_singlevid = defaultdict(list) # view id -> list of unique interest point coordinates
res_corr = defaultdict(list) # view id -> [(own ip id, (other view id, other ip id)), ...]
ids = defaultdict(int) # running counts: next free ip id per view


for (vid_a, vid_b), ips in res_ip.items():

    # first time we see a view: start from the interest points / correspondences already on disk
    for vid in (vid_a, vid_b):
        if vid in res_singlevid:
            continue
        if os.path.exists(ip_out_fstring.format(vid)):
            existing_ips = pd.read_csv(ip_out_fstring.format(vid), sep='\t')
            res_singlevid[vid] = [v for v in existing_ips[['x', 'y', 'z']].values]
            ids[vid] = len(res_singlevid[vid])
        if os.path.exists(ip_out_corr_fstring.format(vid)):
            existing_corr = pd.read_csv(ip_out_corr_fstring.format(vid), sep='\t')
            res_corr[vid] = [
                (int(v[0]), (int(v[1]), int(v[2])))
                for v in existing_corr[['id', 'corresponding_viewsetup_id', 'corresponding_id']].values
            ]

    for (ip_a, ip_b) in ips:
        # add_array_set returns the running count exactly when the point was not known yet
        id_ma = add_array_set(res_singlevid[vid_a], ip_a, ids[vid_a])
        new_a = id_ma == ids[vid_a]
        if new_a:
            ids[vid_a] += 1

        id_mb = add_array_set(res_singlevid[vid_b], ip_b, ids[vid_b])
        new_b = id_mb == ids[vid_b]
        if new_b:
            ids[vid_b] += 1

        # we already have both IPs, but are they correspondences yet?
        # The partner view has to match as well: the same pair of ip ids may
        # already be linked between vid_a and a *different* view. View ids are
        # compared as strings because ids loaded from disk are ints while the
        # keys of res_ip may be strings.
        already_linked = (not new_a and not new_b) and any(
            id_ma1 == id_ma and str(vid_b1) == str(vid_b) and id_mb1 == id_mb
            for (id_ma1, (vid_b1, id_mb1)) in res_corr[vid_a]
        )

        # remember correspondences (in both directions)
        if not already_linked:
            res_corr[vid_a].append((id_ma, (vid_b, id_mb)))
            res_corr[vid_b].append((id_mb, (vid_a, id_ma)))



# 4: Save IP files and print XML stub
Save the IPs and correspondences created above, print an XML stub to include in ```dataset.xml``` if necessary

In [None]:
# write ip file and correspondence file for every view
for vid in res_singlevid.keys():
    # reshape so that a view without any points still gives an (empty) x/y/z table
    xyz = np.asarray(res_singlevid[vid], dtype=float).reshape(-1, 3)
    df = pd.DataFrame(xyz, columns=['x', 'y', 'z'])
    # ids are simply the positions in the per-view list (integer column, first in the file)
    df.insert(0, 'id', np.arange(len(df)))
    df.to_csv(ip_out_fstring.format(vid), index=False, sep='\t')
    
    # dump correspondences: timepoint is always 0, label is always 'manual'
    with open(ip_out_corr_fstring.format(vid), 'w') as fd:
        fd.write('id\tcorresponding_timepoint_id\tcorresponding_viewsetup_id\tcorresponding_label\tcorresponding_id\n')
        for (id_a, (vid_b, id_b)) in res_corr[vid]:
            fd.write(f'{id_a}\t0\t{vid_b}\tmanual\t{id_b}\n')
        

# create XML for new ips: one ViewInterestPointsFile element per view
xml = Element('ViewInterestPoints')
for vid in res_singlevid.keys():
    SubElement(xml, 'ViewInterestPointsFile',
               timepoint='0', setup=str(vid), label='manual', params='manually picked'
              ).text=rel_ip_out_fstring.format(vid)

# for pretty print: write to in-memory file, prettyprint with minidom
io = BytesIO()
ElementTree(xml).write(io)
io.seek(0)
print(parse(io).toprettyxml())

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

# manual inspection in 3d plots:
# scatter the per-axis difference between the two points of every pair

# second view pair in res_ip
pairs = list(res_ip.values())[1]
deltas = [l1 - l2 for l1, l2 in pairs]
xs = [delta[0] for delta in deltas]
ys = [delta[1] for delta in deltas]
zs = [delta[2] for delta in deltas]

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(xs, ys, zs)

# 1: re-create correspondences after splitting

In [None]:
from itertools import product, count, combinations
from multiprocessing import Pool

# interest points and correspondences of the original (non-split) views
ip_old_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/tpId_0_viewSetupId_{}.manual.ip.txt'
ip_old_corr_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/tpId_0_viewSetupId_{}.manual.corr.txt'

# interest points of the split views and the dataset definition of the split dataset
ip_new_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/new_tpId_0_viewSetupId_{}.manual.ip.txt'
xml_new = '/Volumes/davidh-ssd/BS_TEST/dataset_prealign.split.xml'

# number of tiles each view was split into
fold_split = 8

# read the split dataset once; keep the registration and angle elements of all view setups
root = ElementTree(file=xml_new).getroot()
vrs = root.findall('./ViewRegistrations/ViewRegistration')
angles = root.findall('./SequenceDescription/ViewSetups/ViewSetup/attributes/angle')

def _last_view_affine(view_registration):
    """
    Return the last ``ViewTransform`` of a ViewRegistration element as a 4x4 matrix.

    The whitespace-separated numbers of the transform's second child element are
    read row-major as the upper 3x4 part; the last row is (0, 0, 0, 1).
    """
    vt = view_registration.findall('./ViewTransform')[-1]
    affine = np.zeros((4, 4))
    # NOTE(review): assumes the second child holds the 12 affine coefficients - confirm against the XML
    affine[:3] += np.array(list(vt)[1].text.split()).astype(float).reshape((3, 4))
    affine[3, 3] = 1.0
    return affine


def _match_old_rows(df_new, affine, old_xyz):
    """
    For every interest point of a split view, find the row of ``old_xyz`` it coincides with.

    Each (x, y, z) of ``df_new`` is mapped through ``affine`` and compared with
    all rows of ``old_xyz``. The result holds, per interest point, the row index
    if exactly one row lies closer than 0.001, else ``None``.
    """
    matches = []
    for v in df_new[['x', 'y', 'z']].values:
        vt = affine.dot(np.array(list(v) + [1]))[:3]
        hits = np.flatnonzero(np.linalg.norm(old_xyz - vt, axis=1) < 0.001)
        matches.append(int(hits[0]) if hits.size == 1 else None)
    return matches


def process_vid_pair(vid_a, vid_b):
    """
    Re-find the correspondences between two non-split views in their split views.

    Reads the old interest point / correspondence files of ``vid_a`` and
    ``vid_b`` and, for every admissible combination of their split views, the
    interest points of the split views. A correspondence is re-found when the
    transformed split interest points of both sides coincide with the two ends
    of the same old correspondence.

    Uses the module-level ``ip_old_fstring``, ``ip_old_corr_fstring``,
    ``ip_new_fstring``, ``fold_split``, ``vrs`` and ``angles``.

    Returns
    -------
    ``None`` if one of the old files does not exist, otherwise
    ``(ctr_old, res)``: the number of split-view pairs with at least 4
    re-found correspondences and a dict mapping
    ``(split_setup_id_a, split_setup_id_b)`` to a list of ``(ip_id_a, ip_id_b)``.
    """
    res = defaultdict(list)

    # try to load non-split IPs, ignore if we do not have IPs for an image
    try:
        df_old_a = pd.read_csv(ip_old_fstring.format(vid_a), sep='\t')
        df_old_b = pd.read_csv(ip_old_fstring.format(vid_b), sep='\t')
        corr_old_a = pd.read_csv(ip_old_corr_fstring.format(vid_a), sep='\t')
        # not used below, but a missing file still means "no manual IPs for vid_b"
        pd.read_csv(ip_old_corr_fstring.format(vid_b), sep='\t')
    except FileNotFoundError:
        return None

    # NB: dropping duplicates was necessary, as some correspondences were listed twice
    # TODO: remove from non-split dataset
    corr_old_a = corr_old_a.drop_duplicates()
    idxs = corr_old_a[corr_old_a['corresponding_viewsetup_id'] == vid_b]

    # just corresponding IPs in old views
    # same ordering for both tables: row k of both tables is the k-th old correspondence
    # NOTE(review): ids are used as row positions, i.e. assumes id == row number in the ip files
    just_corr_a = df_old_a.iloc[list(idxs.id.values)].reset_index(drop=True)
    just_corr_b = df_old_b.iloc[list(idxs.corresponding_id.values)].reset_index(drop=True)
    xyz_old_a = just_corr_a[['x', 'y', 'z']].values
    xyz_old_b = just_corr_b[['x', 'y', 'z']].values

    # count number of good pairs (should be 4)
    ctr_old = 0

    for (vid_new_1, vid_new_2) in product(np.arange(vid_a*fold_split, vid_a*fold_split+fold_split),
                                         np.arange(vid_b*fold_split, vid_b*fold_split+fold_split)):

        # get relative index of views
        vid_n1_rel = (vid_new_1 - vid_a*fold_split)
        vid_n2_rel = (vid_new_2 - vid_b*fold_split)
        rel_xor = vid_n1_rel ^ vid_n2_rel

        # ignore "diagonal" for multiview
        # views are flipped in y
        # we want to compare 1-2, 3-4, not 1-3
        if angles[vid_new_1].text != angles[vid_new_2].text and not rel_xor == 1:
            continue

        # ignore diagonal
        # xor of relative view ids is power of 2 -> differ only along one direction
        # NB: only works for fold_split = 8
        if rel_xor not in (1, 2, 4):
            continue

        # get split IPs
        df_new_a = pd.read_csv(ip_new_fstring.format(vid_new_1), sep='\t')
        df_new_b = pd.read_csv(ip_new_fstring.format(vid_new_2), sep='\t')

        # old-correspondence row of every split IP, using the view transformations of the split views.
        # Each side is looked up once instead of once per (i1, i2) combination.
        old_rows_a = _match_old_rows(df_new_a, _last_view_affine(vrs[vid_new_1]), xyz_old_a)
        old_rows_b = _match_old_rows(df_new_b, _last_view_affine(vrs[vid_new_2]), xyz_old_b)

        # old row -> positions of the split IPs of view b that hit it (ascending)
        positions_b = defaultdict(list)
        for i2, old_row in enumerate(old_rows_b):
            if old_row is not None:
                positions_b[old_row].append(i2)

        # count number of correspondences
        ctr = 0
        for i1, old_row in enumerate(old_rows_a):
            if old_row is None:
                continue
            # transformed IPs have the same index in original data -> correspondence re-found
            for i2 in positions_b.get(old_row, ()):
                ctr += 1

                # IP ids in split views
                ida = df_new_a.iloc[i1].id
                idb = df_new_b.iloc[i2].id

                res[(vid_new_1, vid_new_2)].append((ida, idb))

        # split pairs with enough points for an affine model -> good
        if ctr >= 4:
            ctr_old += 1

    return ctr_old, res

# do it in parallel, one task per pair of the 24 non-split views;
# the context manager shuts the worker processes down once all results are collected
with Pool() as pool:
    res = pool.starmap(process_vid_pair, combinations(range(24), 2))

'Done'

In [None]:
# how many old view pairs have at least one good link
# (a result is None when the old files of a view were missing)
list(zip(combinations(range(24), 2), res))
sum(1 for outcome in res if outcome is not None and outcome[0] > 0)


# 2: create new correspondence files and save

In [None]:
# build new correspondences tables: one DataFrame per split view

corr_columns = ['id', 'corresponding_timepoint_id', 'corresponding_viewsetup_id',
                'corresponding_label', 'corresponding_id']

# collect plain records first and build each table once;
# growing a DataFrame row by row is quadratic and DataFrame.append no longer exists in pandas 2
records_per_view = defaultdict(list)
for i in res:
    # skip view pairs without old files or without any good split pair
    if i is None or i[0] == 0:
        continue

    res_i = i[1]

    for ((va, vb), ips) in res_i.items():
        for (ipa, ipb) in ips:
            # every correspondence is stored for both of its views
            records_per_view[va].append(
                {'id': ipa,
                 'corresponding_timepoint_id': 0,
                 'corresponding_viewsetup_id': vb,
                 'corresponding_label': 'manual',
                 'corresponding_id': ipb})
            records_per_view[vb].append(
                {'id': ipb,
                 'corresponding_timepoint_id': 0,
                 'corresponding_viewsetup_id': va,
                 'corresponding_label': 'manual',
                 'corresponding_id': ipa})

# views that are looked up but have no correspondences still yield an empty table
res_per_view = defaultdict(lambda: pd.DataFrame(columns=corr_columns))
for view, records in records_per_view.items():
    res_per_view[view] = pd.DataFrame(records, columns=corr_columns)

In [None]:
# save new correspondence table, one tab-separated file per split view

ip_new_corr_fstring = '/Volumes/davidh-ssd/BS_TEST/interestpoints/new_tpId_0_viewSetupId_{}.manual.corr.txt'

for k, v in res_per_view.items():

    # ids must be written as integers; the builtin int replaces the np.int alias removed from NumPy
    df = v.astype({'id': int, 'corresponding_timepoint_id': int, 'corresponding_viewsetup_id': int,
                   'corresponding_label': str, 'corresponding_id': int})
    
    df.to_csv(ip_new_corr_fstring.format(k), index=False, sep='\t')

--- DONE ---

# OLD/TEST code below

In [None]:
# any pairs with <4 correspondences?
# count, per view, how many links go to each partner view and report the sparse ones
for view, links in res_corr.items():
    links_per_partner = defaultdict(int)
    for _, (partner, _) in links:
        links_per_partner[partner] += 1
    sparse = [(partner, n_links) for partner, n_links in links_per_partner.items() if n_links < 4]
    for partner, n_links in sparse:
        print(view, partner, n_links, '!!!')

In [None]:
# display the current contents of `res`
res

In [None]:
# show all correspondences between the blocked tiles of a view pair:
# `res` is zipped with combinations(range(24), 2), entry (10, 17) is looked up
# NOTE(review): assumes `res` still holds the pool.starmap output in that same order - confirm
dict(zip(combinations(range(24), 2), res))[(10,17)]

In [None]:
xml_new = '/Volumes/davidh-ssd/BS_TEST/dataset_prealign.split.xml'

# how many tiles a view was split into
fold_split = 8

# get the angle attribute of all view setups from the XML.
# Bound to `angles`, not `vrs`: `vrs` is the list of ViewRegistration elements
# that process_vid_pair reads, and overwriting it here would break later calls.
root = ElementTree().parse(xml_new)
angles = root.findall('./SequenceDescription/ViewSetups/ViewSetup/attributes/angle')
angles[0].text