In [43]:
import numpy as np
import pandas as pd
from collections import namedtuple, defaultdict
import os
import pickle
import sys
import csv 
import cv2
import matplotlib.pyplot as plt
from math import sqrt

In [178]:
def getErrors(dots, grountruth):
    """Return the per-class count error (detected minus ground truth).

    Parameters
    ----------
    dots : sequence
        One collection of detected points per entry of the notebook-level
        ``sealion_types`` list, in the same order.
    grountruth : pandas.DataFrame
        Frame with one column per name in ``sealion_types``; the value in its
        first row is used as the reference count. (The parameter name keeps
        its original spelling so keyword callers continue to work.)

    Returns
    -------
    list
        ``len(dots[i]) - reference_count`` for every class, in
        ``sealion_types`` order. Negative means fewer dots than expected.

    Raises
    ------
    IndexError
        If ``grountruth`` has no rows or ``dots`` has too few entries.
    """
    errors = []
    for i, sealion in enumerate(sealion_types):
        # Series.get_values() was deprecated in pandas 0.25 and removed in
        # 1.0; positional access via .iloc works on old and new versions.
        gt = grountruth[sealion].iloc[0]
        errors.append(len(dots[i]) - gt)
    return errors

In [179]:
def merge(dotsA, dotsB, errorsA, errorsB):
    """Choose, class by class, the dot list whose count error is smaller.

    For every position in the notebook-level ``sealion_types`` list the side
    (A or B) with the smaller absolute error is kept; ties go to A.

    Returns a ``(mergedDots, mergedErrors)`` pair of lists holding the chosen
    dot list and the chosen error for each class.
    """
    mergedDots, mergedErrors = [], []
    for idx in range(len(sealion_types)):
        use_a = np.abs(errorsA[idx]) <= np.abs(errorsB[idx])
        src_dots, src_errors = (dotsA, errorsA) if use_a else (dotsB, errorsB)
        mergedDots.append(src_dots[idx])
        mergedErrors.append(src_errors[idx])
    return mergedDots, mergedErrors

In [180]:
def compare(dotsA, dotsB):
    """Run ``compareCoords`` on each class's pair of dot lists.

    Returns one ``compareCoords`` result per entry of the notebook-level
    ``sealion_types`` list, in that order.
    """
    return [compareCoords(dotsA[k], dotsB[k]) for k in range(len(sealion_types))]

In [205]:
def compareCoords(coordsA, coordsB, threshold=3 * 3 + 3 * 3):
    """Greedily pair up two point lists and count what could not be paired.

    The closest remaining (A, B) pair is matched repeatedly as long as its
    squared distance is strictly below ``threshold``. This is a greedy
    nearest-pair matching, not an optimal assignment.

    Parameters
    ----------
    coordsA, coordsB : sequence of points
        Each point is indexable; only components 0 and 1 are used.
    threshold : number, optional
        Upper bound (exclusive) on the *squared* distance of a valid match.
        Defaults to 18, i.e. up to 3 units off on each axis.

    Returns
    -------
    int
        ``max(len(coordsA), len(coordsB)) - number_of_matches``.

    Side effects
    ------------
    Prints every point of A and of B that was left without a partner.
    """
    # Build (n, 2) arrays; reshape keeps the shape valid for empty inputs.
    ptsA = np.array([(c[0], c[1]) for c in coordsA], dtype=float).reshape(-1, 2)
    ptsB = np.array([(c[0], c[1]) for c in coordsB], dtype=float).reshape(-1, 2)

    # Pairwise squared distances, shape (len(coordsA), len(coordsB)).
    delta = ptsA[:, np.newaxis, :] - ptsB[np.newaxis, :, :]
    distances = (delta ** 2).sum(axis=2)

    umatchedA = set(range(len(coordsA)))
    umatchedB = set(range(len(coordsB)))
    matched = 0

    # At most min(len(A), len(B)) pairs can exist.
    for _ in range(min(len(coordsA), len(coordsB))):
        best_row, best_col = np.unravel_index(np.argmin(distances), distances.shape)
        if not distances[best_row, best_col] < threshold:
            break
        # Retire both points so neither can be matched again; inf can never
        # pass the "< threshold" test, whatever threshold the caller chose.
        distances[best_row, :] = np.inf
        distances[:, best_col] = np.inf
        umatchedA.remove(int(best_row))
        umatchedB.remove(int(best_col))
        matched += 1

    unmatched = max(len(coordsA), len(coordsB)) - matched
    if umatchedA or umatchedB:
        print("Unmatched points: ")
        if umatchedA:
            print("From A: ")
            # sorted() makes the report order deterministic.
            for i in sorted(umatchedA):
                print(coordsA[i])
        if umatchedB:
            print("From B: ")
            for i in sorted(umatchedB):
                print(coordsB[i])
    return unmatched

In [77]:
# Load the dot coordinates table; the preview below shows one dot per row
# (filename, y_coord, x_coord, category).
# NOTE: `df` is rebound to train.csv in a later cell, so cells that use `df`
# depend on execution order.
df = pd.read_csv("../data/sealion/correct_coordinates.csv")

In [78]:
# Class names; a class's position in this list is the index used for every
# per-class list in this notebook.
sealion_types = [
    "adult_males",
    "subadult_males",
    "adult_females",
    "juveniles",
    "pups",
]

In [79]:
# Reverse lookup: class name -> position in sealion_types.
sealion_index = {name: position for position, name in enumerate(sealion_types)}

In [80]:
# train_id -> list of per-class (x, y) lists; filled in by populate_dots.
dotsC = {}

In [81]:
def populate_dots(r):
    """Append one dot, described by a coordinates row, to the global ``dotsC``.

    ``r`` is looked up by the keys "filename", "x_coord", "y_coord" and
    "category". The file name stem is parsed as the integer train id, the
    coordinates are rounded to integers, and the ``(x, y)`` tuple is appended
    to the list of the row's class (via ``sealion_index``). A fresh entry of
    five empty lists is created the first time a train id is seen.
    """
    stem = os.path.splitext(r["filename"])[0]
    train_id = int(stem)
    point = (int(round(r["x_coord"])), int(round(r["y_coord"])))
    class_pos = sealion_index[r["category"]]

    per_class = dotsC.setdefault(train_id, [[], [], [], [], []])
    per_class[class_pos].append(point)

In [82]:
# Preview the first rows of the coordinates table.
df.head()

Unnamed: 0.1,Unnamed: 0,filename,y_coord,x_coord,category
0,0,0.jpg,31.0,4238.0,pups
1,1,0.jpg,41.0,4240.0,adult_females
2,2,0.jpg,59.0,4722.0,juveniles
3,3,0.jpg,74.0,3833.0,adult_females
4,4,0.jpg,80.0,2756.0,pups


In [83]:
# populate_dots only ever appends, so empty the mapping first: re-running
# this cell would otherwise store every dot a second time.
dotsC.clear()
# Called for its side effect on dotsC; the returned Series is discarded.
_ = df.apply(populate_dots, axis=1)

In [84]:
# Write each image's per-class dot lists to "<train_id>C.pkl".
for train_id, dots in dotsC.items():
    out_path = "../data/sealion/TrainDotsB/{}C.pkl".format(train_id)
    with open(out_path, "wb") as pfile:
        pickle.dump(dots, pfile, protocol=pickle.HIGHEST_PROTOCOL)

In [85]:
def get_train_id(filename):
    """Return the integer encoded in a file's base name.

    Directory and extension are stripped, e.g. ``"/x/Train/866.jpg" -> 866``.
    Raises ``ValueError`` when the remaining stem is not an integer literal.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    return int(stem)

In [105]:
def reprocess_all(train_ids=None, data_dir="/home/lowik/sealion/data/sealion/"):
    """Merge dot sets A and B for the training images and report count errors.

    For each file ending in "jpg" inside ``<data_dir>/Train`` (optionally
    restricted to ``train_ids``) this:

    1. loads ``<id>.pkl`` (set A) and, if it exists, ``<id>C.pkl`` (set B)
       from ``<data_dir>/TrainDotsB``; a missing B is treated as empty;
    2. prints whether A and B differ (via ``compare``);
    3. keeps, per class, the set whose count is closest to ``train.csv``
       (via ``getErrors`` / ``merge``);
    4. writes a debug overlay to ``<data_dir>/TrainDebug/<id>_merged.jpg``;
    5. saves the merged dots as ``<data_dir>/TrainDotsB/<id>_merged.pkl``.

    Parameters
    ----------
    train_ids : container of int, optional
        Only these ids are processed; ``None`` processes every image.
    data_dir : str, optional
        Root directory holding ``Train``, ``TrainDotted``, ``TrainDebug`` and
        ``TrainDotsB``. The default is the location originally hard-coded.

    Returns
    -------
    tuple
        ``(train_id_with_errors, train_id_ok, train_id_no_diff,
        train_id_with_diff, total_errors)``: four lists of train ids and a
        dict mapping train id to the summed absolute count error after merging.

    Raises
    ------
    FileNotFoundError
        If ``train.csv`` or an image's ``<id>.pkl`` is missing.
    """
    train_dir = os.path.join(data_dir, "Train")
    dotted_dir = os.path.join(data_dir, "TrainDotted")
    debug_dir = os.path.join(data_dir, "TrainDebug")
    dots_dir = os.path.join(data_dir, "TrainDotsB")
    os.makedirs(debug_dir, exist_ok=True)
    os.makedirs(dots_dir, exist_ok=True)

    # Named `counts` so the notebook-level `df` is not shadowed.
    counts = pd.read_csv(os.path.join(train_dir, "train.csv"))

    train_id_with_errors = []
    train_id_no_diff = []
    train_id_with_diff = []
    train_id_ok = []
    total_errors = dict()

    files_to_process = [filename for filename in os.listdir(train_dir) if filename.endswith("jpg")]
    for filename in files_to_process:
        train_id = get_train_id(filename)
        if train_ids is not None and train_id not in train_ids:
            continue

        # NOTE: pickle.load executes code embedded in the file; these files
        # are expected to be ones this pipeline wrote itself.
        with open(os.path.join(dots_dir, str(train_id) + ".pkl"), "rb") as ofile:
            dotsA = pickle.load(ofile)

        dots_b_path = os.path.join(dots_dir, str(train_id) + "C.pkl")
        if os.path.exists(dots_b_path):
            with open(dots_b_path, "rb") as ofile:
                dotsB = pickle.load(ofile)
        else:
            # No set B for this image: one empty list per class.
            dotsB = [[] for _ in sealion_types]

        grountruth = counts[counts.train_id == int(train_id)]
        errorsA = getErrors(dotsA, grountruth)
        errorsB = getErrors(dotsB, grountruth)

        difference = compare(dotsA, dotsB)
        if np.sum(np.abs(difference)) == 0:
            print("[{i}] - No Difference A vs B".format(i=train_id))
            train_id_no_diff.append(train_id)
        else:
            print("[{i}] - Difference A vs B: {d}".format(i=train_id, d=difference))
            train_id_with_diff.append(train_id)

        mergeDots, mergeErrors = merge(dotsA, dotsB, errorsA, errorsB)
        sumErrors = np.sum(np.abs(mergeErrors))

        # cv2.imread returns None (it does not raise) when the image is
        # missing or unreadable; skip only the overlay in that case.
        im_dotted_bgr = cv2.imread(os.path.join(dotted_dir, filename))
        if im_dotted_bgr is None:
            print("[{i}] - Dotted image could not be read, debug overlay skipped".format(i=train_id))
        else:
            im_draw = draw_detected(im_dotted_bgr, mergeDots)
            cv2.imwrite(os.path.join(debug_dir, str(train_id) + "_merged.jpg"), im_draw)

        total_errors[train_id] = sumErrors
        if sumErrors > 0:
            train_id_with_errors.append(train_id)
            print("[{i}] - Total errors: {total} - {err}".format(i=train_id, total=sumErrors, err=mergeErrors))
        else:
            train_id_ok.append(train_id)
            print("[{i}] - ok".format(i=train_id))

        # Save dots
        with open(os.path.join(dots_dir, str(train_id) + "_merged.pkl"), "wb") as ofile:
            pickle.dump(mergeDots, ofile, pickle.HIGHEST_PROTOCOL)
    return train_id_with_errors, train_id_ok, train_id_no_diff, train_id_with_diff, total_errors

In [106]:
def draw_detected(im, dots):
    """Return a copy of ``im`` with a circle drawn around every dot.

    ``dots`` is a sequence of point lists. Every point of list ``i`` is drawn
    as a circle of radius 9 and thickness 2 in the i-th palette colour, so
    only the first five lists may contain points. Point components 0 and 1
    are rounded to integers and passed to ``cv2.circle`` as the centre.
    The input image itself is not modified.
    """
    palette = ((1, 7, 244), (243, 17, 242), (5, 41, 79), (177, 54, 26), (26, 171, 43))
    canvas = im.copy()
    for class_pos, centroids in enumerate(dots):
        for point in centroids:
            px, py = int(round(point[0])), int(round(point[1]))
            cv2.circle(canvas, (px, py), 9, palette[class_pos], 2)
    return canvas

In [107]:
# Process every training image (train_ids=None) and keep the five result
# collections as notebook globals for the cells below.
train_id_with_errors, train_id_ok, train_id_no_diff, train_id_with_diff, total_errors = reprocess_all()

[866] - No Difference A vs B
[866] - ok
[867] - No Difference A vs B
[867] - ok
[868] - No Difference A vs B
[868] - ok
[869] - Difference A vs B: [2, 3, 0, 0, 0]
[869] - ok
[87] - Difference A vs B: [0, 0, 1, 0, 0]
[87] - ok
[870] - No Difference A vs B
[870] - ok
[871] - No Difference A vs B
[871] - Total errors: 7 - [0, 0, -3, 0, -4]
[872] - No Difference A vs B
[872] - ok
[873] - No Difference A vs B
[873] - Total errors: 1 - [0, 0, 0, -1, 0]
[874] - No Difference A vs B
[874] - ok
[875] - No Difference A vs B
[875] - ok
[876] - No Difference A vs B
[876] - ok
[877] - No Difference A vs B
[877] - ok
[878] - No Difference A vs B
[878] - Total errors: 1 - [0, -1, 0, 0, 0]
[879] - No Difference A vs B
[879] - ok
[88] - No Difference A vs B
[88] - ok
[880] - No Difference A vs B
[880] - ok
[881] - Difference A vs B: [1, 0, 4, 0, 0]
[881] - Total errors: 1 - [0, 0, 0, -1, 0]
[882] - Difference A vs B: [2, 4, 5, 16, 0]
[882] - Total errors: 1 - [0, 0, -1, 0, 0]
[883] - No Difference A vs

[81] - Total errors: 19 - [-1, -3, -15, 0, 0]
[810] - No Difference A vs B
[810] - ok
[811] - No Difference A vs B
[811] - Total errors: 24 - [-7, -5, -12, 0, 0]
[812] - No Difference A vs B
[812] - ok
[813] - No Difference A vs B
[813] - Total errors: 1 - [0, 0, 0, -1, 0]
[814] - No Difference A vs B
[814] - Total errors: 1 - [0, 0, 0, -1, 0]
[815] - No Difference A vs B
[815] - ok
[816] - No Difference A vs B
[816] - ok
[817] - No Difference A vs B
[817] - ok
[818] - No Difference A vs B
[818] - ok
[819] - No Difference A vs B
[819] - ok
[82] - No Difference A vs B
[82] - ok
[820] - No Difference A vs B
[820] - ok
[821] - No Difference A vs B
[821] - ok
[822] - Difference A vs B: [0, 0, 1, 0, 0]
[822] - Total errors: 1 - [0, 0, 0, -1, 0]
[823] - No Difference A vs B
[823] - Total errors: 3 - [0, -3, 0, 0, 0]
[824] - No Difference A vs B
[824] - ok
[825] - No Difference A vs B
[825] - ok
[826] - No Difference A vs B
[826] - ok
[827] - Difference A vs B: [4, 0, 0, 0, 0]
[827] - Total e

[743] - ok
[744] - Difference A vs B: [0, 0, 0, 1, 0]
[744] - Total errors: 1 - [0, 0, 0, -1, 0]
[745] - No Difference A vs B
[745] - Total errors: 1 - [0, 0, -1, 0, 0]
[746] - No Difference A vs B
[746] - ok
[747] - No Difference A vs B
[747] - ok
[748] - No Difference A vs B
[748] - Total errors: 1 - [0, -1, 0, 0, 0]
[749] - No Difference A vs B
[749] - ok
[75] - No Difference A vs B
[75] - ok
[750] - No Difference A vs B
[750] - Total errors: 4 - [-2, 0, 0, -2, 0]
[751] - No Difference A vs B
[751] - Total errors: 5 - [0, -3, 0, -2, 0]
[752] - No Difference A vs B
[752] - ok
[753] - No Difference A vs B
[753] - ok
[754] - No Difference A vs B
[754] - Total errors: 2 - [0, 0, 0, -2, 0]
[755] - No Difference A vs B
[755] - ok
[536] - No Difference A vs B
[536] - ok
[537] - No Difference A vs B
[537] - ok
[538] - Difference A vs B: [1, 0, 2, 0, 0]
[538] - Total errors: 1 - [0, -1, 0, 0, 0]
[539] - No Difference A vs B
[539] - Total errors: 1 - [-1, 0, 0, 0, 0]
[54] - No Difference A vs

[454] - ok
[455] - No Difference A vs B
[455] - ok
[456] - No Difference A vs B
[456] - ok
[457] - No Difference A vs B
[457] - ok
[458] - No Difference A vs B
[458] - ok
[459] - No Difference A vs B
[459] - ok
[46] - No Difference A vs B
[46] - ok
[460] - No Difference A vs B
[460] - Total errors: 1 - [0, 0, 0, -1, 0]
[461] - No Difference A vs B
[461] - ok
[462] - Difference A vs B: [0, 0, 1, 0, 0]
[462] - Total errors: 2 - [-1, 0, 0, 0, -1]
[463] - No Difference A vs B
[463] - ok
[464] - No Difference A vs B
[464] - ok
[465] - No Difference A vs B
[465] - Total errors: 1 - [0, 0, 0, -1, 0]
[466] - No Difference A vs B
[466] - ok
[467] - No Difference A vs B
[467] - Total errors: 2 - [0, 0, 0, -2, 0]
[468] - No Difference A vs B
[468] - ok
[469] - Difference A vs B: [0, 2, 8, 7, 0]
[469] - ok
[47] - No Difference A vs B
[47] - Total errors: 2 - [0, -2, 0, 0, 0]
[470] - No Difference A vs B
[470] - ok
[471] - No Difference A vs B
[471] - ok
[472] - No Difference A vs B
[472] - ok
[473

[382] - ok
[383] - Difference A vs B: [0, 0, 0, 1, 0]
[383] - Total errors: 2 - [0, -2, 0, 0, 0]
[384] - No Difference A vs B
[384] - Total errors: 92 - [-7, -5, -61, -19, 0]
[385] - No Difference A vs B
[385] - ok
[386] - No Difference A vs B
[386] - Total errors: 2 - [0, -2, 0, 0, 0]
[387] - No Difference A vs B
[387] - ok
[388] - No Difference A vs B
[388] - Total errors: 1 - [-1, 0, 0, 0, 0]
[389] - No Difference A vs B
[389] - ok
[39] - No Difference A vs B
[39] - ok
[390] - No Difference A vs B
[390] - ok
[391] - No Difference A vs B
[391] - ok
[392] - No Difference A vs B
[392] - ok
[393] - No Difference A vs B
[393] - ok
[394] - No Difference A vs B
[394] - Total errors: 4 - [0, -4, 0, 0, 0]
[395] - Difference A vs B: [0, 0, 2, 0, 0]
[395] - Total errors: 2 - [0, -1, 0, -1, 0]
[396] - No Difference A vs B
[396] - ok
[397] - No Difference A vs B
[397] - ok
[398] - No Difference A vs B
[398] - Total errors: 3 - [0, -3, 0, 0, 0]
[399] - No Difference A vs B
[399] - ok
[4] - No Dif

[31] - No Difference A vs B
[31] - ok
[310] - Difference A vs B: [0, 0, 0, 1, 0]
[310] - ok
[311] - Difference A vs B: [2, 1, 14, 9, 3]
[311] - ok
[312] - No Difference A vs B
[312] - Total errors: 1 - [0, 0, -1, 0, 0]
[313] - No Difference A vs B
[313] - ok
[314] - No Difference A vs B
[314] - ok
[315] - No Difference A vs B
[315] - ok
[0] - No Difference A vs B
[0] - ok
[1] - No Difference A vs B
[1] - ok
[10] - No Difference A vs B
[10] - ok
[100] - No Difference A vs B
[100] - ok
[101] - No Difference A vs B
[101] - ok
[102] - No Difference A vs B
[102] - ok
[103] - No Difference A vs B
[103] - ok
[104] - No Difference A vs B
[104] - ok
[105] - No Difference A vs B
[105] - Total errors: 5 - [0, 0, -1, -4, 0]
[106] - No Difference A vs B
[106] - ok
[107] - No Difference A vs B
[107] - ok
[108] - No Difference A vs B
[108] - Total errors: 1 - [-1, 0, 0, 0, 0]
[109] - No Difference A vs B
[109] - ok
[11] - No Difference A vs B
[11] - Total errors: 2 - [0, -2, 0, 0, 0]
[110] - Differen

In [109]:
import operator

In [111]:
# (train_id, total error) pairs ordered by ascending total error.
sorted_errors = sorted(total_errors.items(), key=lambda pair: pair[1])

In [113]:
# Show the pairs in reverse order, i.e. largest total error first.
sorted_errors[::-1]

[(34, 350),
 (406, 122),
 (9, 113),
 (384, 92),
 (507, 88),
 (3, 85),
 (767, 80),
 (234, 73),
 (7, 60),
 (292, 58),
 (66, 55),
 (903, 54),
 (712, 53),
 (593, 53),
 (909, 48),
 (151, 47),
 (21, 40),
 (331, 34),
 (97, 30),
 (490, 29),
 (811, 24),
 (268, 22),
 (529, 20),
 (184, 19),
 (81, 19),
 (426, 16),
 (857, 14),
 (344, 12),
 (905, 10),
 (643, 9),
 (587, 9),
 (585, 8),
 (516, 8),
 (499, 8),
 (899, 7),
 (871, 7),
 (837, 7),
 (621, 7),
 (412, 7),
 (207, 7),
 (129, 7),
 (15, 7),
 (918, 6),
 (629, 6),
 (598, 6),
 (437, 6),
 (67, 6),
 (776, 5),
 (751, 5),
 (265, 5),
 (105, 5),
 (38, 5),
 (941, 4),
 (925, 4),
 (910, 4),
 (827, 4),
 (750, 4),
 (607, 4),
 (500, 4),
 (476, 4),
 (421, 4),
 (394, 4),
 (297, 4),
 (252, 4),
 (215, 4),
 (122, 4),
 (83, 4),
 (906, 3),
 (889, 3),
 (823, 3),
 (781, 3),
 (761, 3),
 (759, 3),
 (739, 3),
 (724, 3),
 (700, 3),
 (595, 3),
 (574, 3),
 (571, 3),
 (524, 3),
 (510, 3),
 (509, 3),
 (495, 3),
 (473, 3),
 (409, 3),
 (405, 3),
 (398, 3),
 (362, 3),
 (359, 3),
 (34

In [141]:
# Reads the same CSV as the earlier `df` cell.
# NOTE(review): `df_c` is not referenced by any later cell visible here — confirm it is still needed.
df_c = pd.read_csv("../data/sealion/correct_coordinates.csv")

In [162]:
# Ids whose A and B dot sets differ AND for which a "<id>C.pkl" file exists.
diff_with_C = [
    train_id
    for train_id in train_id_with_diff
    if os.path.exists("/home/lowik/sealion/data/sealion/TrainDotsB/{}C.pkl".format(train_id))
]

In [163]:
# Number of differing images that have a C file.
len(diff_with_C)

79

In [164]:
# The same ids in ascending order.
sorted(diff_with_C)

[58,
 66,
 80,
 87,
 110,
 112,
 122,
 127,
 148,
 170,
 174,
 177,
 186,
 187,
 197,
 235,
 240,
 259,
 270,
 275,
 277,
 291,
 292,
 310,
 323,
 328,
 330,
 338,
 359,
 361,
 368,
 383,
 395,
 403,
 416,
 418,
 420,
 431,
 437,
 452,
 462,
 495,
 526,
 529,
 538,
 566,
 578,
 584,
 587,
 604,
 629,
 645,
 658,
 671,
 684,
 699,
 704,
 715,
 741,
 742,
 744,
 759,
 773,
 791,
 803,
 804,
 822,
 827,
 828,
 845,
 881,
 886,
 889,
 912,
 914,
 920,
 924,
 939,
 941]

In [166]:
# Rebinds `df` (previously the coordinates table) to train.csv; `check`
# below filters this frame by its `train_id` column.
df = pd.read_csv(os.path.join("/home/lowik/sealion/data/sealion/Train/", "train.csv"))

In [None]:
# Hand-maintained list of train ids.
# NOTE(review): presumably the ids where the C dots were judged correct — confirm.
c_is_good = [
    58, 66, 87, 110, 122, 148, 187, 235, 259, 270, 275, 277,
    292, 310, 323, 328, 330, 338, 359, 368, 383, 395, 403,
]

In [203]:
def check(train_id):
    """Visually and numerically compare the two stored dot sets of one image.

    Loads ``<train_id>.pkl`` and ``<train_id>C.pkl`` from
    ``../data/sealion/TrainDotsB``, draws each set on its own copy of the
    dotted image (written to ``../data/check.jpg`` and
    ``../data/checkC.jpg``), then prints the unmatched points reported by
    ``compare``, the per-class dot counts of both sets, and the row of the
    notebook-level ``df`` whose ``train_id`` matches. Returns None.
    """
    dotted = cv2.imread("../data/sealion/TrainDotted/{}.jpg".format(train_id))

    with open("../data/sealion/TrainDotsB/{}.pkl".format(train_id), "rb") as pfile:
        dots_a = pickle.load(pfile)
    with open("../data/sealion/TrainDotsB/{}C.pkl".format(train_id), "rb") as pfile:
        dots_c = pickle.load(pfile)

    overlay_a = draw_detected(dotted.copy(), dots_a)
    overlay_c = draw_detected(dotted.copy(), dots_c)
    cv2.imwrite("../data/check.jpg", overlay_a)
    cv2.imwrite("../data/checkC.jpg", overlay_c)

    # Called only for the unmatched-point report it prints.
    compare(dots_a, dots_c)
    for dot_lists in (dots_a, dots_c):
        print(" ".join(str(len(d)) for d in dot_lists))
    print(df[df.train_id == train_id])

In [240]:
# Inspect image 418: writes both overlays and prints the unmatched points,
# the per-class counts of both dot sets, and the matching train.csv row.
check(418)

Unmatched points: 
From A: 
(1622, 1032)
(1715, 1151)
(1510, 1229)
10 6 139 150 0
10 6 136 150 0
     train_id  adult_males  subadult_males  adult_females  juveniles  pups
418       418           11               6            139        150     0


In [167]:
# Load the stored dot lists for image 58 into the notebook-level `dots`.
with open("../data/sealion/TrainDotsB/58.pkl", "rb") as pfile:
    dots = pickle.load(pfile)

In [168]:
# Number of dots in each list of `dots`, space separated.
print(" ".join([str(len(d)) for d in dots]))
    

37 17 143 71 145


In [169]:
# The train.csv row for image 58, for comparison with the counts printed above.
df[df.train_id == 58]

Unnamed: 0,train_id,adult_males,subadult_males,adult_females,juveniles,pups
58,58,36,17,143,71,145
