In [1]:
import DTW_utils as utils 
import DTW as dtw
import pandas as pd 
import pickle
import numpy as np


In [2]:
# Load the pre-computed training feature database from disk.
# NOTE(review): pickle.load executes arbitrary code embedded in the file; only
# load .pkl files that were produced by this project.
with open("train_features/train_db.pkl", "rb") as f:
    train_db = pickle.load(f)


In [4]:
# Peek at the first record to see the entry layout: a dict with "loc", "word" and "features".
train_db[0]

{'loc': '271-02-01',
 'word': 'L-e-t-t-e-r-s-s_cm',
 'features': array([[0.    , 0.9875, 0.    , ..., 0.    , 0.    , 0.    ],
        [0.    , 0.9875, 0.    , ..., 0.    , 0.    , 0.    ],
        [0.    , 0.9875, 0.    , ..., 0.    , 0.    , 0.    ],
        ...,
        [0.    , 0.9875, 0.    , ..., 0.    , 0.    , 0.    ],
        [0.    , 0.9875, 0.    , ..., 0.    , 0.    , 0.    ],
        [0.    , 0.9875, 0.    , ..., 0.    , 0.    , 0.    ]])}

Next steps: 

Choose a keyword, collect all of its images from the training set, and pick the one that represents the keyword best.
Use that training image as the query: scan the images of the validation set and keep every image whose distance to the query is within a threshold.
Evaluate how many of the retrieved words are correct and how many are not, as well as how many of the actual occurrences of the keyword were found.

In [5]:
# load image with specific loc 

import os
from PIL import Image

def load_train_image(loc, folder="train_words"):
    """Return the cropped word image stored as ``<folder>/<loc>.png`` in RGB mode.

    Args:
        loc: Identifier used as the file stem (for example the ``"loc"`` value
            of a database entry).
        folder: Directory that holds the PNG files.

    Returns:
        The result of ``Image.open(path).convert("RGB")``.

    Raises:
        FileNotFoundError: If the expected path does not exist.
    """
    image_path = os.path.join(folder, f"{loc}.png")
    if os.path.exists(image_path):
        return Image.open(image_path).convert("RGB")
    raise FileNotFoundError(f"No image found for loc={loc}")


In [6]:
def get_keyword_instances(train_db, keyword):
    """Collect every entry of ``train_db`` whose ``"word"`` equals ``keyword``.

    Entries are returned in their original order; the list is empty when the
    keyword does not occur.
    """
    matches = []
    for record in train_db:
        if record["word"] == keyword:
            matches.append(record)
    return matches


def keyword_medoid(train_db, keyword, image_folder="train_words"):
    """
    Select the most representative training sample (medoid) of a keyword.

    All entries labelled ``keyword`` are compared pairwise with ``dtw.DTW`` on
    their ``"features"``; the entry whose summed value against all the others
    is smallest is chosen (the first one wins on ties, as with ``np.argmin``).
    A keyword with a single entry returns that entry without any comparison.

    Args:
        train_db: Iterable of entries with ``"loc"``, ``"word"`` and
            ``"features"`` keys.
        keyword: Word label to look up.
        image_folder: Folder passed on to ``load_train_image``.

    Returns:
        Tuple ``(image, features, entry)`` of the chosen sample.

    Raises:
        ValueError: If no entry carries the keyword.
    """
    candidates = get_keyword_instances(train_db, keyword)
    count = len(candidates)

    if count == 0:
        raise ValueError(f"Keyword '{keyword}' not found in train_db.")

    if count > 1:
        # Symmetric matrix of pairwise values; only the upper triangle is
        # computed and then mirrored, the diagonal stays at zero.
        pairwise = np.zeros((count, count), dtype=float)
        for row in range(count):
            for col in range(row + 1, count):
                value = dtw.DTW(candidates[row]["features"], candidates[col]["features"])
                pairwise[row, col] = value
                pairwise[col, row] = value

        # Row sum = total value of one sample against all the others.
        chosen = candidates[np.argmin(pairwise.sum(axis=1))]
    else:
        chosen = candidates[0]

    medoid_img = load_train_image(chosen["loc"], folder=image_folder)
    return medoid_img, chosen["features"], chosen

In [7]:
# Choose the keyword to spot and take its medoid training sample as the query
# prototype: the image, its feature array and the full database entry.
keyword = "c-a-r-e-f-u-l"
proto_img, proto_feat, info = keyword_medoid(train_db, keyword, image_folder="train_words")



In [8]:
# Visual sanity check of the chosen prototype.
# NOTE(review): Image.show() presumably opens an external viewer rather than
# rendering in the notebook; a bare `proto_img` expression may display inline — confirm.
proto_img.show()

In [9]:
# Load the pre-computed validation feature database from disk.
# NOTE(review): pickle.load executes arbitrary code embedded in the file; only
# load .pkl files that were produced by this project.
with open("validation_features/validation_db.pkl", "rb") as f:
    val_db = pickle.load(f)

In [10]:
# Query the whole validation database with the prototype's features.
# NOTE(review): later cells read each result as a 4-tuple with the distance at
# index 0 and the word label at index 2 — confirm against dtw.search.

results = dtw.search(proto_feat, val_db)

In [None]:
# Preview the first few entries returned by dtw.search (sliced so the whole
# result list is not dumped into the notebook output).
results[:10]

In [15]:
# Locate the true occurrences of the keyword in the search output: one
# (entry[0], position in `results`) pair per entry whose label (entry[2])
# equals the keyword. entry[0] is presumably the DTW distance — confirm.
# Observation from the run below: the match looks poor, the only true
# occurrence has a large value.
filtered_dist_idx = [(entry[0], i) for i, entry in enumerate(results) if entry[2] == keyword]

print(filtered_dist_idx[:10])  

[(45.14055008314365, 74)]


In [16]:
# Cut-off values to sweep when evaluating the "c-a-r-e-f-u-l" search results.
thresholds = [10, 15, 20, 25, 30, 35, 40]


In [26]:
import pandas as pd
import numpy as np

def evaluate_with_thresholds(results, keyword, thresholds):
    """Score a keyword search at several acceptance thresholds.

    Args:
        results: Re-iterable collection of 4-tuples; element 0 is compared
            against each threshold and element 2 is the word label.
        keyword: Label that counts as a correct retrieval.
        thresholds: Cut-off values; a result is accepted when its element 0
            is less than or equal to the cut-off.

    Returns:
        A ``pandas.DataFrame`` with one row per threshold and the columns
        ``keyword``, ``threshold``, ``precision``, ``recall``, ``TP``, ``FP``,
        ``FN`` and ``N_pos``. Precision is 0.0 when nothing is accepted and
        recall is 0.0 when the keyword never occurs in ``results``.
    """
    # Number of entries in the whole result set that carry the keyword.
    total_positives = 0
    for _, _, label, _ in results:
        if label == keyword:
            total_positives += 1

    table = []
    for cutoff in thresholds:
        accepted = []
        for item in results:
            if item[0] <= cutoff:
                accepted.append(item)

        hits = 0
        false_alarms = 0
        for _, _, label, _ in accepted:
            if label == keyword:
                hits += 1
            if label != keyword:
                false_alarms += 1
        misses = total_positives - hits

        retrieved = hits + false_alarms
        if retrieved > 0:
            precision = hits / retrieved
        else:
            precision = 0.0

        if total_positives > 0:
            recall = hits / total_positives
        else:
            recall = 0.0

        table.append({
            "keyword": keyword,
            "threshold": cutoff,
            "precision": precision,
            "recall": recall,
            "TP": hits,
            "FP": false_alarms,
            "FN": misses,
            "N_pos": total_positives,
        })

    return pd.DataFrame(table)



In [29]:
# Sweep the thresholds over the search results for the current keyword.
eval_careful = evaluate_with_thresholds(results, keyword, thresholds)

In [30]:
# Show the per-threshold precision/recall table.
eval_careful

Unnamed: 0,keyword,threshold,precision,recall,TP,FP,FN,N_pos
0,c-a-r-e-f-u-l,10,0.0,0.0,0,0,1,1
1,c-a-r-e-f-u-l,15,0.0,0.0,0,0,1,1
2,c-a-r-e-f-u-l,20,0.0,0.0,0,0,1,1
3,c-a-r-e-f-u-l,25,0.0,0.0,0,0,1,1
4,c-a-r-e-f-u-l,30,0.0,0.0,0,0,1,1
5,c-a-r-e-f-u-l,35,0.0,0.0,0,0,1,1
6,c-a-r-e-f-u-l,40,0.0,0.0,0,8,1,1


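Every validation image has a distance greater than 35 to the training prototype. Eight images have a distance of at most 40, but all of them are false positives; the only true occurrence of the keyword has a distance of about 45.1.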

In [32]:
# Second experiment: repeat the prototype selection and search for "J-o-h-n".
# Note that `keyword`, `proto_img`, `proto_feat` and `info` are rebound here, so
# re-running an earlier "c-a-r-e-f-u-l" cell after this one would use the
# "J-o-h-n" values.
keyword = "J-o-h-n"
proto_img, proto_feat, info = keyword_medoid(train_db, keyword, image_folder="train_words")

results_john = dtw.search(proto_feat, val_db)

In [36]:
# Cut-off values for the "J-o-h-n" sweep; this overwrites the earlier `thresholds` list.
thresholds = [40, 45, 50, 55, 60]

In [37]:
# Visual sanity check of the prototype currently bound to `proto_img`.
proto_img.show()

In [38]:
# Sweep the current thresholds over the "J-o-h-n" search results.
eval_john = evaluate_with_thresholds(results_john, keyword, thresholds)

In [39]:
# Show the per-threshold precision/recall table.
eval_john

Unnamed: 0,keyword,threshold,precision,recall,TP,FP,FN,N_pos
0,J-o-h-n,40,0.333333,0.333333,1,2,2,3
1,J-o-h-n,45,0.066667,0.333333,1,14,2,3
2,J-o-h-n,50,0.022727,0.333333,1,43,2,3
3,J-o-h-n,55,0.01087,0.333333,1,91,2,3
4,J-o-h-n,60,0.005587,0.333333,1,178,2,3
