In [1]:
from PatchCutter.ImagePreprocessor import ImagePreprocessor
from WordSpotter.ModelWrapper import DeepSoloWrapper

from PIL import Image
import os

from Utils import result_reader as rr
from Utils import bezier_utils as butils
from Utils.visualizer import PolygonVisualizer
from Utils import sampler

import numpy as np

import matplotlib.pyplot as plt

# ---------------------------------------------------------------------------
# INPUT
# ---------------------------------------------------------------------------
# Map image to process. Other inputs used previously:
# kiepert_1845.jpeg vandevelde_1846.jpeg test.jpg
img_path = 'Test1/12148_btv1b530589233f1.jpg'

# ---------------------------------------------------------------------------
# OUTPUT
# ---------------------------------------------------------------------------
# The run is named after the input file (base name without its extension) and
# every artifact of the run is written below Results/<task_name>/.
task_name = os.path.splitext(os.path.basename(img_path))[0]
output_dir = 'Results/{}'.format(task_name)

# Create the run directory up front; a directory left by an earlier run is reused.
os.makedirs(output_dir, exist_ok=True)

# Artifact paths, in the order the notebook steps write them.
stacked_detection_path = os.path.join(output_dir, 'stacked_detections.json')
flattened_detection_path = os.path.join(output_dir, 'flattened_detections.json')
grouper_graph_path = os.path.join(output_dir, 'grouper_graph.gexf')
toponym_detection_path = os.path.join(output_dir, 'toponym_detections.json')

# ---------------------------------------------------------------------------
# MODELS
# ---------------------------------------------------------------------------
model_cfg = 'WordSpotter/models/config_96voc.yaml'
model_weights = 'WordSpotter/models/finetune_v2/model.pth'
# Alternative grouper checkpoints: grouper_model_epoch3.pth  grouper_model_v1_epoch2.pth
grouper_model_path = 'Grouper/grouper_model_v1_epoch2.pth'
# Optional: only read by the "Word Style Representation" step.
deepfont_encoder_path = 'StyleEncoder/DeepFontEncoder_full.pth'

## Step 1: Word Spotting

### Operations

In [31]:
from WordSpotting import pyramid_scan

# Build the word spotter from the configured model files.
spotter = DeepSoloWrapper(
    model_cfg,
    model_weights,
    score_threshold=0.4,
)

# Scan the input image and write the detections to stacked_detection_path.
# The return value is bound to `_` so the cell does not echo it.
_ = pyramid_scan(
    img_path,
    stacked_detection_path,
    spotter,
    num_layers=1,
    save_visualization=True,
)

Cropping patches: [13]
preprocessing done
Rotating images by 0 degrees


100%|██████████| 22/22 [00:34<00:00,  1.55s/it]


Saving final results


## Step 2: Flattening

### Operations

In [2]:
from Flattening import aggregate_closest_results, normalize_adhesive

# Load the detections written by the word-spotting step.
result = rr.read_json(stacked_detection_path)

# Aggregate the stacked detections; the call also returns an `ambiguity` value
# that the normalization step below consumes.
grouped_results, ambiguity = aggregate_closest_results(result, sample_count=15, evaluate_overlapping="any")

# Normalize against the image this run is actually processing. This used to open
# the hard-coded 'Input/kiepert_1845.jpeg', which disagreed with `img_path` and
# with the image used by every other step of the notebook.
new_results = normalize_adhesive(grouped_results, ambiguity, Image.open(img_path))

# Persist the flattened detections for the following steps.
rr.save_json(new_results, flattened_detection_path)

100%|██████████| 2805/2805 [00:44<00:00, 63.59it/s]


### Visualization

In [3]:
# Draw the flattened detections over the input image and save the picture next
# to the JSON file (same path, '.jpg' extension).
# NOTE: this reads `new_results` from the Flattening cell, so that cell has to
# have been run in the current kernel session.
vis = PolygonVisualizer()
vis.canvas_from_image(Image.open(img_path))
flattened_overlay = vis.draw(new_results)
flattened_overlay.save(flattened_detection_path.replace('.json', '.jpg'))

## Optional: Word Style Representation

### Operations

In [6]:
from StyleEmbedding import generate_style_embeddings
# NOTE(review): DeepFontEncoder and EncodeFontBatch are not referenced below;
# presumably they must be importable for load_model to restore the checkpoint
# -- confirm before removing them.
from StyleEncoder.DeepFont import DeepFontEncoder, EncodeFontBatch, load_model

# Start from the flattened detections saved by Step 2.
results = rr.read_json(flattened_detection_path)

# Load the optional style encoder.
deepfont_encoder = load_model(deepfont_encoder_path)

# Run the encoder over the detections on the input image.
results = generate_style_embeddings(
    results,
    Image.open(img_path),
    deepfont_encoder,
)

# Overwrite the flattened detections file with the returned results, so the
# later steps read them from the same path.
rr.save_json(results, flattened_detection_path)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


## Step 3: Toponym Assignment

### Functions

In [2]:
# Import the grouper entry point by name instead of `import *`, so the notebook
# namespace is not silently filled (or shadowed) by everything the module exports.
# NOTE(review): GrouperCaller is the only name from this module used in the
# cells visible here -- add further names explicitly if other cells need them.
from Grouper.GrouperCaller_v1 import GrouperCaller
from ToponymsAssignment import group_toponyms, toponym_from_graph_strong_component

### Operations

In [3]:
# Reload the flattened detections from disk so this step does not depend on
# variables left over from earlier cells.
results = rr.read_json(flattened_detection_path)

# Style embeddings are used only if the first detection carries a
# 'style_embedding' entry (written by the optional style step).
use_style_embeddings = 'style_embedding' in results[0].keys()

grouper = GrouperCaller(grouper_model_path)

directed_graph, order_observations = group_toponyms(
    results,
    grouper,
    use_style_embeddings=use_style_embeddings,
    batch_size=128,
)

# Save both outputs side by side: the graph as .gexf and the order
# observations under the same path with a .json extension.
rr.save_toponym_graph(directed_graph, grouper_graph_path)
rr.save_json_nested(order_observations, grouper_graph_path.replace('.gexf', '.json'))

 44%|████▍     | 2093/4753 [01:28<01:52, 23.70it/s]


KeyboardInterrupt: 

In [4]:
# Reload everything the grouping step saved, so this cell can be run without
# repeating the grouping itself.
results = rr.read_json(flattened_detection_path)
directed_graph = rr.read_toponym_graph(grouper_graph_path)
order_observations = rr.read_json_nested(
    grouper_graph_path.replace('.gexf', '.json')
)

# Derive the toponyms from the detections, the graph and the order observations.
toponyms = toponym_from_graph_strong_component(
    results,
    directed_graph,
    order_observations,
)

rr.save_json_nested(toponyms, toponym_detection_path)

### Visualization

In [6]:
# Draw the saved toponym groups over the input image.
# NOTE(review): the picture is written to the same path as the one produced by
# the draw_toponyms cell (toponym_detections.jpg), so whichever of the two
# cells runs last overwrites the other's output.
toponyms = rr.read_json_nested(toponym_detection_path)

vis = PolygonVisualizer()
vis.canvas_from_image(Image.open(img_path))
toponym_groups_overlay = vis.draw_multiple(toponyms)
toponym_groups_overlay.save(toponym_detection_path.replace('.json', '.jpg'))

In [5]:
# Extract the final toponyms from the saved groups, report how many there are,
# and draw them over the input image.
# NOTE(review): the picture is written to the same path as the one produced by
# the draw_multiple cell (toponym_detections.jpg), so whichever of the two
# cells runs last overwrites the other's output.
toponyms = rr.read_json_nested(toponym_detection_path)
toponyms_final = rr.extract_toponyms_from_result_groups(toponyms)

print(len(toponyms_final))

vis = PolygonVisualizer()
vis.canvas_from_image(Image.open(img_path))
toponyms_overlay = vis.draw_toponyms(toponyms_final)
toponyms_overlay.save(toponym_detection_path.replace('.json', '.jpg'))


462
