In [None]:
# Fetch the NORPPA sources into ./NORPPA; later cells add that folder to sys.path
# and install its requirements.txt.
! git clone https://github.com/kwadraterry/NORPPA.git

In [None]:
import sys

# Make the modules of the cloned NORPPA repository importable.
# The membership check keeps the cell idempotent: re-running it does not add
# the same entry to sys.path again.
if './NORPPA' not in sys.path:
    sys.path.append('./NORPPA')

In [None]:
# Install the Python dependencies listed in the cloned repository's requirements file.
# NOTE(review): this runs before condacolab.install() further down, which (per the
# condacolab documentation) restarts the kernel — confirm these packages are still
# importable afterwards.
! pip install -r ./NORPPA/requirements.txt

In [None]:
# Install condacolab; the conda commands in the following cells rely on it.
!pip install -q condacolab

import condacolab
# NOTE(review): per the condacolab documentation this installs conda and restarts the
# kernel, and is meant to be run as the first cell of a notebook — confirm that the
# cells above still take effect after the restart.
condacolab.install()

In [None]:
# Sanity check: print the path of the conda executable found on PATH.
!which conda

In [None]:
! conda install conda-forge::vlfeat

In [None]:
! conda install -c conda-forge cyvlfeat

In [None]:
import condacolab

In [None]:
pip install kornia_moons

In [None]:
pip install kornia

## Setup autoreload, warnings and helper functions

In [None]:
# Enable IPython's autoreload extension in mode 2, which reloads imported modules
# before each cell is executed, so edits to the project sources are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

from IPython.display import display, Markdown
def print_heading(string):
    """Show `string` in the cell output as a level-1 Markdown heading."""
    heading = Markdown(f"# {string}")
    display(heading)
def print_subheading(string):
    """Show `string` in the cell output as a level-2 Markdown heading."""
    subheading = Markdown(f"## {string}")
    display(subheading)

## Set the visibility of CUDA devices (in case your system has more than one GPU)

In [None]:
# Enumerate CUDA devices in PCI bus order, so device indices are stable.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
# Alternative: an empty value hides every GPU from this kernel.
# %env CUDA_VISIBLE_DEVICES=""
# Expose only the device with index 2 to this kernel.
# NOTE(review): index 2 is specific to the author's machine; on a single-GPU host
# (e.g. Colab) it would presumably leave no GPU visible — confirm and use 0 there.
%env CUDA_VISIBLE_DEVICES=2

## Imports

In [None]:
%matplotlib inline
# import tensorflow as tf
from config import config
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import zipfile
import wget

import tensorflow as tf
# Make TensorFlow grow its GPU memory allocation on demand instead of reserving
# it up front. TensorFlow requires the memory-growth setting to be the same on
# every visible GPU, so it is enabled for all of them rather than only the first.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

from torchvision.datasets.utils import download_url
from datasets import COCOImageDataset, DatasetSlice, SimpleDataset

from norppa_tools import print_topk_accuracy, print_step, apply_pipeline, crop_step, crop_step_sequential, curry, curry_sequential, apply_sequential, apply_pipeline_dataset, get_save_step, apply_sequential, compose_sequential, calculate_accuracy, resize_dataset
from tonemapping.tonemapping import tonemap, tonemap_step
from segmentation.segmentation import segment
from pattern_extraction.extract_pattern import extract_pattern
from reidentification.identify import encode_single, encode_pipeline, encode_dataset, identify, identify_single, getDISK, getKeyNetAffNetHardNet
from reidentification.find_matches import find_matches
from reidentification.visualisation import visualise_match
from reidentification.identify import apply_geometric



## Create the configuration object
The default parameters are defined in `config.py` and can be changed there.

In [None]:
# Build the configuration object; its entries ("seem", "unet", "dataset_dir",
# "geometric") are read throughout the rest of the notebook.
cfg = config()

# Pre-configured pipeline steps, reused by the pipelines defined further down.
# NOTE(review): curry_sequential presumably binds the given arguments and applies the
# function to each item of a dataset — confirm against norppa_tools.
segment_step = curry_sequential(segment, cfg["seem"], instance_segmentation=False)
extract_pattern_step = curry_sequential(extract_pattern, model=cfg["unet"])

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive/ (Colab only).
drive.mount('/content/drive/')
# I uploaded the SealID data to my Drive.

## Download the dataset

In [None]:
from config import config
# A small change was made to the config file so that this works here.
dataset_dir = Path(cfg["dataset_dir"])

# Get a single use download link from https://etsin.fairdata.fi/dataset/22b5191e-f24b-4457-93d3-95797c900fc0/data
# You will only need "full images.zip" for the reidentification, generate a link to that.
dataset_url = ""

# A directory that exists but is empty (e.g. left behind by a failed download)
# does not count as an available dataset.
dataset_available = dataset_dir.is_dir() and any(dataset_dir.iterdir())

if not dataset_available:
    # Create the target directory in both cases, so that a manual copy of the
    # data also has a place to go.
    print(f'Creating directory "{dataset_dir}"')
    dataset_dir.mkdir(parents=True, exist_ok=True)

    if dataset_url:
        print("Download and extract dataset")
        file = wget.download(dataset_url.replace(" ", "%20"), out=str(dataset_dir))
        print()
        print(f'Extracting "{file}"')
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(file, 'r') as zip_f:
            zip_f.extractall(dataset_dir)
        # The archive is no longer needed once its contents are extracted.
        Path(file).unlink()
        dataset_available = True
    else:
        # Without a link there is nothing to download: report it instead of
        # failing inside wget and claiming success on the next run.
        print(f'No data found in "{dataset_dir}" and dataset_url is empty. '
              'Set dataset_url above, or place the extracted "full images" folder there manually.')

if dataset_available:
    print('SealID dataset is ready')

## Create dataset variables

In [None]:
! unzip "/content/drive/MyDrive/SealID/full images.zip"

In [None]:
# NOTE: the "full images" folder has to be inside the directory given by
# cfg["dataset_dir"] (NORPPA/data in the original setup). If it was unzipped
# elsewhere, move it there first; the unzip may have to be re-run after the move,
# because it can misbehave.
dataset_dir = Path(cfg["dataset_dir"])/"full images"


# Unprocessed database and query images, both described by the same annotation file.
# NOTE(review): the third argument presumably selects the split within annotation.csv — confirm.
database_dataset = COCOImageDataset(dataset_dir/"source_database", dataset_dir/"annotation.csv", "database")
query_dataset = COCOImageDataset(dataset_dir/"source_query", dataset_dir/"annotation.csv", "query")


# The same two splits, read from the "segmented_*" folders.
segmented_database_dataset = COCOImageDataset(dataset_dir/"segmented_database", dataset_dir/"annotation.csv", "database")
segmented_query_dataset = COCOImageDataset(dataset_dir/"segmented_query", dataset_dir/"annotation.csv", "query")

# tonemapped_database_dataset = COCOImageDataset(dataset_dir/"tonemapped_segmented_database", dataset_dir/"annotation.csv", "database")
# tonemapped_query_dataset = COCOImageDataset(dataset_dir/"tonemapped_segmented_query", dataset_dir/"annotation.csv", "query")

# First query sample; it is used as the running example in the single-image cells below.
img_data = query_dataset[0]
pass

## Show an image from the dataset

In [None]:
# Unpack the sample selected above into the image and its label.
img, label = img_data

# Display the unprocessed image under a heading.
print_heading("Input image")
plt.imshow(img)
plt.show()

## Apply tonemapping

In [None]:
! sudo apt-get install pfstmo

In [None]:
# Remember to install pfstmo package with
# sudo apt-get install pfstmo

# Tonemap the example image.
tonemapped_img = tonemap(img)

# Display the tonemapped image under a heading.
print_heading("Tonemapped image")
plt.imshow(tonemapped_img)
plt.show()

## Segment and crop an image

In [None]:
# Same definition as in the configuration cell; repeated here so this cell also
# works on its own.
segment_step = curry_sequential(segment, cfg["seem"], instance_segmentation=False)

# Segment and then crop the example image. The active line feeds the original
# image; the commented-out line is the variant that starts from the tonemapped image.
# Only the first element of the pipeline output is used.
# segmented_img, label = apply_pipeline((tonemapped_img, label), [segment_step, crop_step_sequential])[0]
segmented_img, label = apply_pipeline((img, label), [segment_step, crop_step_sequential])[0]

print_heading("Segmented image")
plt.imshow(segmented_img)
plt.show()

## Extract pattern from an image

In [None]:
# Same definition as in the configuration cell; repeated here so this cell also
# works on its own.
extract_pattern_step = curry_sequential(extract_pattern, model=cfg["unet"])

# Run pattern extraction on the segmented image; only the first element of the
# pipeline output is used.
pattern_img, label = apply_pipeline((segmented_img, label), [extract_pattern_step])[0]

print_heading("Pattern image")
plt.imshow(pattern_img)
plt.show()

## Encode an image

In [None]:
# Encode the extracted pattern together with its label; the result is the query
# used in the re-identification cell below.
encoded_img = encode_single((pattern_img,label), cfg)
print("Encoded image")

## Create a small database for testing
This example uses a very simple database implemented as a Python class. The database can take up a lot of space if all images are used; in that case it is better to store it on disk, e.g. with the help of an SQL-based database system.

In [None]:
# Small subset of the database images for a quick test.
# NOTE(review): DatasetSlice presumably selects the items at the given indices (0-9) — confirm.
db_dataset_mini = DatasetSlice(database_dataset, range(10))
# Processing steps applied in order: segmentation, cropping, pattern extraction,
# encoding. Tonemapping is left disabled here.
pipeline = [
        # apply_sequential(tonemap_step),
        segment_step,
        crop_step_sequential,
        extract_pattern_step,
        curry(encode_dataset, cfg=cfg),
]

# The encoded subset serves as the database for the identification cells below.
encoded_database = apply_pipeline_dataset(db_dataset_mini, pipeline, verbose=True)

print("Created a test database")

## Perform re-identification

In [None]:
# Match the encoded example image against the test database (identify, topk=10),
# then run find_matches on the result.
identification_result = apply_pipeline_dataset(encoded_img, [
    curry(identify, database=encoded_database, topk=10),
    curry_sequential(find_matches, cfg)
])

In [None]:
# Visualise the first identification result (topk=3).
visualise_match(identification_result[0], topk=3)
# Ending on a statement keeps the cell from echoing the return value of the call above.
pass

## Do geometrical verification

In [None]:
# Apply geometric verification, configured by cfg["geometric"], to the
# identification results.
geom_matches = curry_sequential(apply_geometric, cfg["geometric"])(identification_result)

# Visualise the first verified result (topk=3).
visualise_match(geom_matches[0], topk=3)
pass

In [None]:
# Repeats the geometric verification of the previous cell without visualising it.
# NOTE(review): the disabled StopwatchPrint line suggests this cell was used for
# timing; StopwatchPrint is not imported in this notebook.
#with StopwatchPrint("gm"):
geom_matches = curry_sequential(apply_geometric, cfg["geometric"])(identification_result)

pass

## Apply full pipeline to subset of images

In [None]:
# Small subset of the query images for the end-to-end run.
query_dataset_mini = DatasetSlice(query_dataset, range(5))

# End-to-end pipeline: tonemapping, segmentation + cropping, pattern extraction,
# encoding, identification against `encoded_database`, geometric verification and
# visualisation. The print_step entries announce each stage, and top-k accuracy is
# printed before and after geometric verification.
# Note: this rebinds `pipeline` and `identification_result` from the earlier cells.
pipeline = [
            print_step("Starting tonemapping..."),
            apply_sequential(tonemap_step),
            print_step("Starting segmentation..."),
            segment_step,
            crop_step_sequential,
            print_step("Starting pattern extraction..."),
            extract_pattern_step,

            print_step("Starting encoding..."),
            curry(encode_dataset, cfg=cfg),

            print_step("Starting identification..."),
            curry(identify, database=encoded_database, topk=10),
            curry(print_topk_accuracy, label="Before geometric verification:"),

            print_step("Starting geometric verification..."),
            curry_sequential(find_matches, cfg),
            curry_sequential(apply_geometric, cfg["geometric"]),
            curry(print_topk_accuracy, label="After geometric verification:"),

            print_step("Starting visualisation..."),
            curry_sequential(visualise_match, topk=3)
            ]

identification_result = apply_pipeline_dataset(query_dataset_mini, pipeline)


## Applying many-to-many

In [None]:
# Small subsets of the query and database images for the many-to-many example.
query_dataset_mini = DatasetSlice(query_dataset, range(5))
db_dataset_mini = DatasetSlice(database_dataset, range(5))

# Steps shared by the database side and the query side.
# NOTE(review): this name rebinds the `encode_pipeline` function imported from
# reidentification.identify in the imports cell — confirm that function is not
# needed afterwards.
encode_pipeline = [
            # curry_sequential(resize_dataset, 256),
            # Tonemapping is disabled in this example, so its progress message is
            # disabled as well (it used to be printed although no tonemapping ran).
            # print_step("Starting tonemapping..."),
            # apply_sequential(tonemap_step),
            print_step("Starting segmentation..."),
            segment_step,
            crop_step_sequential,
            print_step("Starting pattern extraction..."),
            extract_pattern_step,

            print_step("Starting encoding..."),
            # NOTE(review): group_label='class_id' presumably makes encode_dataset
            # build one encoding per class_id — confirm.
            curry(encode_dataset, group_label='class_id', cfg=cfg)]

# Encoded database that the query pipeline below identifies against.
encoded_group_database = apply_pipeline_dataset(db_dataset_mini, encode_pipeline)

# Query side: the shared encoding steps followed by identification and
# find_matches. Geometric verification and visualisation are left disabled.
pipeline = [*encode_pipeline,

            print_step("Starting identification..."),
            curry(identify, database=encoded_group_database, topk=10),
            curry(print_topk_accuracy, label="Before geometric verification:"),

            print_step("Starting geometric verification..."),
            curry_sequential(find_matches, cfg),
            # curry_sequential(apply_geometric, cfg["geometric"]),
            # curry(print_topk_accuracy, label="After geometric verification:"),

            # print_step("Starting visualisation..."),
            # curry_sequential(visualise_match, topk=3)
            ]

identification_group_result = apply_pipeline_dataset(query_dataset_mini, pipeline)


In [None]:
# Display the first element of the first many-to-many result entry.
identification_group_result[0][0]

## Changing feature detectors + encoders

In [None]:
# Subsets of the query and database images for this experiment.
query_dataset_mini = DatasetSlice(query_dataset, range(10))
db_dataset_mini = DatasetSlice(database_dataset, range(10))

# Shared encoding steps. The only difference from the earlier pipelines is that
# encode_dataset receives init_apply=getDISK().
# NOTE(review): init_apply presumably selects the feature detector/descriptor, and
# getKeyNetAffNetHardNet (also imported) looks like the alternative — confirm.
# NOTE(review): this name rebinds the `encode_pipeline` function imported from
# reidentification.identify in the imports cell.
encode_pipeline = [
            # curry_sequential(resize_dataset, 256),
            print_step("Starting tonemapping..."),
            apply_sequential(tonemap_step),
            print_step("Starting segmentation..."),
            segment_step,
            crop_step_sequential,
            print_step("Starting pattern extraction..."),
            extract_pattern_step,

            print_step("Starting encoding..."),
            curry(encode_dataset, init_apply=getDISK(), cfg=cfg)]

# Rebinds `encoded_group_database` from the many-to-many cell with the new encodings.
encoded_group_database = apply_pipeline_dataset(db_dataset_mini, encode_pipeline)

# Query side: shared encoding steps, then identification, geometric verification
# and visualisation, with top-k accuracy printed before and after verification.
pipeline = [*encode_pipeline,

            print_step("Starting identification..."),
            curry(identify, database=encoded_group_database, topk=10),
            curry(print_topk_accuracy, label="Before geometric verification:"),

            print_step("Starting geometric verification..."),
            curry_sequential(find_matches, cfg),
            curry_sequential(apply_geometric, cfg["geometric"]),
            curry(print_topk_accuracy, label="After geometric verification:"),

            print_step("Starting visualisation..."),
            curry_sequential(visualise_match, topk=3)
            ]

identification_group_result = apply_pipeline_dataset(query_dataset_mini, pipeline)

In [None]:
# NOTE(review): this installs the PyPI distribution named "tools", which may be
# unrelated to NORPPA. The other helpers in this notebook are imported from
# `norppa_tools` — confirm which module is meant to provide `load_pickle` for the
# next cell.
! pip install tools

In [None]:
from tools import load_pickle

# Load stored results from disk; judging by the file name these are identification
# results. The path is relative to the current working directory.
# NOTE(review): nothing in this notebook writes this file — confirm where it comes from.
# NOTE(review): presumably a pickle file; only load pickles from a trusted source.
encoded = load_pickle("./output/identification_norppa_pattern_HessAffNetHardNet.pickle")

In [None]:
def replace_pattern_with_original(data):
    """Return a shallow copy of `data` whose "file" entry points at the original image.

    Every occurrence of "segmented_pattern_resized" in data["file"] is replaced
    with "segmented_resized". The input mapping itself is left unmodified.
    """
    swapped = data.copy()
    pattern_path = data["file"]
    swapped["file"] = pattern_path.replace("segmented_pattern_resized", "segmented_resized")
    return swapped

# Visualise the loaded results (topk=3, figsize=(15,10)). Each entry is passed
# through replace_pattern_with_original first, so its "file" path is rewritten
# from the pattern folder to the segmented folder.
# NOTE(review): `filename` presumably makes visualise_match save the figures under
# that path prefix — confirm, and confirm that ./output/reid_images exists.
pipeline = [curry_sequential(visualise_match, topk=3, data_process_func=replace_pattern_with_original, figsize=(15,10), filename="./output/reid_images/reid")]

viz = apply_pipeline_dataset(encoded, pipeline)

## Reset tqdm in case progress bars glitch out

In [None]:
from tqdm import tqdm
# Drain tqdm's registry of live progress bars, closing each one as it is removed.
while tqdm._instances:
    tqdm._instances.pop().close()

In [None]:

# Small subset of the query images.
query_dataset_mini = DatasetSlice(query_dataset, range(5))

# Encoding-only pipeline: tonemapping, segmentation + cropping, pattern extraction
# and encoding. The grouped variant (group_label='class_id') is left commented out.
# NOTE(review): this name rebinds the `encode_pipeline` function imported from
# reidentification.identify in the imports cell.
encode_pipeline = [
            # curry_sequential(resize_dataset, 256),
            print_step("Starting tonemapping..."),
            apply_sequential(tonemap_step),
            print_step("Starting segmentation..."),
            segment_step,
            crop_step_sequential,
            print_step("Starting pattern extraction..."),
            extract_pattern_step,

            print_step("Starting encoding..."),
            # curry(encode_dataset, group_label='class_id', cfg=cfg)
            curry(encode_dataset, cfg=cfg)
            ]

# Note: this rebinds `encoded`, which previously held the results loaded from the pickle file.
encoded = apply_pipeline_dataset(query_dataset_mini, encode_pipeline)

In [None]:
# Disabled experiment: compares the first encoding with the sum of all encodings.
# aggregated_fisher = encoded[0][0]

# encodings = np.array([x[0] for x in encoded])
# summed_fisher = np.sum(encodings, axis=0)

# summed_fisher - aggregated_fisher

# Display the shape of the first 'ellipses' entry of the first label attached to
# the first encoded item.
encoded[0][1]['labels'][0]['ellipses'][0].shape
