This notebook builds dictionaries of keypoint features for the left- and right-side turtle images stored on Google Drive. It reads the image list from a CSV file, extracts features for each train/test/val split, and saves the left- and right-side feature dictionaries to separate files.

# Utils

In [None]:
def extract_features(seg_path, df, set, feature_extractor="sift", n_keypoints=1024): # possible hyperparameters
  '''
    Extract keypoint features for every image of one split and return them in a dictionary.

    Relies on the module-level `device` and on the LightGlue names
    `SuperPoint`, `SIFT`, `DISK`, `ALIKED` and `load_image` being in scope.

    Inputs
    `seg_path` = the stem part which contains all the images (e.g. "/content/drive/MyDrive/AI for Turtles Shared/05. Subgroup folders/Face detection /Data/Dataset corrected image orientation face crops/"); it is string-concatenated with "<year>/<name>", so it should end with "/"
    `df` = dataframe with (at least) the columns "year", "name" and "split"
    `set` = which split set you want to use (e.g. "train", "test", or "val")
    `feature_extractor` = "superpoint", "sift", "DISK" or "ALIKED" (matched case-insensitively); any other value prints a warning and falls back to "sift"
    `n_keypoints` = the maximum number of key points to use (passed as `max_num_keypoints`)

    Outputs
    `feature_dict` = dictionary mapping "<year>/<name>" to the result of `extractor.extract(image)`; rows for which no file is found on disk are left out
  '''
  # Dispatch table instead of an if/elif chain; keys are lower-case so that
  # e.g. "SIFT" and "disk" are recognised as well.
  extractor_classes = {
    'superpoint': SuperPoint,
    'sift': SIFT,
    'disk': DISK,
    'aliked': ALIKED,
  }
  extractor_cls = extractor_classes.get(str(feature_extractor).lower())
  if extractor_cls is None:
    # The fallback really is SIFT now (it used to build SuperPoint while announcing sift).
    print(f"Unrecognised feature_extractor {feature_extractor!r}... using sift instead")
    extractor_cls = SIFT

  extractor = extractor_cls(max_num_keypoints=n_keypoints).eval().to(device)  # load the extractor

  # only extract features for wanted split-set
  sub_df = df[df["split"] == set]

  feature_dict = {}
  n_missing = 0

  start = time.time()
  for _, row in sub_df.iterrows():
    file_name = str(row['year']) + "/" + row['name'] # year folder and file name

    # The name is only a prefix of the real file name, hence the wildcard.
    # Sorting makes the choice deterministic if several files share the prefix.
    matches = sorted(glob(seg_path + file_name + "*"))
    if not matches:
      n_missing += 1
      continue

    image = load_image(matches[0]).to(device)
    feature_dict[file_name] = extractor.extract(image)
  elapsed = time.time() - start

  print(f"nr ims extracted: {len(feature_dict)}, time total: {elapsed}")
  if n_missing:
    # Make skipped rows visible instead of dropping them silently.
    print(f"nr ims skipped (no file found): {n_missing}")

  return feature_dict

In [None]:
# Existing Libraries
# Colab setup cell: mounts Google Drive and makes sure the LightGlue package is installed.
import os
from google.colab import drive
# Mount Google Drive at /content/drive; force_remount=True remounts even if it is already mounted.
drive.mount('/content/drive', force_remount=True)
# If we are on colab: this clones the repo and installs the dependencies
from pathlib import Path

# Skip the clone/install when the working directory is already named "LightGlue"
# (i.e. this cell has been run before in the same session).
if Path.cwd().name != "LightGlue":
    !git clone --quiet https://github.com/cvg/LightGlue/
    %cd LightGlue
    !pip install --progress-bar off --quiet -e .

Mounted at /content/drive
/content/LightGlue
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
  Building editable for lightglue (pyproject.toml) ... [?25l[?25hdone


In [None]:
from lightglue import SuperPoint, DISK, SIFT, ALIKED
from lightglue.utils import load_image
import pandas as pd
import time
import torch
from glob import glob

# Extract features

## Establish the file names
File paths for the image folder, the master CSV that lists every image with its train/test/val split, and the folder where the extracted features are saved.

In [None]:
# Establish the base path for all the images
# (passed as `seg_path` to extract_features; the trailing "/" matters because
# the image path is built by plain string concatenation)
base_path = "/content/drive/.shortcut-targets-by-id/1XpUS4zHTo6vlS_lJvVOlT0dHnNwnZGI-/Dataset corrected image orientation face crops/"
# csv for all the images (loaded with pd.read_csv into `df_all`)
image_csv = "/content/drive/.shortcut-targets-by-id/13g8qiyASITlNVCba3zhMjFaqIY53IMqE/dataset/data_master_v3.csv"
# where to save the feature extraction details
# (used as a string prefix for the torch.save file names, so it must end with "/")
save_path = "/content/drive/.shortcut-targets-by-id/1duwYN4CyEW2al2kOiQpuv8Wdnsnt06MA/LightGlue_Keypoints/"

In [None]:
# Read the master image list, then split it into left- and right-side rows
# using the 'side' column.
df_all = pd.read_csv(image_csv)
df_L, df_R = (df_all[df_all['side'] == side] for side in ("L", "R"))
df_all  # last expression of the cell: displays the full table

Unnamed: 0,year,name,ext,ID,side,angle,path,uid,side_sl,side_sl_ok,split,appears_once,novelty,label
0,2005,03-008 L,jpg,03-008,L,-349 L,2005/03-008 L [-349 L].jpg,0,L,True,train,True,False,0
1,2005,03-008 R,jpg,03-008,R,+345 R,2005/03-008 R [+345 R].jpg,1,R,True,train,True,False,1
2,2005,03-017 L,jpg,03-017,L,-361 L,2005/03-017 L [-361 L].jpg,2,L,True,train,True,False,2
3,2005,03-017 R,jpg,03-017,R,+363 R,2005/03-017 R [+363 R].jpg,3,R,True,train,True,False,3
4,2005,03-057 L,jpg,03-057,L,-330 L,2005/03-057 L [-330 L].jpg,4,L,True,train,True,False,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5793,2023,23-118 R,jpg,23-118,R,+346 R,2023/23-118 R [+346 R].jpg,5793,R,True,train,True,False,5002
5794,2023,23-120 L,jpg,23-120,L,-341 L,2023/23-120 L [-341 L].jpg,5794,L,True,train,True,False,5003
5795,2023,23-120 R,jpg,23-120,R,+348 R,2023/23-120 R [+348 R].jpg,5795,R,True,train,True,False,5004
5796,2023,23-121 L,jpg,23-121,L,-317 L,2023/23-121 L [-317 L].jpg,5796,L,True,val,True,False,5005


In [None]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
# (Other torch device strings such as 'mps' could be set here by hand.)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Create keypoint dictionaries

In [None]:
# Extractor settings; the later extraction cells reuse these two names.
extractor, n_kpts = "sift", 1024

# Left-side training images: extract the features, then store them on Drive.
train_dict_L = extract_features(
    base_path,
    df_L,
    "train",
    feature_extractor=extractor,
    n_keypoints=n_kpts,
)
torch.save(train_dict_L, f"{save_path}trainL_{extractor}_{n_kpts}.pth")

nr ims extracted: 1927, time total: 852.760272026062


In [None]:
# Keyword arguments shared by every extraction call in this cell.
extract_kwargs = dict(feature_extractor=extractor, n_keypoints=n_kpts)

# Right-side training images
train_dict_R = extract_features(base_path, df_R, "train", **extract_kwargs)
torch.save(train_dict_R, f"{save_path}trainR_{extractor}_{n_kpts}.pth")

# Test split, left then right
test_dict_L = extract_features(base_path, df_L, "test", **extract_kwargs)
torch.save(test_dict_L, f"{save_path}testL_{extractor}_{n_kpts}.pth")

test_dict_R = extract_features(base_path, df_R, "test", **extract_kwargs)
torch.save(test_dict_R, f"{save_path}testR_{extractor}_{n_kpts}.pth")

# Validation split, left then right
val_dict_L = extract_features(base_path, df_L, "val", **extract_kwargs)
torch.save(val_dict_L, f"{save_path}valL_{extractor}_{n_kpts}.pth")

val_dict_R = extract_features(base_path, df_R, "val", **extract_kwargs)
torch.save(val_dict_R, f"{save_path}valR_{extractor}_{n_kpts}.pth")

nr ims extracted: 1906, time total: 779.7659618854523
nr ims extracted: 472, time total: 196.58624243736267
nr ims extracted: 469, time total: 192.81889247894287
nr ims extracted: 500, time total: 203.4394817352295
nr ims extracted: 508, time total: 210.02946066856384


In [None]:
# Test-split rows whose 'appears_once' flag is True, selected with one combined mask.
df_temp = df_all[(df_all['split'] == "test") & (df_all['appears_once'] == True)]
df_temp  # display the selection

Unnamed: 0,year,name,ext,ID,side,angle,path,uid,side_sl,side_sl_ok,split,appears_once,novelty,label
6,2005,03-063 L,jpg,03-063,L,-350 L,2005/03-063 L [-350 L].jpg,6,L,True,test,True,False,12
7,2005,03-063 R,jpg,03-063,R,+354 R,2005/03-063 R [+354 R].jpg,7,R,True,test,True,False,13
26,2005,04-070 L,jpg,04-070,L,-345 L,2005/04-070 L [-345 L].jpg,26,L,True,test,True,False,43
27,2005,04-070 R,jpg,04-070,R,+318 R,2005/04-070 R [+318 R].jpg,27,R,True,test,True,False,44
35,2005,05-005 L,jpg,05-005,L,-330 L,2005/05-005 L [-330 L].jpg,35,L,True,test,True,False,53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5763,2023,23-093 R,jpg,23-093,R,+335 R,2023/23-093 R [+335 R].jpg,5763,R,True,test,True,False,4972
5766,2023,23-099 L,jpg,23-099,L,-334 L,2023/23-099 L [-334 L].jpg,5766,L,True,test,True,False,4975
5767,2023,23-099 R,jpg,23-099,R,+336 R,2023/23-099 R [+336 R].jpg,5767,R,True,test,True,False,4976
5774,2023,23-107 L,jpg,23-107,L,-326 L,2023/23-107 L [-326 L].jpg,5774,L,True,test,True,False,4983


In [None]:
# Test-split rows whose 'appears_once' flag is False, selected with one combined mask.
df_temp = df_all[(df_all['split'] == "test") & (df_all['appears_once'] == False)]
df_temp  # display the selection

Unnamed: 0,year,name,ext,ID,side,angle,path,uid,side_sl,side_sl_ok,split,appears_once,novelty,label
186,2006,04-085 L,jpg,04-085,L,-361 L,2006/04-085 L [-361 L].jpg,186,L,True,test,False,True,46
187,2006,04-085 R,jpg,04-085,R,+340 R,2006/04-085 R [+340 R].jpg,187,R,True,test,False,False,47
194,2006,05-047 L,jpg,05-047,L,-362 L,2006/05-047 L [-362 L].jpg,194,L,True,test,False,False,126
195,2006,05-047 R,jpg,05-047,R,+355 R,2006/05-047 R [+355 R].jpg,195,R,True,test,False,False,127
200,2006,05-087 L,jpg,05-087,L,-334 L,2006/05-087 L [-334 L].jpg,200,L,True,test,False,True,179
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5590,2023,20-070 R,jpg,20-070,R,+309 R,2023/20-070 R [+309 R].jpg,5590,R,True,test,False,False,4305
5601,2023,21-141 L,jpg,21-141,L,-339 L,2023/21-141 L [-339 L].jpg,5601,L,True,test,False,False,4519
5602,2023,21-141 R,jpg,21-141,R,+344 R,2023/21-141 R [+344 R].jpg,5602,R,True,test,False,False,4520
5615,2023,22-025 L,jpg,22-025,L,-359 L,2023/22-025 L [-359 L].jpg,5615,L,True,test,False,False,4617
