In [1]:
%pip install wildlife-datasets

Collecting wildlife-datasets
  Downloading wildlife_datasets-1.0.4-py3-none-any.whl.metadata (11 kB)
Collecting gdown (from wildlife-datasets)
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Downloading wildlife_datasets-1.0.4-py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.4/46.4 kB[0m [31m755.2 kB/s[0m eta [36m0:00:00[0m
[?25hDownloading gdown-5.2.0-py3-none-any.whl (18 kB)
Installing collected packages: gdown, wildlife-datasets
Successfully installed gdown-5.2.0 wildlife-datasets-1.0.4
Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip install wildlife-tools

Collecting wildlife-tools
  Downloading wildlife_tools-0.0.9-py3-none-any.whl.metadata (9.3 kB)
Collecting pycocotools (from wildlife-tools)
  Downloading pycocotools-2.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting pytorch-metric-learning (from wildlife-tools)
  Downloading pytorch_metric_learning-2.6.0-py3-none-any.whl.metadata (17 kB)
Collecting faiss-gpu (from wildlife-tools)
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading wildlife_tools-0.0.9-py3-none-any.whl (25 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pycocotools-2.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (427 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m427.8/427.8 k

In [3]:
%pip install timm

Note: you may need to restart the kernel to use updated packages.


**Importing Wildlife data set and Wildlife tools**



In [4]:
import timm
import numpy as np
from wildlife_datasets.datasets import MacaqueFaces
from wildlife_tools.data import WildlifeDataset
import torchvision.transforms as T
from wildlife_datasets import datasets, splits
from wildlife_tools.features import DeepFeatures
from wildlife_tools.similarity import CosineSimilarity
from wildlife_tools.inference import KnnClassifier
from sklearn.metrics import precision_score, recall_score, f1_score

# **Same Data set use for Fine tuning**

**MacaqueFaces Dataset**

In [5]:
# Download dataset (if not already downloaded)
datasets.MacaqueFaces.get_data('../data/MacaqueFaces')

# Load dataset metadata
metadata_MacaqueFaces = datasets.MacaqueFaces('../data/MacaqueFaces')
transform = T.Compose([T.Resize([224, 224]), T.ToTensor(), T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])
dataset_MacaqueFaces = WildlifeDataset(metadata_MacaqueFaces.df, metadata_MacaqueFaces.root, transform=transform)

DATASET MacaqueFaces: DOWNLOADING STARTED.


MacaqueFaces.zip: 12.0MB [00:00, 15.1MB/s]                            
MacaqueFaces_ImageInfo.csv: 410kB [00:00, 773kB/s]                             


DATASET MacaqueFaces: EXTRACTING STARTED.
DATASET MacaqueFaces: FINISHED.



In [6]:
dataset_database_MacaqueFaces = WildlifeDataset(metadata_MacaqueFaces.df.iloc[1000:,:], metadata_MacaqueFaces.root, transform=transform)
dataset_query_MacaqueFaces = WildlifeDataset(metadata_MacaqueFaces.df.iloc[:1000,:], metadata_MacaqueFaces.root, transform=transform)

**Importing the pretrain Model**

In [7]:
name = 'hf-hub:BVRA/MegaDescriptor-T-224'
extractor_MacaqueFaces = DeepFeatures(timm.create_model(name, num_classes=0, pretrained=True))
query_MacaqueFaces, database_MacaqueFaces = extractor_MacaqueFaces(dataset_query_MacaqueFaces), extractor_MacaqueFaces(dataset_database_MacaqueFaces)

config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/204M [00:00<?, ?B/s]

100%|█████████████████████████████████████████████████████████████████| 8/8 [02:12<00:00, 16.59s/it]
100%|███████████████████████████████████████████████████████████████| 42/42 [11:29<00:00, 16.42s/it]


In [8]:
similarity_function = CosineSimilarity()
similarity_MacaqueFaces = similarity_function(query_MacaqueFaces, database_MacaqueFaces)
print(similarity_MacaqueFaces)

{'cosine': array([[-0.04655963, -0.06348041, -0.04423384, ...,  0.06067663,
         0.06049749,  0.07648016],
       [-0.05115557, -0.07234082, -0.05413907, ...,  0.08857714,
         0.08899388,  0.10716254],
       [-0.05660899, -0.07521503, -0.05470551, ...,  0.08185373,
         0.08442864,  0.10064523],
       ...,
       [ 0.9420698 ,  0.93323225,  0.94108933, ..., -0.06922977,
        -0.07939504, -0.07788636],
       [ 0.96803546,  0.9612043 ,  0.9673971 , ..., -0.07169758,
        -0.08340549, -0.08055156],
       [ 0.9776342 ,  0.9722817 ,  0.97833085, ..., -0.06218302,
        -0.07746921, -0.07251719]], dtype=float32)}


In [9]:
classifier_MacaqueFaces = KnnClassifier(k=1, database_labels=dataset_database_MacaqueFaces.labels_string)
predictions_MacaqueFaces = classifier_MacaqueFaces(similarity_MacaqueFaces['cosine'])
print("Predictions for 1000 test Images:-\n",predictions_MacaqueFaces)
accuracy_MacaqueFaces = np.mean(dataset_query_MacaqueFaces.labels_string == predictions_MacaqueFaces)
print("Accuracy on MacaqueFaces data: {:.2f}%".format(accuracy_MacaqueFaces * 100))
# precision
precision_MacaqueFaces = precision_score(dataset_query_MacaqueFaces.labels_string, predictions_MacaqueFaces, average='weighted')
# recall
recall_MacaqueFaces = recall_score(dataset_query_MacaqueFaces.labels_string, predictions_MacaqueFaces, average='weighted')
# F1 score
f1_MacaqueFaces = f1_score(dataset_query_MacaqueFaces.labels_string, predictions_MacaqueFaces, average='weighted')
print("Precision:", precision_MacaqueFaces)
print("Recall:", recall_MacaqueFaces)
print("F1 Score:", f1_MacaqueFaces)

Predictions for 1000 test Images:-
 ['Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity' 'Verity'
 'Verity' 'Veri

  results = pd.DataFrame(results).T.fillna(method="ffill").T
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


**LionData Dateset**

In [10]:
# Download dataset (if not already downloaded)
datasets.LionData.get_data('../data/LionData')

# Load dataset metadata
metadata_LionData = datasets.LionData('../data/LionData')
transform = T.Compose([T.Resize([224, 224]), T.ToTensor(), T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])
dataset_LionData = WildlifeDataset(metadata_LionData.df, metadata_LionData.root, transform=transform)

DATASET LionData: DOWNLOADING STARTED.


main.zip: 495MB [00:20, 24.5MB/s]


DATASET LionData: EXTRACTING STARTED.
DATASET LionData: FINISHED.



In [11]:
dataset_database_LionData = WildlifeDataset(metadata_LionData.df.iloc[100:,:], metadata_LionData.root, transform=transform)
dataset_query_LionData = WildlifeDataset(metadata_LionData.df.iloc[:100,:], metadata_LionData.root, transform=transform)

In [12]:
name = 'hf-hub:BVRA/MegaDescriptor-T-224'
extractor_LionData = DeepFeatures(timm.create_model(name, num_classes=0, pretrained=True))
query_LionData, database_LionData = extractor_LionData(dataset_query_LionData), extractor_LionData(dataset_database_LionData)

100%|█████████████████████████████████████████████████████████████████| 1/1 [00:13<00:00, 13.04s/it]
100%|█████████████████████████████████████████████████████████████████| 5/5 [01:22<00:00, 16.46s/it]


In [13]:
similarity_function = CosineSimilarity()
similarity_LionData = similarity_function(query_LionData, database_LionData)
print(similarity_LionData)

{'cosine': array([[0.8689625 , 0.6994199 , 0.8777387 , ..., 0.8297811 , 0.89548856,
        0.86451447],
       [0.9500993 , 0.70559263, 0.9380014 , ..., 0.7477598 , 0.9478769 ,
        0.89982736],
       [0.8724263 , 0.6950849 , 0.8994317 , ..., 0.7589243 , 0.91286826,
        0.91146004],
       ...,
       [0.87236   , 0.7832215 , 0.90146816, ..., 0.8094392 , 0.9129667 ,
        0.90494585],
       [0.89191246, 0.7385788 , 0.9243942 , ..., 0.7845446 , 0.9392828 ,
        0.9575526 ],
       [0.89837384, 0.7575829 , 0.915068  , ..., 0.8079005 , 0.9342221 ,
        0.9315052 ]], dtype=float32)}


In [14]:
# Predictions
classifier_LionData = KnnClassifier(k=1, database_labels=dataset_database_LionData.labels_string)
predictions_LionData = classifier_LionData(similarity_LionData['cosine'])
print("Predictions for 100 test Images:-\n",predictions_LionData)
# Accuracy
accuracy_LionData = np.mean(dataset_query_LionData.labels_string == predictions_LionData)
print("Accuracy on Lion data: {:.2f}%".format(accuracy_LionData * 100))
# F1 score,recall and precision
precision = precision_score(dataset_query_LionData.labels_string, predictions_LionData, average='weighted')
recall = recall_score(dataset_query_LionData.labels_string, predictions_LionData, average='weighted')
f1 = f1_score(dataset_query_LionData.labels_string, predictions_LionData, average='weighted')
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Predictions for 100 test Images:-
 ['Sebastian' 'Saba' 'Etana' 'Tigisi' 'Sebastian' 'Moswen' 'Naape'
 'Simaloi' 'Autumn' 'Lolparpit' 'Maskio' 'Chaimu' 'Naape' 'Saba' 'Saitoti'
 'Esiriwua' 'Sadala' 'Etana' 'Enderoni' 'Kinna' 'Lucinda' 'Enkume' 'Joy'
 'Lemuanik' 'Naibor' 'Shambe' 'Siti' 'Selenkay' 'Sadala' 'Manzy' 'Moswen'
 'Napi' 'Ngare' 'Naimina' 'Napi' 'Cleopatra' 'Moswen' 'Kioni' 'Chaimu'
 'Engiyaa' 'Enderoni' 'Jamal' 'Leia' 'Sebastian' 'Nuru' 'Maskio' 'Mooza'
 'Moswen' 'Kibibi' 'Tigisi' 'Enderoni' 'Lucinda' 'Moswen' 'Kioni'
 'Sebastian' 'Mashavu' 'Mickey' 'Mama-Kali' 'Kioni' 'Ngare' 'Nashipai'
 'Sikio-Kali' 'Ngare' 'Amber' 'Olonyori' 'Lemuanik' 'Kibibi' 'Ngare'
 'Saimutie' 'Amber' 'Shemsa' 'Olonyori' 'Mickey' 'Esiriwua' 'Nashipai'
 'Enderoni' 'Summer' 'Nuru' 'Mickey' 'Joy' 'Cleopatra' 'Tigisi' 'Mooza'
 'Saitoti' 'Kioni' 'Naini' 'Rescue' 'Sikio-Kali' 'Empurra' 'Enadalut'
 'Lemayian' 'Rescue' 'Enadalut' 'Esiriwua' 'Enkume' 'Senteu' 'Doto'
 'Enkume' 'Nakato' 'Enderoni']
Accuracy on Lio

  results = pd.DataFrame(results).T.fillna(method="ffill").T
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


**NyalaDataSet**

In [15]:
# Download dataset (if not already downloaded)
datasets.NyalaData.get_data('../data/NyalaData')

# Load dataset metadata
metadata_N = datasets.NyalaData('../data/NyalaData')
transform = T.Compose([T.Resize([224, 224]), T.ToTensor(), T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])
dataset_N = WildlifeDataset(metadata_N.df, metadata_N.root, transform=transform)

DATASET NyalaData: DOWNLOADING STARTED.


main.zip: 495MB [00:19, 25.4MB/s]


DATASET NyalaData: EXTRACTING STARTED.
DATASET NyalaData: FINISHED.



In [16]:
dataset_database_N = WildlifeDataset(metadata_N.df.iloc[100:,:], metadata_N.root, transform=transform)
dataset_query_N = WildlifeDataset(metadata_N.df.iloc[:100,:], metadata_N.root, transform=transform)

In [17]:
name = 'hf-hub:BVRA/MegaDescriptor-T-224'
extractor_N = DeepFeatures(timm.create_model(name, num_classes=0, pretrained=True))
query_N, database_N = extractor_N(dataset_query_N), extractor_N(dataset_database_N)

100%|█████████████████████████████████████████████████████████████████| 1/1 [00:15<00:00, 15.29s/it]
100%|███████████████████████████████████████████████████████████████| 15/15 [04:05<00:00, 16.37s/it]


In [18]:
similarity_function = CosineSimilarity()
similarity_N = similarity_function(query_N, database_N)
print(similarity_N)

{'cosine': array([[0.86195844, 0.01217476, 0.05642486, ..., 0.6895717 , 0.03031035,
        0.8984183 ],
       [0.2595207 , 0.9185581 , 0.78501725, ..., 0.5010884 , 0.82998693,
        0.17973822],
       [0.71705043, 0.46804816, 0.562163  , ..., 0.9625541 , 0.51908785,
        0.7367307 ],
       ...,
       [0.2627342 , 0.92833006, 0.7531289 , ..., 0.4816041 , 0.80183494,
        0.1790611 ],
       [0.844211  , 0.05148882, 0.10151851, ..., 0.7681326 , 0.08118577,
        0.9342401 ],
       [0.07699884, 0.78782463, 0.920832  , ..., 0.38661015, 0.94270915,
        0.07355437]], dtype=float32)}


In [19]:
# Predictions
classifier_N = KnnClassifier(k=1, database_labels=dataset_database_N.labels_string)
predictions_N = classifier_N(similarity_N['cosine'])
print("Predictions for 100 test Images:-\n",predictions_N)
# Accuracy
accuracy_N = np.mean(dataset_query_N.labels_string == predictions_N)
print("Accuracy on NyalaData data: {:.2f}%".format(accuracy_N * 100))
# precision,recall and F1 score
precision_N = precision_score(dataset_query_N.labels_string, predictions_N, average='weighted')
recall_N = recall_score(dataset_query_N.labels_string, predictions_N, average='weighted')
f1_N = f1_score(dataset_query_N.labels_string, predictions_N, average='weighted')
print("Precision:", precision_N)
print("Recall:", recall_N)
print("F1 Score:", f1_N)

Predictions for 100 test Images:-
 ['166' '10' '57' '89' '58' '179' '55' '58' '149' '71' '87' '125' '56'
 '144' '131' '8' '36' '150' '131' '11' '116' '19' '20' '86' '260' '165'
 '140' '11' '246' '41' '140' '197' '9' '131' '24' '48' '41' '11' '48'
 '131' '272' '47' '21' '46' '8' '241' '114' '62' '21' '48' '252' '151'
 '27' '108' '82' '51' '101' '85' '32' '58' '46' '139' '63' '85' '9' '181'
 '65' '242' '43' '144' '142' '12' '60' '5' '127' '166' '56' '21' '27' '36'
 '20' '188' '51' '51' '42' '20' '111' '62' '165' '68' '104' '174' '48'
 '217' '144' '6' '50' '91' '105' '140']
Accuracy on NyalaData data: 11.00%
Precision: 0.15583333333333335
Recall: 0.11
F1 Score: 0.11866666666666667


  results = pd.DataFrame(results).T.fillna(method="ffill").T
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


**StripeSpotter Dataset**

In [20]:
# Download dataset (if not already downloaded)
datasets.StripeSpotter.get_data('../data/StripeSpotter')
# Load dataset metadata
metadata = datasets.StripeSpotter('../data/StripeSpotter')
transform = T.Compose([T.Resize([224, 224]), T.ToTensor(), T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])
dataset = WildlifeDataset(metadata.df, metadata.root, transform=transform)

DATASET StripeSpotter: DOWNLOADING STARTED.


data-20110718.zip: 71.7MB [00:00, 132MB/s]                            
data-20110718.z02: 100%|██████████| 78.6M/78.6M [00:01<00:00, 46.3MB/s]
data-20110718.z01: 100%|██████████| 78.6M/78.6M [00:00<00:00, 95.0MB/s]


DATASET StripeSpotter: EXTRACTING STARTED.
 copying: data/
 copying: data/.DS_Store
 copying: data/images/
 copying: data/images/img-0000001.jpg
 copying: data/images/img-0000002.jpg
 copying: data/images/img-0000003.jpg
 copying: data/images/img-0000004.jpg
 copying: data/images/img-0000005.jpg
 copying: data/images/img-0000006.jpg
 copying: data/images/img-0000007.jpg
 copying: data/images/img-0000008.jpg
 copying: data/images/img-0000009.jpg
 copying: data/images/img-0000010.jpg
 copying: data/images/img-0000011.jpg
 copying: data/images/img-0000012.jpg
 copying: data/images/img-0000013.jpg
 copying: data/images/img-0000014.jpg
 copying: data/images/img-0000015.jpg
 copying: data/images/img-0000016.jpg
 copying: data/images/img-0000017.jpg
 copying: data/images/img-0000018.jpg
 copying: data/images/img-0000019.jpg
 copying: data/images/img-0000020.jpg
 copying: data/images/img-0000021.jpg
 copying: data/images/img-0000022.jpg
 copying: data/images/img-0000023.jpg
 copying: data/imag

In [21]:
dataset_database = WildlifeDataset(metadata.df.iloc[100:,:], metadata.root, transform=transform)
dataset_query = WildlifeDataset(metadata.df.iloc[:100,:], metadata.root, transform=transform)

In [22]:
name = 'hf-hub:BVRA/MegaDescriptor-T-224'
extractor_stripeSpotter = DeepFeatures(timm.create_model(name, num_classes=0, pretrained=True))
query, database = extractor_stripeSpotter(dataset_query), extractor_stripeSpotter(dataset_database)

100%|█████████████████████████████████████████████████████████████████| 1/1 [00:15<00:00, 15.04s/it]
100%|█████████████████████████████████████████████████████████████████| 6/6 [01:36<00:00, 16.13s/it]


In [23]:
similarity_function = CosineSimilarity()
similarity = similarity_function(query, database)
print(similarity)

{'cosine': array([[0.21301928, 0.58875716, 0.659363  , ..., 0.4394257 , 0.44544137,
        0.6213351 ],
       [0.17780267, 0.38624042, 0.4010223 , ..., 0.7090032 , 0.20244715,
        0.28462344],
       [0.6676847 , 0.4385086 , 0.34033647, ..., 0.2868998 , 0.3974636 ,
        0.36011034],
       ...,
       [0.6064294 , 0.23702008, 0.2166625 , ..., 0.41781956, 0.32831523,
        0.3521495 ],
       [0.5151224 , 0.27989513, 0.32729602, ..., 0.5386833 , 0.39507395,
        0.2875005 ],
       [0.17924348, 0.24542159, 0.30130655, ..., 0.5814506 , 0.28957438,
        0.3585865 ]], dtype=float32)}


In [24]:
classifier = KnnClassifier(k=1, database_labels=dataset_database.labels_string)
predictions = classifier(similarity['cosine'])
print("Predictions for 100 test Images:-\n",predictions)
accuracy = np.mean(dataset_query.labels_string == predictions)
print("Accuracy on StripeSpotter data: {:.2f}%".format(accuracy * 100))
# precision
precision = precision_score(dataset_query.labels_string, predictions, average='weighted')
# recall
recall = recall_score(dataset_query.labels_string, predictions, average='weighted')
# F1 score
f1 = f1_score(dataset_query.labels_string, predictions, average='weighted')

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Predictions for 100 test Images:-
 ['09_212' 'S09_161' '08_085' '08_100' '09_200' 'OP05_625' '09_212'
 '02_149' '01_579' '08_034' '01_679' '08_078' '01_460' '10_002' '01_136'
 '09_212' '09_200' 'S09_111' '02_149' 'S09_069' '08_100' '01_363' '01_363'
 '08_078' '08_034' '01_805' '09_355' '08_085' 'OP05_546' '09_212' '08_100'
 'S09_088' '09_182' 'S09_215' '02_162' '01_230' '08_085' '01_661'
 '02_1026' '09_185' '01_136' '01_661' '01_661' '02_161' '01_165' '02_161'
 'S09_174' 'S09_067' '08_100' 'OP04_225' '02_149' 'S09_067' '09_200'
 '04_008' '01_661' '09_182' '01_805' '02_161' '10_002' '01_036' '02_149'
 '01_165' '09_212' 'S09_211' 'S09_215' '03_045' '01_579' 'OP04_225'
 '08_100' '08_100' '04_008' '09_212' 'OP05_625' 'OP05_625' '02_149'
 '01_230' '09_200' 'OP05_625' '01_661' 'OP04_225' '01_661' '01_661'
 'S09_174' '01_679' '01_661' 'S09_067' 'S09_111' 'S09_112' '09_212'
 'OP04_225' '02_161' '09_185' '04_008' '08_100' '01_460' '01_679'
 'S09_111' '01_679' '01_036' 'S09_067']
Accuracy on Str

  results = pd.DataFrame(results).T.fillna(method="ffill").T
  _warn_prf(average, modifier, msg_start, len(result))


**IPanda50 Dataset**

In [25]:
# Download dataset (if not already downloaded)
datasets.IPanda50.get_data('../data/IPanda50')
# Load dataset metadata
metadata = datasets.IPanda50('../data/IPanda50')
transform = T.Compose([T.Resize([224, 224]), T.ToTensor(), T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])
dataset = WildlifeDataset(metadata.df, metadata.root, transform=transform)

DATASET IPanda50: DOWNLOADING STARTED.


Downloading...
From (original): https://drive.google.com/uc?id=1nkh-g6a8JvWy-XsMaZqrN2AXoPlaXuFg
From (redirected): https://drive.google.com/uc?id=1nkh-g6a8JvWy-XsMaZqrN2AXoPlaXuFg&confirm=t&uuid=8957eafb-a570-4497-aa0e-3a03110d4e48
To: /kaggle/data/IPanda50/iPanda50-images.zip
100%|██████████| 926M/926M [00:04<00:00, 227MB/s]
Downloading...
From: https://drive.google.com/uc?id=1gVREtFWkNec4xwqOyKkpuIQIyWU_Y_Ob
To: /kaggle/data/IPanda50/iPanda50-split.zip
100%|██████████| 140k/140k [00:00<00:00, 68.2MB/s]
Downloading...
From: https://drive.google.com/uc?id=1jdACN98uOxedZDT-6X3rpbooLAAUEbNY
To: /kaggle/data/IPanda50/iPanda50-eyes-labels.zip
100%|██████████| 2.55M/2.55M [00:00<00:00, 208MB/s]


DATASET IPanda50: EXTRACTING STARTED.
DATASET IPanda50: FINISHED.



In [26]:
dataset_database = WildlifeDataset(metadata.df.iloc[100:,:], metadata.root, transform=transform)
dataset_query = WildlifeDataset(metadata.df.iloc[:100,:], metadata.root, transform=transform)

In [27]:
name = 'hf-hub:BVRA/MegaDescriptor-T-224'
extractor_IPanda50 = DeepFeatures(timm.create_model(name, num_classes=0, pretrained=True))
query, database = extractor_IPanda50(dataset_query), extractor_IPanda50(dataset_database)

100%|█████████████████████████████████████████████████████████████████| 1/1 [00:14<00:00, 14.03s/it]
100%|███████████████████████████████████████████████████████████████| 53/53 [14:37<00:00, 16.55s/it]


In [28]:
similarity_function = CosineSimilarity()
similarity = similarity_function(query, database)
print(similarity)

{'cosine': array([[ 0.02714626,  0.53501487,  0.14056852, ...,  0.38829634,
         0.43481818, -0.07561178],
       [ 0.08373766,  0.5616947 ,  0.03785911, ...,  0.3602664 ,
         0.64934146, -0.12950839],
       [ 0.01190434,  0.4141855 ,  0.23542273, ...,  0.45754868,
         0.5449476 , -0.13358036],
       ...,
       [ 0.03203833,  0.12872308, -0.14140168, ...,  0.02447993,
         0.22118229, -0.17661375],
       [-0.08624171,  0.16571996,  0.07199847, ...,  0.14674996,
         0.234734  , -0.02312846],
       [ 0.01022826,  0.06892092,  0.7597883 , ...,  0.42245224,
         0.10611835, -0.0471418 ]], dtype=float32)}


In [29]:
classifier = KnnClassifier(k=1, database_labels=dataset_database.labels_string)
predictions = classifier(similarity['cosine'])
print("Predictions for 100 test Images:-\n",predictions)
accuracy = np.mean(dataset_query.labels_string == predictions)
print("Accuracy on IPanda50 data: {:.2f}%".format(accuracy * 100))

Predictions for 100 test Images:-
 ['10_hexing' '37_xinger' '01_aoliao' '33_wuyi' '47_yingying' '15_maosun'
 '09_fushun' '10_hexing' '36_xingda' '19_nannan' '08_fulai' '31_shurong'
 '44_yayi' '39_xinghui' '37_xinger' '14_maodou' '14_maodou' '00_aibang'
 '21_nina' '35_xilan' '19_nannan' '33_wuyi' '47_yingying' '49_yuanrun'
 '38_xingfan' '04_chengdui' '16_maotao' '48_yongbang' '11_jiaoao'
 '49_yuanrun' '24_qixi' '10_hexing' '35_xilan' '35_xilan' '02_baolan'
 '31_shurong' '31_shurong' '02_baolan' '23_qiubang' '26_qiyuan'
 '04_chengdui' '35_xilan' '08_fulai' '00_aibang' '10_hexing'
 '29_shuangxiong' '11_jiaoao' '44_yayi' '14_maodou' '30_shuqing'
 '36_xingda' '39_xinghui' '33_wuyi' '10_hexing' '10_hexing' '00_aibang'
 '49_yuanrun' '35_xilan' '04_chengdui' '10_hexing' '39_xinghui' '44_yayi'
 '10_hexing' '36_xingda' '16_maotao' '22_nini' '00_aibang' '39_xinghui'
 '10_hexing' '17_meibang' '00_aibang' '46_yazhu' '14_maodou' '15_maosun'
 '32_susu' '26_qiyuan' '27_rourou' '23_qiubang' '18_miaomia

  results = pd.DataFrame(results).T.fillna(method="ffill").T


**CZoo Dataset**

In [30]:
# Download dataset (if not already downloaded)
datasets.CZoo.get_data('../data/CZoo')

# Load dataset metadata
metadata_CZoo = datasets.CZoo('../data/CZoo')
transform = T.Compose([T.Resize([224, 224]), T.ToTensor(), T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])
dataset = WildlifeDataset(metadata_CZoo.df, metadata_CZoo.root, transform=transform)

DATASET CZoo: DOWNLOADING STARTED.


master.zip: 634MB [00:20, 30.8MB/s]


DATASET CZoo: EXTRACTING STARTED.
DATASET CZoo: FINISHED.



In [31]:
dataset_database = WildlifeDataset(metadata.df.iloc[100:,:], metadata.root, transform=transform)
dataset_query = WildlifeDataset(metadata.df.iloc[:100,:], metadata.root, transform=transform)

In [32]:
name = 'hf-hub:BVRA/MegaDescriptor-T-224'
extractor_CZoo = DeepFeatures(timm.create_model(name, num_classes=0, pretrained=True))
query, database = extractor_CZoo(dataset_query), extractor_CZoo(dataset_database)


100%|█████████████████████████████████████████████████████████████████| 1/1 [00:13<00:00, 13.05s/it]
100%|███████████████████████████████████████████████████████████████| 53/53 [14:48<00:00, 16.77s/it]


In [33]:
similarity_function = CosineSimilarity()
similarity = similarity_function(query, database)
print(similarity)

{'cosine': array([[ 0.02714626,  0.53501487,  0.14056852, ...,  0.38829634,
         0.43481818, -0.07561178],
       [ 0.08373766,  0.5616947 ,  0.03785911, ...,  0.3602664 ,
         0.64934146, -0.12950839],
       [ 0.01190434,  0.4141855 ,  0.23542273, ...,  0.45754868,
         0.5449476 , -0.13358036],
       ...,
       [ 0.03203833,  0.12872308, -0.14140168, ...,  0.02447993,
         0.22118229, -0.17661375],
       [-0.08624171,  0.16571996,  0.07199847, ...,  0.14674996,
         0.234734  , -0.02312846],
       [ 0.01022826,  0.06892092,  0.7597883 , ...,  0.42245224,
         0.10611835, -0.0471418 ]], dtype=float32)}


In [34]:
classifier = KnnClassifier(k=1, database_labels=dataset_database.labels_string)
predictions = classifier(similarity['cosine'])
print("Predictions for 100 test Images:-\n",predictions)
accuracy = np.mean(dataset_query.labels_string == predictions)
print("Accuracy on CZoo data: {:.2f}%".format(accuracy * 100))

Predictions for 100 test Images:-
 ['10_hexing' '37_xinger' '01_aoliao' '33_wuyi' '47_yingying' '15_maosun'
 '09_fushun' '10_hexing' '36_xingda' '19_nannan' '08_fulai' '31_shurong'
 '44_yayi' '39_xinghui' '37_xinger' '14_maodou' '14_maodou' '00_aibang'
 '21_nina' '35_xilan' '19_nannan' '33_wuyi' '47_yingying' '49_yuanrun'
 '38_xingfan' '04_chengdui' '16_maotao' '48_yongbang' '11_jiaoao'
 '49_yuanrun' '24_qixi' '10_hexing' '35_xilan' '35_xilan' '02_baolan'
 '31_shurong' '31_shurong' '02_baolan' '23_qiubang' '26_qiyuan'
 '04_chengdui' '35_xilan' '08_fulai' '00_aibang' '10_hexing'
 '29_shuangxiong' '11_jiaoao' '44_yayi' '14_maodou' '30_shuqing'
 '36_xingda' '39_xinghui' '33_wuyi' '10_hexing' '10_hexing' '00_aibang'
 '49_yuanrun' '35_xilan' '04_chengdui' '10_hexing' '39_xinghui' '44_yayi'
 '10_hexing' '36_xingda' '16_maotao' '22_nini' '00_aibang' '39_xinghui'
 '10_hexing' '17_meibang' '00_aibang' '46_yazhu' '14_maodou' '15_maosun'
 '32_susu' '26_qiyuan' '27_rourou' '23_qiubang' '18_miaomia

  results = pd.DataFrame(results).T.fillna(method="ffill").T


In [35]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Calculate precision
precision = precision_score(dataset_query.labels_string, predictions, average='weighted')

# Calculate recall
recall = recall_score(dataset_query.labels_string, predictions, average='weighted')

# Calculate F1 score
f1 = f1_score(dataset_query.labels_string, predictions, average='weighted')

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Precision: 0.8961111111111112
Recall: 0.85
F1 Score: 0.8478354978354978


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


**CowDataset**

In [36]:
# Download dataset (if not already downloaded)
datasets.CowDataset.get_data('../data/CowDataset')

# Load dataset metadata
metadata_CowDataset = datasets.CowDataset('../data/CowDataset')
transform = T.Compose([T.Resize([224, 224]), T.ToTensor(), T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])
dataset = WildlifeDataset(metadata_CowDataset.df, metadata_CowDataset.root, transform=transform)

DATASET CowDataset: DOWNLOADING STARTED.


31210192: 4.45GB [02:16, 32.6MB/s]                            


DATASET CowDataset: EXTRACTING STARTED.
DATASET CowDataset: FINISHED.



In [37]:
dataset_database = WildlifeDataset(metadata.df.iloc[100:,:], metadata.root, transform=transform)
dataset_query = WildlifeDataset(metadata.df.iloc[:100,:], metadata.root, transform=transform)

In [38]:
name = 'hf-hub:BVRA/MegaDescriptor-T-224'
extractor_CowDataset = DeepFeatures(timm.create_model(name, num_classes=0, pretrained=True))
query, database = extractor_CowDataset(dataset_query), extractor_CowDataset(dataset_database)

100%|█████████████████████████████████████████████████████████████████| 1/1 [00:15<00:00, 15.10s/it]
100%|███████████████████████████████████████████████████████████████| 53/53 [13:20<00:00, 15.09s/it]


In [39]:
similarity_function = CosineSimilarity()
similarity = similarity_function(query, database)
print(similarity)

{'cosine': array([[ 0.02714626,  0.53501487,  0.14056852, ...,  0.38829634,
         0.43481818, -0.07561178],
       [ 0.08373766,  0.5616947 ,  0.03785911, ...,  0.3602664 ,
         0.64934146, -0.12950839],
       [ 0.01190434,  0.4141855 ,  0.23542273, ...,  0.45754868,
         0.5449476 , -0.13358036],
       ...,
       [ 0.03203833,  0.12872308, -0.14140168, ...,  0.02447993,
         0.22118229, -0.17661375],
       [-0.08624171,  0.16571996,  0.07199847, ...,  0.14674996,
         0.234734  , -0.02312846],
       [ 0.01022826,  0.06892092,  0.7597883 , ...,  0.42245224,
         0.10611835, -0.0471418 ]], dtype=float32)}


In [40]:
classifier = KnnClassifier(k=1, database_labels=dataset_database.labels_string)
predictions = classifier(similarity['cosine'])
print("Predictions for 100 test Images:-\n",predictions)
accuracy = np.mean(dataset_query.labels_string == predictions)
print("Accuracy on CowDataset data: {:.2f}%".format(accuracy * 100))

Predictions for 100 test Images:-
 ['10_hexing' '37_xinger' '01_aoliao' '33_wuyi' '47_yingying' '15_maosun'
 '09_fushun' '10_hexing' '36_xingda' '19_nannan' '08_fulai' '31_shurong'
 '44_yayi' '39_xinghui' '37_xinger' '14_maodou' '14_maodou' '00_aibang'
 '21_nina' '35_xilan' '19_nannan' '33_wuyi' '47_yingying' '49_yuanrun'
 '38_xingfan' '04_chengdui' '16_maotao' '48_yongbang' '11_jiaoao'
 '49_yuanrun' '24_qixi' '10_hexing' '35_xilan' '35_xilan' '02_baolan'
 '31_shurong' '31_shurong' '02_baolan' '23_qiubang' '26_qiyuan'
 '04_chengdui' '35_xilan' '08_fulai' '00_aibang' '10_hexing'
 '29_shuangxiong' '11_jiaoao' '44_yayi' '14_maodou' '30_shuqing'
 '36_xingda' '39_xinghui' '33_wuyi' '10_hexing' '10_hexing' '00_aibang'
 '49_yuanrun' '35_xilan' '04_chengdui' '10_hexing' '39_xinghui' '44_yayi'
 '10_hexing' '36_xingda' '16_maotao' '22_nini' '00_aibang' '39_xinghui'
 '10_hexing' '17_meibang' '00_aibang' '46_yazhu' '14_maodou' '15_maosun'
 '32_susu' '26_qiyuan' '27_rourou' '23_qiubang' '18_miaomia

  results = pd.DataFrame(results).T.fillna(method="ffill").T


In [41]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Calculate precision
precision = precision_score(dataset_query.labels_string, predictions, average='weighted')

# Calculate recall
recall = recall_score(dataset_query.labels_string, predictions, average='weighted')

# Calculate F1 score
f1 = f1_score(dataset_query.labels_string, predictions, average='weighted')

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Precision: 0.8961111111111112
Recall: 0.85
F1 Score: 0.8478354978354978


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
