In [1]:
import sys
import os
sys.path.append("../")

import pandas as pd
import numpy as np
import torch
import timm
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from ultralytics import YOLO
from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
animals = ['bison', 'badger', 'ibex', 'beaver', 'red deer', 'golden jackal', 'chamois', 'cat', 'goat',
           'roe deer', 'dog', 'raccoon dog', 'fallow deer', 'squirrel', 'moose', 'equid', 'genet',
           'wolverine', 'hedgehog', 'lagomorph', 'wolf', 'otter', 'lynx', 'marmot', 'micromammal', 
           'mouflon', 'sheep', 'mustelid', 'bird', 'bear', 'porcupine', 'nutria', 'muskrat', 'raccoon',
           'fox', 'reindeer', 'wild boar', 'cow']

DFYOLO_NAME = "DF"
DFYOLO_WIDTH = 960 # image width
DFYOLO_THRES = 0.6
DFYOLO_WEIGHTS = os.path.join('models/deepfaune-yolov8s_960.pt')

CROP_SIZE = 182
BACKBONE = "vit_large_patch14_dinov2.lvd142m"
DFVIT_WEIGHTS = os.path.join('models/deepfaune-vit_large_patch14_dinov2.lvd142m.v4.pt')

In [4]:
results = pd.read_csv('../y_clean_thin.csv', index_col=0)
results.image_path = '../' + results.image_path

In [5]:
def cropSquareCVtoPIL(imagecv, box):
    x1, y1, x2, y2 = box
    xsize = (x2-x1)
    ysize = (y2-y1)
    if xsize>ysize:
        y1 = y1-int((xsize-ysize)/2)
        y2 = y2+int((xsize-ysize)/2)
    if ysize>xsize:
        x1 = x1-int((ysize-xsize)/2)
        x2 = x2+int((ysize-xsize)/2)
    height, width, _ = imagecv.shape
    croppedimagecv = imagecv[max(0,int(y1)):min(int(y2),height),max(0,int(x1)):min(int(x2),width)]
    croppedimage = Image.fromarray(croppedimagecv[:,:,(2,1,0)]) # converted to PIL BGR image
    return croppedimage


class Detector:
    def __init__(self, device: str='cuda'):
        self.device = device
        self.yolo = YOLO(DFYOLO_WEIGHTS)

    def bestBoxDetection(self, filename_or_imagecv):
        try:
            results = self.yolo(filename_or_imagecv, device=self.device)
        except FileNotFoundError:
            return None, 0, np.zeros(4), 0, []
        except Exception as err:
            print(err)
            return None, 0, np.zeros(4), 0, []
        # orig_img a numpy array (cv2) in BGR
        imagecv = results[0].cpu().orig_img
        detection = results[0].cpu().numpy().boxes

        # Are there any relevant boxes?
        if not len(detection.cls):
            return None, 0, np.zeros(4), 0, []
        else:
            # Yes. Non empty image
            pass
        # Is there a relevant animal box? 
        try:
            # Yes. Selecting the best animal box
            kbox = np.where(detection.cls==0)[0][0]
        except IndexError:
            # No: Selecting the best box for another category (human, vehicle)
            kbox = 0
        # categories are 1=animal, 2=person, 3=vehicle and the empty category 0=empty
        category = int(detection.cls[kbox]) + 1
        box = detection.xyxy[kbox] # xmin, ymin, xmax, ymax
        # Is this an animal box ?
        if category == 1:
            # Yes: cropped image is required for classification
            croppedimage = cropSquareCVtoPIL(imagecv, box.copy())
        else: 
            # No: cropped image is not required for classification 
            croppedimage = None
        ## animal count
        if category == 1:
            count = sum(detection.cls==0) # only above a threshold
        else:
            count = 0
        ## human boxes
        ishuman = (detection.cls==1)
        if any(ishuman==True):
            humanboxes = detection.xyxy[ishuman,]
        else:
            humanboxes = []
        return croppedimage, category, box, count, humanboxes

    def merge(self, detector):
        pass

In [6]:
class Classifier:
    def __init__(self, device=None):
        self.model = Model(device)
        self.model.loadWeights(DFVIT_WEIGHTS)
        self.transforms = transforms.Compose([
            transforms.Resize(size=(CROP_SIZE, CROP_SIZE), interpolation=transforms.InterpolationMode.BICUBIC, max_size=None, antialias=None),
            transforms.ToTensor(),
            transforms.Normalize(mean=torch.tensor([0.4850, 0.4560, 0.4060]), std=torch.tensor([0.2290, 0.2240, 0.2250]))])

    def predictOnBatch(self, batchtensor, withsoftmax=True):
        return self.model.predict(batchtensor, withsoftmax)

    # croppedimage loaded by PIL
    def preprocessImage(self, croppedimage):
        preprocessimage = self.transforms(croppedimage)
        return preprocessimage.unsqueeze(dim=0)

class Model(nn.Module):
    def __init__(self, device=None):
        """
        Constructor of model classifier
        """
        super().__init__()
        self.base_model = timm.create_model(BACKBONE, pretrained=False,
                                            num_classes=len(animals),
                                            dynamic_img_size=True)
        print(f"Using {BACKBONE} for classification")
        self.backbone = BACKBONE
        self.nbclasses = len(animals)
        self.device = device

    def forward(self, input):
        x = self.base_model(input)
        return x

    def predict(self, data, withsoftmax=True):
        """
        Predict on test DataLoader
        :param test_loader: test dataloader: torch.utils.data.DataLoader
        :return: numpy array of predictions without soft max
        """
        self.eval()
        self.to(self.device)
        with torch.no_grad():
            x = data.to(self.device)
            embeddings = self.base_model.forward_features(x)
            if withsoftmax:
                predictions = self.base_model.forward_head(embeddings).softmax(dim=1)
            else:
                predictions = self.base_model.forward_head(embeddings)
            embeddings = embeddings[:, 0, :]
        return predictions.cpu().numpy(), embeddings.cpu().numpy()

    def loadWeights(self, path):
        """
        :param path: path of .pt save of model
        """
        if path[-3:] != ".pt":
            path += ".pt"
        try:
            params = torch.load(path, map_location=self.device, weights_only=False)
            args = params['args']
            if self.nbclasses != args['num_classes']:
                raise Exception("You load a model ({}) that does not have the same number of class"
                                "({})".format(args['num_classes'], self.nbclasses))
            self.backbone = args['backbone']
            self.nbclasses = args['num_classes']
            self.load_state_dict(params['state_dict'])
        except Exception as e:
            print("Can't load checkpoint model because :\n\n " + str(e), file=sys.stderr)
            raise e

In [7]:
classifier = Classifier('cuda')
detector = Detector()

NameError: name 'BACKBONE' is not defined

In [1]:
batch_size = 50

batch = []
paths = []

for _, row in results.iterrows():
    image_path = row['image_path']
    cropped_image, category, box, count, humanboxes  = detector.bestBoxDetection(image_path)
    batch.append(classifier.preprocessImage(cropped_image))
    paths.append(image_path)

    if len(batch) == batch_size:
        cropped_images_tensor = torch.zeros((len(batch)))
        scores = classifier.predictOnBatch(cropped_images_tensor)

        batch = []
        paths = []
        break

scores

NameError: name 'results' is not defined

In [20]:
cropped_tensor = torch.ones((1,3,CROP_SIZE,CROP_SIZE))
cropped_tensor[0,:,:,:] =  classifier.preprocessImage(cropped_image)
scores = classifier.predictOnBatch(cropped_tensor)

In [21]:
scores[0][0]

array([    0.99998,  2.0047e-07,  9.2036e-08,  2.5957e-07,  4.8504e-07,  2.6472e-07,  2.4086e-07,  3.2431e-07,  7.2999e-08,  9.3558e-08,  3.8929e-07,  1.7785e-07,  2.4361e-07,  3.7002e-07,  6.0992e-07,  5.5937e-07,  4.8268e-07,  5.9408e-08,  4.8595e-08,  6.8013e-08,  2.5458e-08,  2.7852e-08,  1.0167e-07,  2.5306e-08,
        5.9728e-08,  1.9939e-07,  5.6642e-07,  4.6087e-06,  2.2614e-07,  3.5323e-07,  8.9175e-07,  1.1062e-08,  3.7038e-07,  1.3163e-07,  5.8316e-07,  8.9406e-08,  1.3993e-06,  2.9153e-06], dtype=float32)

In [22]:
pd.DataFrame({'species': animals, 'score':scores[0][0]})

Unnamed: 0,species,score
0,bison,0.9999824
1,badger,2.004708e-07
2,ibex,9.203561e-08
3,beaver,2.59574e-07
4,red deer,4.850423e-07
5,golden jackal,2.647196e-07
6,chamois,2.408649e-07
7,cat,3.243088e-07
8,goat,7.299902e-08
9,roe deer,9.355792e-08


### Only detector

In [20]:
from tqdm import tqdm

DFYOLO_NAME = "DF"
DFYOLO_WIDTH = 960 # image width
DFYOLO_THRES = 0.6
DFYOLO_WEIGHTS = os.path.join('models/deepfaune-yolov8s_960.pt')

results = pd.read_csv('../y_clean_thin.csv', index_col=0)
results.image_path = '../' + results.image_path
paths = results.image_path

class Detector:
    def __init__(self, device: str='cuda'):
        self.device = device
        self.yolo = YOLO(DFYOLO_WEIGHTS, verbose=False)

    def bestBoxDetection(self, filename_or_imagecv):
        try:
            results = self.yolo(filename_or_imagecv, device=self.device)
        except FileNotFoundError:
            return None, 0, np.zeros(4), 0, []
        except Exception as err:
            # print(err)
            return None, 0, np.zeros(4), 0, []
        # orig_img a numpy array (cv2) in BGR
        detection = results[0].cpu().numpy().boxes

        # Are there any relevant boxes?
        if not len(detection.cls):
            return None, 0, np.zeros(4), 0, []
        else:
            # Yes. Non empty image
            pass
        # Is there a relevant animal box? 
        try:
            # Yes. Selecting the best animal box
            kbox = np.where(detection.cls==0)[0][0]
        except IndexError:
            # No: Selecting the best box for another category (human, vehicle)
            kbox = 0
        # categories are 1=animal, 2=person, 3=vehicle and the empty category 0=empty
        category = int(detection.cls[kbox]) + 1
        box = detection.xyxy[kbox] # xmin, ymin, xmax, ymax
        # Is this an animal box ?
        if category == 1:
            # Yes: cropped image is required for classification
            croppedimage = None
        else: 
            # No: cropped image is not required for classification 
            croppedimage = None
        ## animal count
        if category == 1:
            count = sum(detection.cls==0) # only above a threshold
        else:
            count = 0
        ## human boxes
        ishuman = (detection.cls==1)
        if any(ishuman==True):
            humanboxes = detection.xyxy[ishuman,]
        else:
            humanboxes = []
        return croppedimage, category, box, count, humanboxes

detector = Detector()

In [36]:
results = pd.DataFrame({'image_path': [], 'bbox': [], 'category': [], 'count': []})
i = 0
l = len(paths)

for path in paths:
    _, category, box, count, _  = detector.bestBoxDetection(path)
    results = pd.concat([results, pd.DataFrame([[path, box, category, count]], columns=results.columns)], ignore_index=True)
    i += 1
    print(f'{100*i/l: .1f}%')


image 1/1 e:\mgr\deepfaune\..\..\pictures\01_CZARNE\B\Lato\1\2023-07-16 08-48-23.JPG: 544x960 1 person, 123.3ms
Speed: 8.7ms preprocess, 123.3ms inference, 7.3ms postprocess per image at shape (1, 3, 544, 960)
 0.0%

image 1/1 e:\mgr\deepfaune\..\..\pictures\01_CZARNE\B\Lato\1\2023-07-21 06-39-47.JPG: 544x960 1 animal, 73.6ms
Speed: 6.5ms preprocess, 73.6ms inference, 7.9ms postprocess per image at shape (1, 3, 544, 960)
 0.0%

image 1/1 e:\mgr\deepfaune\..\..\pictures\01_CZARNE\B\Lato\1\2023-07-25 13-56-58.JPG: 544x960 1 animal, 76.7ms
Speed: 8.0ms preprocess, 76.7ms inference, 7.9ms postprocess per image at shape (1, 3, 544, 960)
 0.0%

image 1/1 e:\mgr\deepfaune\..\..\pictures\01_CZARNE\B\Lato\1\2023-07-28 13-48-47.JPG: 544x960 1 animal, 74.6ms
Speed: 6.5ms preprocess, 74.6ms inference, 9.2ms postprocess per image at shape (1, 3, 544, 960)
 0.0%

image 1/1 e:\mgr\deepfaune\..\..\pictures\01_CZARNE\B\Lato\1\2023-07-29 08-26-10.JPG: 544x960 1 animal, 69.4ms
Speed: 6.9ms preprocess, 6

In [37]:
results['empty'] = results['category'] == 0
results.to_csv('../deepfaune_detector_results.csv')

In [34]:
results[0:50]

Unnamed: 0,image_path,bbox,category,count
0,../../pictures/01_CZARNE/B/Lato/1/2023-07-16 08-48-23.JPG,"[3234.5544, 0.0, 6528.0, 3060.5872]",2.0,0.0
1,../../pictures/01_CZARNE/B/Lato/1/2023-07-21 06-39-47.JPG,"[1913.8944, 2152.0688, 2610.9468, 2770.2737]",1.0,1.0
2,../../pictures/01_CZARNE/B/Lato/1/2023-07-25 13-56-58.JPG,"[1018.0964, 2017.4259, 1503.2144, 2375.9036]",1.0,1.0
3,../../pictures/01_CZARNE/B/Lato/1/2023-07-28 13-48-47.JPG,"[1047.3823, 1877.0756, 1913.8517, 2442.0508]",1.0,1.0
4,../../pictures/01_CZARNE/B/Lato/1/2023-07-29 08-26-10.JPG,"[1869.2074, 1535.0756, 2749.2756, 2253.3918]",1.0,1.0
5,../../pictures/01_CZARNE/B/Lato/1/2023-07-29 08-26-22.JPG,"[0.018624878, 1690.8251, 316.1827, 2270.7837]",1.0,1.0
6,../../pictures/01_CZARNE/B/Lato/2/2023-07-18 02-03-54.JPG,"[0.0, 0.0, 0.0, 0.0]",0.0,0.0
7,../../pictures/01_CZARNE/B/Lato/2/2023-07-18 02-06-43.JPG,"[0.0, 0.0, 0.0, 0.0]",0.0,0.0
8,../../pictures/01_CZARNE/B/Lato/2/2023-07-25 18-36-14.JPG,"[0.0, 0.0, 0.0, 0.0]",0.0,0.0
9,../../pictures/01_CZARNE/B/Lato/2/2023-07-26 03-47-00.JPG,"[0.0, 0.0, 0.0, 0.0]",0.0,0.0


In [35]:
og = pd.read_csv('../y_clean_thin.csv', index_col=0)
og[0:50]

Unnamed: 0,image_path,species
0,../pictures/01_CZARNE/B/Lato/1/2023-07-16 08-48-23.JPG,bird
1,../pictures/01_CZARNE/B/Lato/1/2023-07-21 06-39-47.JPG,bird
2,../pictures/01_CZARNE/B/Lato/1/2023-07-25 13-56-58.JPG,bird
3,../pictures/01_CZARNE/B/Lato/1/2023-07-28 13-48-47.JPG,bird
4,../pictures/01_CZARNE/B/Lato/1/2023-07-29 08-26-10.JPG,bird
5,../pictures/01_CZARNE/B/Lato/1/2023-07-29 08-26-22.JPG,bird
6,../pictures/01_CZARNE/B/Lato/2/2023-07-18 02-03-54.JPG,red deer
7,../pictures/01_CZARNE/B/Lato/2/2023-07-18 02-06-43.JPG,red deer
8,../pictures/01_CZARNE/B/Lato/2/2023-07-25 18-36-14.JPG,empty
9,../pictures/01_CZARNE/B/Lato/2/2023-07-26 03-47-00.JPG,red deer
