In [1]:
import torch
from torch import nn
from torchvision.models import mobilenet_v2
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Compose, InterpolationMode, Resize, CenterCrop, Normalize, PILToTensor, ConvertImageDtype, ToTensor

from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import cv2
from PIL import Image

from tqdm import tqdm

import os

In [4]:
model = mobilenet_v2(weights='IMAGENET1K_V1')
model = torch.nn.Sequential(*(list(model.children())[:-1]))

In [9]:
model.train()

Sequential(
  (0): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, m

In [11]:
model.cuda()

Sequential(
  (0): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, m

In [12]:
def get_images_labels(path:str) -> pd.DataFrame:
    images_paths    = []
    images_labels   = []
    count           = os.listdir(path)
    total           = len(count)
    for root,folder,file in os.walk(path):
        if(len(file) > 0):
            print(f"Classe atual: {root} -> {len(count)}/{total}")
            aux_      = [f'{root}/{x}' for x in file if x.endswith('.jpeg')]
            label_    = root.split('/')[-1]
            images_paths.extend(aux_)
            images_labels.extend([label_]*len(aux_))
            count.remove(label_)
            
    return pd.DataFrame({
    'images_paths'   : images_paths,
    'images_labels'  : images_labels
    })

In [13]:
df = get_images_labels('/home/lumalfa/datasets/raw-img/')
map_labels = {j:i for i,j in enumerate(df['images_labels'].unique())}
df['images_labels'] = df['images_labels'].apply(lambda x: map_labels[x])

Classe atual: /home/lumalfa/datasets/raw-img/pecora -> 10/10
Classe atual: /home/lumalfa/datasets/raw-img/cane -> 9/10
Classe atual: /home/lumalfa/datasets/raw-img/ragno -> 8/10
Classe atual: /home/lumalfa/datasets/raw-img/cavallo -> 7/10
Classe atual: /home/lumalfa/datasets/raw-img/gallina -> 6/10
Classe atual: /home/lumalfa/datasets/raw-img/scoiattolo -> 5/10
Classe atual: /home/lumalfa/datasets/raw-img/farfalla -> 4/10
Classe atual: /home/lumalfa/datasets/raw-img/gatto -> 3/10
Classe atual: /home/lumalfa/datasets/raw-img/elefante -> 2/10
Classe atual: /home/lumalfa/datasets/raw-img/mucca -> 1/10


In [14]:
train, aux = train_test_split(df,test_size=0.3,random_state=69,stratify=df['images_labels'])
val, test = train_test_split(aux,test_size=0.3,random_state=69,stratify=aux['images_labels'])

In [15]:
class DatasetAnimal(Dataset):
    def __init__(self, images_path, labels, transform_data = None, resize = (224,224)):
        self.images_path      = images_path
        self.labels           = labels
        self.transform_data   = transform_data
        self.resize           = resize
        
    def __getitem__(self,index):
        anchor = self.images_path[index]
        current_class = self.labels[index]
        
        negative_index = np.random.randint(0,self.__len__())
        while(self.labels[negative_index] == current_class):
            negative_index = np.random.randint(0,self.__len__())
        negative = self.images_path[negative_index]
        
        positive_index = np.random.randint(0,self.__len__())
        while(self.labels[positive_index] != current_class):
            positive_index = np.random.randint(0,self.__len__())
        positive = self.images_path[positive_index]
        
        
        """positive_label = self.dataframe.iloc[0,:]
        aux_negative   = self.dataframe[self.dataframe[self.col_label_name] != positive_label]
        aux_positive   = self.dataframe[self.dataframe[self.col_label_name] == positive_label]
        
        aux_negative   = train_test_split(
            aux_negative,
            train_size=self.len_samples,
            stratify=aux_negative[self.col_label_name]
        )[0]
        
        aux_positive   = aux_positive.sample(self.len_samples)
        
        anchor         = aux_positive.sample(self.len_samples)
        
        anchor         = anchor['images_paths']
        aux_positive   = aux_positive['images_paths']
        aux_negative   = aux_negative['images_paths']"""
        
        anchor         = self._read_img_path(anchor)
        positive       = self._read_img_path(positive)
        negative       = self._read_img_path(negative)
        
        return anchor, positive, negative
    
    def __len__(self):
        return len(self.images_path)
    
    def _read_img_path(self,path):
        #img = cv2.imread(path)
        #img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        #img = cv2.resize(img,self.resize)
        
        img = Image.open(path)
        #img = torch.tensor(img).astype(float)
        #img = torch.tensor(img,dtype=float)
        if(self.transform_data is not None):
            img = transform(img)

        return img
        

In [16]:
transform = Compose([
    Resize(256),
    CenterCrop(224),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])

train_dataset   = DatasetAnimal(train['images_paths'].tolist(), train['images_labels'].tolist(), transform_data=transform)
val_dataset     = DatasetAnimal(val['images_paths'].tolist(), val['images_labels'].tolist(), transform_data=transform)
test_dataset    = DatasetAnimal(test['images_paths'].tolist(), test['images_labels'].tolist(), transform_data=transform)

triplet_loss = torch.nn.TripletMarginLoss(margin=1.0, p=2)

train_dataloader = DataLoader(train_dataset, num_workers=0, batch_size=5)

In [20]:
model.train()
for anchor, positive, negative in tqdm(train_dataloader,total=train_dataloader.__len__()):
    anchor_pred = model(anchor.cuda())
    positive_pred = model(positive.cuda())
    negative_pred = model(negative.cuda())
    
    loss = triplet_loss(anchor_pred,positive_pred,negative_pred)
    loss.backward()
    #break

100%|███████████████████████████████████████████████████████████████████████████████| 3390/3390 [23:31<00:00,  2.40it/s]


In [21]:
loss

tensor(1.1859, device='cuda:0', grad_fn=<MeanBackward0>)