In [1]:
from __future__ import print_function
import pandas as pd
import numpy as np
import csv
import os
import torch

import random

from torch.utils.data import Dataset, DataLoader
from PIL import Image
from skimage import io
from torchvision import datasets, transforms

# Module-level debug switch: when True, Chimp_Dataset prints the index and
# filename of each image it adds to a split file or to a prepared batch.
verbose = False


In [2]:
class Chimp_Dataset(Dataset):
    """Image dataset built from an ``annotations_czoo.txt`` file.

    The folder that contains the annotation file is also expected to contain
    the images (paths in the annotation file are joined onto that folder).
    On first use the annotations are split per individual into
    ``line_data_Train.csv`` / ``line_data_Test.csv`` and the number of
    annotated images per individual is stored in ``num_ind_file.csv``; later
    instantiations reuse those files.

    All three CSV files are header-less:

    * split files: ``index_in_shuffled_list, image_filename, label``
    * count file:  ``running_index, number_of_annotations``

    The label of an individual is the (0-based) position of its first
    annotation line, so labels are not contiguous.

    Parameters
    ----------
    dat_folder : str
        Path whose directory part is the dataset root (typically the path of
        the annotation file itself).
    transform : callable or None
        Applied to the PIL image of every sample; ``None`` returns the raw
        array produced by the loader.
    percent : float
        Stored on the instance as ``self.percent``.
        NOTE(review): the split itself uses the fixed ``_TRAIN_FRACTION``;
        ``percent`` is not consulted anywhere in this class - confirm intent.
    train : bool
        Select the training split (default) or the test split.
    """

    # Fraction of every individual's images that goes to the training split.
    _TRAIN_FRACTION = 0.8

    def __init__(self, dat_folder,transform,percent,train = True):
        super(Chimp_Dataset, self).__init__()
        self.transform = transform
        self.percent = percent
        self.root_dir = os.path.dirname(dat_folder)

        self.csv_file_train = os.path.join(self.root_dir, "line_data_Train.csv")
        self.csv_file_test = os.path.join(self.root_dir, "line_data_Test.csv")
        self.num_ind_file = os.path.join(self.root_dir,"num_ind_file.csv")

        # isfile() also works when root_dir is '' (path relative to the cwd),
        # where os.listdir('') would raise.
        have_split = (os.path.isfile(self.csv_file_train)
                      and os.path.isfile(self.csv_file_test))
        if not have_split:
            self.num_ind = self._build_split()
        else:
            # The file is written without a header row, so it must be read
            # without one or the first individual is lost.
            counts = pd.read_csv(self.num_ind_file, header=None)
            self.num_ind = [int(c) for c in counts.iloc[:, 1]]

        split_file = self.csv_file_train if train else self.csv_file_test
        # header=None: the first line of a split file is a sample, not a header.
        self.CHIM_datafile = pd.read_csv(split_file, header=None)

    @staticmethod
    def _group_positions(values):
        """Group positions by value in one pass.

        Returns ``(order, positions)``: ``order`` lists the distinct values by
        first appearance, ``positions[value]`` holds the ascending positions
        at which ``value`` occurs.
        """
        order = []
        positions = {}
        for pos, value in enumerate(values):
            if value not in positions:
                positions[value] = []
                order.append(value)
            positions[value].append(pos)
        return order, positions

    @staticmethod
    def _write_rows(path, rows):
        """Write ``rows`` to ``path`` as header-less CSV, replacing any old file."""
        with open(path, "w") as f:
            csv.writer(f, delimiter=',').writerows(rows)

    def _build_split(self):
        """Create the train/test split files and the per-individual count file.

        Returns the list of annotation counts per individual, ordered by first
        appearance in the annotation file.
        """
        with open(os.path.join(self.root_dir,'annotations_czoo.txt'),"r") as f:
            tokens = [line.split(' ') for line in f]
        # Blank or truncated lines carry no filename (token 1) / individual
        # (token 3) pair; skip them instead of failing with IndexError.
        tokens = [t for t in tokens if len(t) > 3]
        filenames = [t[1] for t in tokens]
        ind = [t[3] for t in tokens]

        known_ind, positions = self._group_positions(ind)

        train_rows = []
        test_rows = []
        for individual in known_ind:
            position_i = list(positions[individual])
            # Label = index of the individual's first annotation line.
            label = str(position_i[0])
            N = len(position_i) * self._TRAIN_FRACTION
            if N > 3:
                random.shuffle(position_i)
                n_train = int(N)
                for j, pos in enumerate(position_i):
                    image_filename = filenames[pos]
                    # Annotated images that are missing on disk are left out.
                    if os.path.isfile(os.path.join(self.root_dir, image_filename)):
                        if verbose: print("image {}{}".format(j,image_filename))
                        row = [str(j), image_filename, label]
                        if j < n_train:
                            train_rows.append(row)
                        else:
                            test_rows.append(row)

        # Written once in "w" mode so a stale or partial file from an earlier
        # run can never end up mixed with the new split.
        self._write_rows(self.csv_file_train, train_rows)
        self._write_rows(self.csv_file_test, test_rows)

        num_ind = [len(positions[individual]) for individual in known_ind]
        print('individuals {}'.format(len(known_ind)))
        print('num_ind {}'.format(len(num_ind)))
        self._write_rows(self.num_ind_file,
                         [[str(l), str(count)] for l, count in enumerate(num_ind)])
        return num_ind

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.CHIM_datafile)

    def __getitem__(self,idx):
        """Return ``(sample, target)`` for row ``idx`` of the selected split."""
        img_name = self.CHIM_datafile.iloc[idx,1]
        target = self.CHIM_datafile.iloc[idx,2]
        sample = self._load_sample(os.path.join(self.root_dir,img_name))
        return (sample,target)

    def _load_sample(self, data_file):
        """Load one image; apply ``self.transform`` to its PIL form when set."""
        image = self.__loadfile(data_file)
        if self.transform:
            return self.transform(Image.fromarray(image))
        return image

    def __loadfile(self, data_file):
        """Read an image file; 2-D (single channel) images are stacked to 3 channels."""
        image = io.imread(data_file)
        if len(image.shape)<3:
            image = np.stack((image,)*3, axis=-1)
        return image

    def prepare_batch(self,percent = 0.2):
        """Draw the same number of random images for every label of the split.

        The per-label sample count is ``int(min(self.num_ind) * percent)``.
        Labels that do not have strictly more rows than that count are
        skipped, as are images that are missing on disk.

        Returns
        -------
        (image_train, labels_train) : (list, list of int)
            Loaded (and, if a transform is set, transformed) images together
            with their labels, grouped by label in order of first appearance.
        """
        labels = self.CHIM_datafile.iloc[:, 2].tolist()
        if not labels:
            return [], []
        image_names = self.CHIM_datafile.iloc[:, 1].tolist()
        min_numind = int(np.min(self.num_ind)*percent)

        known_labels, positions = self._group_positions(labels)

        train_line_list = []
        labels_train = []
        for label in known_labels:
            position_i = positions[label]
            if len(position_i) > min_numind:
                random.shuffle(position_i)
                for j in range(min_numind):
                    # Index through the shuffled positions of *this* label;
                    # using j directly would always pick the first rows of
                    # the table, whatever their label.
                    image_filename = image_names[position_i[j]]
                    if os.path.isfile(os.path.join(self.root_dir,image_filename)):
                        if verbose: print("image {}{}".format(j,image_filename))
                        train_line_list.append(image_filename)
                        labels_train.append(int(label))

        image_train = [self._load_sample(os.path.join(self.root_dir,name))
                       for name in train_line_list]
        return image_train, labels_train

In [3]:
#filename = '/home/rita/JupyterProjects/EYE-SEA/DataSets/Verification/chimpanzee_faces-master/datasets_cropped_chimpanzee_faces/data_CZoo/annotations_czoo.txt'
#transform = transforms.Compose([
#    transforms.Resize((224,224)),
#    transforms.ToTensor(),        
#    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
#])
#c = Chimp_Dataset(filename,transform,0.8)
#c_t = Chimp_Dataset(filename,transform,0.8,train=False)
#image_train, labels_train = c.prepare_batch(0.2)

In [5]:
#import import_ipynb
#import ResNetCaps_E
#import losses

#import torch.nn as nn
#import torch.optim as optim
#from torch.optim import lr_scheduler
#from torch.optim import Adam
#import torch.nn.functional as F

#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#model = ResNetCaps_E.ResNetCaps_E(DigitEnd=True)

#if torch.cuda.device_count() > 1:
#    print("Let's use", torch.cuda.device_count(), "GPUs!")
  # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
#    model = nn.DataParallel(model)
#model = model.to(device)
#optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),lr = 0.001)
#criterion = losses.HAP2STripletLoss()

importing Jupyter notebook from ResNetCaps_E.ipynb
importing Jupyter notebook from losses.ipynb
importing Jupyter notebook from ohem.ipynb


In [7]:
#image_train = torch.stack(image_train)
#image_train = image_train.to(device)
#labels_train = torch.Tensor(labels_train).to(device)

#emb_a = model(image_train)
#emb_a = emb_a.view(image_train.size(0),-1)
#optimizer.zero_grad()
#loss = criterion(emb_a.squeeze(),labels_train)