# Análise dos dados iwildcam2020

#### Bibliotecas necessárias

In [600]:
import math

import pandas as pd
import json
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.gridspec as gridspec
import seaborn as sns

%matplotlib inline

from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras import optimizers
from tensorflow.keras import models

### Carregando datasets iwildcam2020

In [603]:
# Directory path string for the training images (trailing slash included).
images_train_path = '/data/alberto/iWildCam2020/data/train/'


In [604]:
# Load the training metadata JSON and split it into three tables:
# annotations, images and categories.
file = '/data/alberto/iWildCam2020/data/metadata/iwildcam2020_train_annotations.json'

# JSON text is UTF-8; without an explicit encoding open() uses the platform
# locale, which can mis-decode non-ASCII characters on some machines.
with open(file, encoding='utf-8') as json_data:
    data = json.load(json_data)

# Image columns kept for the analysis; the remaining image fields are dropped.
image_columns = ['seq_id', 'frame_num', 'seq_num_frames', 'datetime', 'location', 'height', 'width']

# Annotation records, indexed by their 'id' field.
annotations = pd.DataFrame.from_dict(data['annotations']).set_index('id')
# Image records, indexed by 'file_name' and restricted to image_columns.
images = pd.DataFrame.from_dict(data['images']).set_index('file_name')[image_columns]
# Category records, indexed by their 'id' field.
classes = pd.DataFrame.from_dict(data['categories']).set_index('id')

In [605]:
# Load the test-set metadata JSON and split it into image and category tables.
test_file = '/data/alberto/iWildCam2020/data/metadata/iwildcam2020_test_information.json'

# JSON text is UTF-8; without an explicit encoding open() uses the platform
# locale, which can mis-decode non-ASCII characters on some machines.
with open(test_file, encoding='utf-8') as json_data:
    test_data = json.load(json_data)

# Image columns kept for the analysis; the remaining image fields are dropped.
image_columns = ['seq_id', 'frame_num', 'seq_num_frames', 'datetime', 'location', 'height', 'width']

# Test image records, indexed by 'file_name' and restricted to image_columns.
test_images = pd.DataFrame.from_dict(test_data['images']).set_index('file_name')[image_columns]
# Category records shipped with the test metadata, indexed by their 'id' field.
test_classes = pd.DataFrame.from_dict(test_data['categories']).set_index('id')

### Fazendo a junção das instâncias

In [606]:
# Build one row per annotation, enriched with image metadata and class name.
# `images` is indexed by file name, so the annotation's image id gets the
# '.jpg' suffix before it is used as the join key. `annotations` itself is
# left untouched: assign() works on a copy.
instances = (
    annotations
    .assign(image_id=lambda frame: frame['image_id'] + '.jpg')
    .join(images, on='image_id')
    .join(classes[['name']], on='category_id')
)

In [607]:
# Preview the first rows of the joined annotation table.
instances.head()

Unnamed: 0_level_0,count,image_id,category_id,seq_id,frame_num,seq_num_frames,datetime,location,height,width,name
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
a292dd3c-21bc-11ea-a13a-137349068a90,1,96b00332-21bc-11ea-a13a-137349068a90.jpg,73,96b004ea-21bc-11ea-a13a-137349068a90,1,1,2013-08-08 11:45:00.000,267,1222,1795,canis lupus
a0afcfc0-21bc-11ea-a13a-137349068a90,1,879d74d8-21bc-11ea-a13a-137349068a90.jpg,4,879d7654-21bc-11ea-a13a-137349068a90,1,1,2013-08-01 22:31:00.000,264,1772,2739,cuniculus paca
a306e9c0-21bc-11ea-a13a-137349068a90,1,9017f7aa-21bc-11ea-a13a-137349068a90.jpg,227,9017f8cc-21bc-11ea-a13a-137349068a90,1,1,2013-04-17 11:15:00.000,45,1222,1891,psophia crepitans
9eed94c4-21bc-11ea-a13a-137349068a90,1,90d93c58-21bc-11ea-a13a-137349068a90.jpg,250,90d93e38-21bc-11ea-a13a-137349068a90,1,1,2013-04-23 08:26:00.000,45,1222,1795,aguila sp
a2a4dd7a-21bc-11ea-a13a-137349068a90,1,887cd0ec-21bc-11ea-a13a-137349068a90.jpg,2,887cd29a-21bc-11ea-a13a-137349068a90,1,1,2013-04-24 13:51:00.000,45,1196,1812,tayassu pecari


In [608]:
# Print the column dtypes and non-null counts of the joined table.
instances.info()

<class 'pandas.core.frame.DataFrame'>
Index: 217959 entries, a292dd3c-21bc-11ea-a13a-137349068a90 to 9a75989c-21bc-11ea-a13a-137349068a90
Data columns (total 11 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   count           217959 non-null  int64 
 1   image_id        217959 non-null  object
 2   category_id     217959 non-null  int64 
 3   seq_id          217959 non-null  object
 4   frame_num       217959 non-null  int64 
 5   seq_num_frames  217959 non-null  int64 
 6   datetime        217959 non-null  object
 7   location        217959 non-null  int64 
 8   height          217959 non-null  int64 
 9   width           217959 non-null  int64 
 10  name            217959 non-null  object
dtypes: int64(7), object(4)
memory usage: 20.0+ MB


In [609]:
# Column labels of the joined table, as an array.
instances.columns.values

array(['count', 'image_id', 'category_id', 'seq_id', 'frame_num',
       'seq_num_frames', 'datetime', 'location', 'height', 'width',
       'name'], dtype=object)

### Particionando as instâncias com base nas categorias

In [651]:
# Count annotations for every (category_id, class name) pair.
# NOTE(review): despite the variable name, the rows here are category ids,
# not camera locations.
instances_locations_per_classes = pd.crosstab(
    index=instances['category_id'],
    columns=instances['name'],
)

In [652]:
# For each class name, count how many category ids have at least one
# annotation, then list the names in ascending order of that count.
(
    (instances_locations_per_classes != 0)
    .sum(axis=0)
    .sort_values()
)

name
acinonyx jubatus          1
nesocharis capistrata     1
niltava sumatrana         1
nothocrax urumutum        1
odocoileus virginianus    1
                         ..
helarctos malayanus       1
helogale parvula          1
hemigalus derbyanus       1
francolinus africanus     1
xerus rutilus             1
Length: 216, dtype: int64

In [653]:
# Display the category_id x name count table (pandas truncates the rendering).
instances_locations_per_classes

name,acinonyx jubatus,acryllium vulturinum,aepyceros melampus,agouti paca,aguila sp,alcelaphus buselaphus,alopochen aegyptiaca,andropadus gracilirostris,andropadus latirostris,andropadus virens,...,unidentifiable,unknown,unknown bat,unknown bird,unknown dove,unknown raptor,unknown rat,urocyon cinereoargenteus,varanus salvator,xerus rutilus
category_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
568,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
569,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,10,0,0,0,0,0
570,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Preparando a filtragem das categorias para as suas respectivas pastas

#### Nessa etapa, as categorias são filtradas e armazenadas em 3 pastas:
    1 - pasta_no_empty: contém somente as categorias de animais (todos os animais)
    2 - pasta_empty: contém somente a categoria empty
    3 - pasta_descarte: contém as 11 categorias descartadas do experimento: 'end', 'misfire', 'motorcycle', 'start', 'unidentifiable', 'unknown', 'unknown bat', 'unknown bird', 'unknown dove', 'unknown raptor', 'unknown rat'

In [654]:
# Column labels of the crosstab, i.e. every class name present in the data.
instances_locations_per_classes.columns.values

array(['acinonyx jubatus', 'acryllium vulturinum', 'aepyceros melampus',
       'agouti paca', 'aguila sp', 'alcelaphus buselaphus',
       'alopochen aegyptiaca', 'andropadus gracilirostris',
       'andropadus latirostris', 'andropadus virens', 'aramides cajanea',
       'aramus guarauna', 'arborophila rubrirostris', 'arctonyx hoevenii',
       'ardeotis kori', 'argusianus argus', 'atherurus africanus',
       'ave desconocida', 'bos taurus', 'brotogeris sp',
       'burhinus capensis', 'callosciurus notatus', 'camelus dromedarius',
       'canis adustus', 'canis familiaris', 'canis latrans',
       'canis lupus', 'canis mesomelas', 'capra aegagrus',
       'capricornis sumatraensis', 'caracal caracal',
       'cephalophus nigrifrons', 'cephalophus silvicultor',
       'cercopithecus lhoesti', 'cercopithecus mitis', 'cerdocyon thous',
       'chalcophaps indica', 'chlorocebus pygerythrus',
       'claravis pretiosa', 'collocalia linchi', 'colomys goslingi',
       'conepatus semistri

#### Criando a pasta contendo somente as categorias dos animais

In [655]:
# Animal class names: every crosstab column except the background class
# ('empty') and the eleven labels discarded from the experiment.
# Deriving the list from the crosstab replaces a hand-copied literal of 204
# names, so it cannot drift from the data; it yields the same names in the
# same (column) order.
_categorias_nao_animais = {
    'empty',
    'end', 'misfire', 'motorcycle', 'start',
    'unidentifiable', 'unknown', 'unknown bat', 'unknown bird',
    'unknown dove', 'unknown raptor', 'unknown rat',
}
pasta_no_empty = [
    nome
    for nome in instances_locations_per_classes.columns
    if nome not in _categorias_nao_animais
]

In [656]:
# Keep only the animal columns of the crosstab. The name is rebound from the
# list of labels to the filtered DataFrame. list(...) keeps the cell
# re-runnable: iterating a DataFrame yields its column labels, so a second
# run selects the same columns instead of passing a DataFrame to filter().
pasta_no_empty = instances_locations_per_classes.filter(items=list(pasta_no_empty))

In [657]:
# Display the crosstab restricted to the animal columns.
pasta_no_empty

name,acinonyx jubatus,acryllium vulturinum,aepyceros melampus,agouti paca,aguila sp,alcelaphus buselaphus,alopochen aegyptiaca,andropadus gracilirostris,andropadus latirostris,andropadus virens,...,tragelaphus oryx,tragelaphus scriptus,tragelaphus strepsiceros,tragulus sp,turdus olivaceus,turtur calcospilos,turtur tympanistria,urocyon cinereoargenteus,varanus salvator,xerus rutilus
category_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
568,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
569,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
570,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Criando a pasta contendo as categorias que serão ignoradas/descartadas do experimento

In [714]:
# Class names excluded from the experiment (eleven labels).
pasta_descarte = [
    'end',
    'misfire',
    'motorcycle',
    'start',
    'unidentifiable',
    'unknown',
    'unknown bat',
    'unknown bird',
    'unknown dove',
    'unknown raptor',
    'unknown rat',
]

In [715]:
# Keep only the discarded-label columns of the crosstab. The name is rebound
# from the list of labels to the filtered DataFrame. list(...) keeps the cell
# re-runnable: iterating a DataFrame yields its column labels, so a second
# run selects the same columns instead of passing a DataFrame to filter().
pasta_descarte = instances_locations_per_classes.filter(items=list(pasta_descarte))

In [716]:
# Display the crosstab restricted to the discarded-label columns.
pasta_descarte

name,end,misfire,motorcycle,start,unidentifiable,unknown,unknown bat,unknown bird,unknown dove,unknown raptor,unknown rat
category_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
567,0,0,0,0,0,0,0,0,0,0,0
568,0,0,0,0,0,0,0,0,0,0,0
569,0,0,0,0,0,0,0,0,10,0,0
570,0,0,0,0,0,0,0,0,0,0,0


In [717]:
# Column labels that survived the filter, as an array.
pasta_descarte.columns.values

array(['end', 'misfire', 'motorcycle', 'start', 'unidentifiable',
       'unknown', 'unknown bat', 'unknown bird', 'unknown dove',
       'unknown raptor', 'unknown rat'], dtype=object)

In [728]:
# Group the discard table by the values of all eleven count columns, so rows
# with identical counts in every column fall into the same group.
colunas_descarte = [
    'end', 'misfire', 'motorcycle', 'start',
    'unidentifiable', 'unknown', 'unknown bat', 'unknown bird',
    'unknown dove', 'unknown raptor', 'unknown rat',
]
grupo_descarte = pasta_descarte.groupby(by=colunas_descarte, as_index=False)

In [729]:
# On a groupby, head() returns the first rows of *each* group (up to five):
# the all-zero rows share one group, while each row with a distinct non-zero
# count pattern forms its own group and is therefore shown.
grupo_descarte.head()

name,end,misfire,motorcycle,start,unidentifiable,unknown,unknown bat,unknown bird,unknown dove,unknown raptor,unknown rat
category_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,0
79,0,0,0,0,0,2500,0,0,0,0,0
177,0,0,0,0,0,0,0,30,0,0,0
198,0,0,0,0,0,0,7,0,0,0,0
290,0,0,0,0,520,0,0,0,0,0,0
347,0,0,0,223,0,0,0,0,0,0,0


#### Criando a pasta contendo a categoria empty, ou seja, somente o background

In [720]:
# Single-item list holding the name of the background ('empty') class.
pasta_empty = ['empty']

In [721]:
# Echo the list of names selected for the background folder.
pasta_empty

['empty']

In [722]:
# Keep only the 'empty' column of the crosstab. The name is rebound from the
# list of labels to the filtered DataFrame. list(...) keeps the cell
# re-runnable: iterating a DataFrame yields its column labels, so a second
# run selects the same column instead of passing a DataFrame to filter().
pasta_empty = instances_locations_per_classes.filter(items=list(pasta_empty))

In [723]:
# Display the crosstab restricted to the 'empty' column.
pasta_empty

name,empty
category_id,Unnamed: 1_level_1
0,74217
2,0
3,0
4,0
6,0
...,...
567,0
568,0
569,0
570,0
