Before running this notebook, open a terminal and install pigeon by typing: pip install pigeon-jupyter

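`glob` is part of the Python standard library and does not need to be installed. IPython is normally installed together with Jupyter; if it is missing, install it via Anaconda Navigator.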

In [1]:
import glob
from pigeon import annotate
from IPython.display import display, Image
import random
import os
import pandas as pd
import numpy as np

# Root directory that the data and annotation paths below are prefixed with.
# Change this one value if the drive is mounted somewhere else.
mount = '/Volumes'

In [2]:
# Directory holding one sub-folder per observation; the per-track image folders are looked up under it.
observations_path = f'{mount}/kenya_drones/processing/kenya-tracking/processed-videos/raw-footage'

In [3]:
# Load the observation records; each entry is indexed by string keys further down
# (e.g. 'observation', 'use-in-HUHD-model').
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects - only load this file from a trusted source.
observations = np.load('../data/HH-data.npy', allow_pickle=True)

# Get track species IDs (the 'species', 'observation' and 'track' columns are used below)
track_ids = pd.read_csv(mount + '/kenya_drones/vigilance/tracksegs_sppIDs.csv')

In [4]:
# Read in previous annotations (the 'file' and 'score' columns are used below).
# This is the same CSV that the save cell at the end of the notebook overwrites.
completed_anns = pd.read_csv(mount + '/kenya_drones/vigilance/blair_head_annotations.csv')

In [5]:
# Number of annotations completed in earlier rounds
len(completed_anns)

1087

In [6]:
# Class balance of the annotations completed in earlier rounds
completed_anns.score.value_counts()

head-up      860
head-down    227
Name: score, dtype: int64

In [7]:
# Eligible images must come from an observation flagged for the head-up/head-down model.
# The explicit `== True` is kept so that only values equal to True are accepted.
tracked_obs = [
    'observation' + ob['observation']
    for ob in observations
    if ob['use-in-HUHD-model'] == True
]

# ... and must belong to a zebra track (species codes 'pz' or 'gz') within those observations
is_zebra = track_ids['species'].isin(['pz', 'gz'])
in_tracked_obs = track_ids['observation'].isin(tracked_obs)
zeb_tracks = track_ids[is_zebra & in_tracked_obs]

# One row per unique (observation, track) pair
usable_tracks = zeb_tracks[['observation', 'track']].drop_duplicates(ignore_index=True)

In [8]:
# Get a random set of up to 200 distinct images that have not been annotated before.
N_FRAMES = 200
# Upper bound on sampling attempts so the loop ends even if few unseen frames remain.
MAX_ATTEMPTS = 20 * N_FRAMES


def _visible_entries(directory):
    """Return the names in `directory`, skipping hidden entries such as '.DS_Store'."""
    return [name for name in os.listdir(directory) if not name.startswith('.')]


# The saved annotations hold paths with everything up to 'Volumes' removed (see the
# filename-adjustment cell), so compare in that mount-relative form.
already_scored = set(completed_anns['file'].str.split('Volumes', n=1).str[-1])

frames_to_annotate = []
chosen = set()
attempts = 0
while len(frames_to_annotate) < N_FRAMES and attempts < MAX_ATTEMPTS:
    attempts += 1

    # Pick a random (observation, track) pair ...
    rand_row = usable_tracks.iloc[random.randrange(len(usable_tracks))]
    track_dir = os.path.join(observations_path, rand_row['observation'],
                             'individual_images', rand_row['track'])

    # ... then a random segment folder of that track ...
    segments = _visible_entries(track_dir)
    if not segments:
        continue
    seg_dir = os.path.join(track_dir, random.choice(segments))

    # ... then a random frame inside that segment.
    frames = _visible_entries(seg_dir)
    if not frames:
        continue
    frame_path = os.path.join(seg_dir, random.choice(frames))

    # Skip frames already queued this round or already scored in an earlier round.
    relative_path = frame_path.split('Volumes', 1)[-1]
    if frame_path in chosen or relative_path in already_scored:
        continue

    chosen.add(frame_path)
    frames_to_annotate.append(frame_path)

if len(frames_to_annotate) < N_FRAMES:
    print('Only found', len(frames_to_annotate), 'new frames after', attempts, 'attempts')

In [9]:
# The list of image paths handed to pigeon's annotate() in the next cell
assets = frames_to_annotate

In [10]:
# annotate images
def _show_frame(filename):
    """Render one image file inline in the notebook, read as a JPEG."""
    display(Image(filename, format='jpg'))


annotations = annotate(
    assets,
    options=['head-up', 'head-down', 'obscured'],
    display_fn=_show_frame,
)

HTML(value='0 examples annotated, 201 examples left')

HBox(children=(Button(description='head-up', style=ButtonStyle()), Button(description='head-down', style=Butto…

Output()

Annotation done.


In [11]:
# Bind the annotations to a second name so that re-running the annotate cell
# (which rebinds `annotations`) does not lose this round's results.
# Note: this is an alias of the same object, not a copy.
round1 = annotations

In [12]:
# turn annotations into dataframe
# (assumes each annotation is a (file, score) pair - TODO confirm against pigeon's return format)
new_anns = pd.DataFrame(round1, columns =['file', 'score'])

In [13]:
# remove obscured scores
# .copy() makes the filtered frame independent, so the later assignment to its 'file'
# column does not raise SettingWithCopyWarning.
new_anns = new_anns[new_anns.score != 'obscured'].copy()

In [14]:
# check if any of the files in the new annotations have already been scored.
# The saved annotations hold paths with everything up to 'Volumes' removed, while fresh
# annotation paths still include it, so a direct comparison never matches.
# Both sides are reduced to the mount-relative form first; paths without 'Volumes'
# are left unchanged by split(..., n=1).str[-1].
new_files_rel = new_anns['file'].str.split('Volumes', n=1).str[-1]
done_files_rel = completed_anns['file'].str.split('Volumes', n=1).str[-1]
np.sum(new_files_rel.isin(done_files_rel))

0

In [15]:
# if above cell is >0, check which rows are repeated.
# Paths are compared in mount-relative form (everything after 'Volumes'), because the
# saved annotations store them without the mount prefix.
new_rel = new_anns['file'].str.split('Volumes', n=1).str[-1]
done_rel = completed_anns['file'].str.split('Volumes', n=1).str[-1]
new_anns[new_rel.isin(done_rel)]

Unnamed: 0,file,score


In [16]:
# # extract pixel coordinates for new annotations
# # Add columns for pixel coordinates of each image
# x_coord = []
# y_coord = []
# delim = '/'

# for i in new_anns.file:
#     # parse filename to get observation
#     ob = i.split('/')[7].split('n')[1]
#     # load the pixel-coordinate tracks for this observation
#     pixtracks = [d['pixel-tracks'] for d in observations if d['observation']==ob][0]
#     data = np.load(pixtracks, allow_pickle=True)
#     # parse filename to get the track and frame numbers
#     track = int(i.split('/')[9].split('-')[1])
#     frame = int(i.split('/')[11].split('_')[2].split('.')[0])
#     # the individual may not be present for all frames, so get the first frame of the track
#     first_frame = data[track]['first_frame']
#     # subtract the first frame for that individual from the frame number to get the adjusted frame number
#     frame_adj = frame-first_frame
#     # use track number and adjusted frame number to get the x and y pixel coordinates for the annotated image
#     x_co = data[track]['track'][frame_adj][1]
#     y_co = data[track]['track'][frame_adj][0]
#     # append the extracted coordinates to the coordinates lists
#     x_coord.append(x_co)
#     y_coord.append(y_co)
    
# # add coordinate lists to new_anns as new columns
# new_anns['x_coord'] = x_coord
# new_anns['y_coord'] = y_coord

In [17]:
# adjust filenames so other people can read them in: keep only the part of each path
# after the mount point ('Volumes').
# split(..., n=1).str[-1] leaves an already-stripped path unchanged, so re-running this
# cell is safe (indexing the split with [1] raised IndexError on a second run).
# .assign returns a new frame instead of writing into a filtered slice.
new_anns = new_anns.assign(file=new_anns['file'].str.split('Volumes', n=1).str[-1])

In [18]:
# keep only the 'file' and 'score' columns, then display the result
new_anns = new_anns[['file', 'score']]
new_anns

Unnamed: 0,file,score
0,/kenya_drones/processing/kenya-tracking/proces...,head-up
1,/kenya_drones/processing/kenya-tracking/proces...,head-down
2,/kenya_drones/processing/kenya-tracking/proces...,head-up
3,/kenya_drones/processing/kenya-tracking/proces...,head-up
4,/kenya_drones/processing/kenya-tracking/proces...,head-up
...,...,...
104,/kenya_drones/processing/kenya-tracking/proces...,head-up
105,/kenya_drones/processing/kenya-tracking/proces...,head-up
106,/kenya_drones/processing/kenya-tracking/proces...,head-up
107,/kenya_drones/processing/kenya-tracking/proces...,head-up


In [19]:
# add new annotations to completed annotations.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the
# supported replacement. ignore_index=True gives the combined frame unique row labels
# (the index is not written to the CSV, so the saved file is unaffected).
all_anns = pd.concat([completed_anns, new_anns], ignore_index=True)

In [20]:
# drop any duplicates
# NOTE(review): with no `subset`, only rows identical in every column are removed, so a
# file that was given different scores in two rounds keeps both rows - consider
# drop_duplicates(subset='file', keep='last') if one label per file is intended.
all_anns = all_anns.drop_duplicates()

In [23]:
# save all annotations back to the CSV the previous annotations were read from.
# The path is built from `mount` (as in the read cell) so that changing the mount point
# in one place keeps the read and write locations in sync.
all_anns.to_csv(mount + '/kenya_drones/vigilance/blair_head_annotations.csv', index = False)

Before annotation, I need to read in a csv of filenames/annotations that I've already done and compare the assets list to that dataframe. Then have the widget only show me files that are not already in the dataframe. Then at the end, I need to merge the new annotations into that dataframe and save the new dataframe as a csv.

I could also create a cell that goes through images that were skipped. Then, any of those images that had a non-skip annotation in the new annotations list should have their old skipped annotations replaced in the dataframe, and re-saved.

In [21]:
# Class balance of the combined (previous + new) annotations
all_anns.score.value_counts()

head-up      935
head-down    259
Name: score, dtype: int64

In [22]:
# Class balance of this round's new annotations only
new_anns.score.value_counts()

head-up      77
head-down    32
Name: score, dtype: int64