This notebook loads the history.csv file of the respective webcam, which contains the detections made by the preliminary model. It then randomly extracts frames that contain a given species detected with a confidence level greater than or equal to 0.99.

In [None]:
import os
import shutil

import cv2
import pandas as pd
from PIL import Image

# Utility Functions

In [None]:
def convert_string_to_list(string, convert_to_float=False, delimiter=","):
    """Parse the textual form of a list (e.g. ``"['a', 'b']"``) into a list.

    Surrounding brackets and single quotes are removed, the text is split on
    ``delimiter``, every element is stripped of surrounding whitespace and
    empty elements are discarded.

    Args:
        string: Text such as ``"['a', 'b']"`` or ``"[0.99 0.5]"``.
        convert_to_float: When true, every element is converted with
            ``float``; otherwise the elements are returned as strings.
        delimiter: Separator between elements.

    Returns:
        A list of strings, or a list of floats when ``convert_to_float`` is
        true. An empty list is returned for ``"[]"`` or an empty string.

    Raises:
        ValueError: If ``convert_to_float`` is true and an element is not a
            valid float literal.
    """
    raw_elements = string.strip('][ ').replace("'", "").split(delimiter)

    # Strip before filtering: splitting "a, b" on "," leaves " b", and
    # splitting on " " produces empty or whitespace-only pieces whenever the
    # values are separated by more than one blank or by a line break.
    elements = [element.strip() for element in raw_elements]
    elements = [element for element in elements if element]

    if convert_to_float:
        return [float(element) for element in elements]

    return elements

In [None]:
def randomly_select_frames(history, n_frames=1500, random_state=42):
    """Return a reproducible random subset of the rows of ``history``.

    Rows are drawn without replacement.

    Args:
        history: DataFrame to sample from.
        n_frames: Number of rows to draw (1500 by default).
        random_state: Seed passed to ``DataFrame.sample`` so that repeated
            calls return the same rows (42 by default).

    Returns:
        A DataFrame with ``n_frames`` rows taken from ``history``.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when ``n_frames`` is
            larger than the number of rows in ``history``.
    """
    return history.sample(n_frames, replace=False, random_state=random_state)

In [None]:
def filter_by_detections_number(history, detections_number=2):
    """Keep only the rows whose ``labels`` entry lists enough detections.

    Args:
        history: DataFrame with a ``labels`` column holding the textual form
            of a list of labels.
        detections_number: Minimum number of parsed labels a row must have
            to be kept.

    Returns:
        The rows of ``history`` that satisfy the threshold.
    """
    def has_enough_detections(label):
        return len(convert_string_to_list(label)) >= detections_number

    keep_row = history['labels'].apply(has_enough_detections)
    return history[keep_row]
    

In [None]:
def create_used_index_file(filepath):
    """Write an empty used-index CSV (header only) to ``filepath``.

    The file has the columns ``used_index`` and ``label`` and no data rows.
    """
    header_columns = ['used_index', 'label']
    pd.DataFrame(columns=header_columns).to_csv(filepath, index=False)
    

In [None]:
def load_used_index(history_path):
    """Load the used-index CSV from ``history_path`` and refresh its backup.

    If ``used_index.csv`` does not exist yet it is first created empty with
    ``create_used_index_file``. After loading, the file is copied to
    ``used_index_bkp.csv`` in the same folder.

    Args:
        history_path: Folder containing ``used_index.csv``. The file name is
            appended directly to this string, so it must end with a path
            separator.

    Returns:
        The DataFrame read from ``used_index.csv``.

    Raises:
        OSError: If the backup copy cannot be written.
    """
    used_index_file = f'{history_path}used_index.csv'
    backup_file = f'{history_path}used_index_bkp.csv'

    if not os.path.isfile(used_index_file):
        create_used_index_file(used_index_file)

    used_index = pd.read_csv(used_index_file)

    # Back up with shutil instead of a shell `cp`: this works with paths that
    # contain spaces or shell metacharacters, does not depend on a POSIX
    # shell, and reports a failed copy instead of ignoring it.
    shutil.copy(used_index_file, backup_file)

    return used_index

In [None]:
def load_history_file(history_path):
    """Load ``history.csv`` from ``history_path`` and refresh its backup.

    After loading, the file is copied to ``history_bkp.csv`` in the same
    folder.

    Args:
        history_path: Folder containing ``history.csv``. The file name is
            appended directly to this string, so it must end with a path
            separator.

    Returns:
        The DataFrame read from ``history.csv``.

    Raises:
        OSError: If the backup copy cannot be written.
    """
    history_file = f'{history_path}history.csv'
    backup_file = f'{history_path}history_bkp.csv'

    history = pd.read_csv(history_file)

    # Back up with shutil instead of a shell `cp`: this works with paths that
    # contain spaces or shell metacharacters, does not depend on a POSIX
    # shell, and reports a failed copy instead of ignoring it.
    shutil.copy(history_file, backup_file)

    return history

In [None]:
def extract_frame(filename, frame_pos, webcam_no, detected_videos_path, extraceted_images_path):
    """Save one frame of a recording as a JPEG image.

    The image is named after the video, with ``.mp4`` replaced by
    ``_<frame_pos>_webcam0<webcam_no>.jpg``. Any failure is reported with a
    printed message instead of being raised, so a batch of extractions keeps
    going when a single video cannot be processed.

    Args:
        filename: Name of the video file inside ``detected_videos_path``.
        frame_pos: Position of the frame to extract.
        webcam_no: Webcam number, used only in the image file name.
        detected_videos_path: Folder holding the video; the file name is
            appended directly to this string.
        extraceted_images_path: Folder where the image is written; the image
            name is appended directly to this string. (The spelling of this
            parameter name is kept so existing callers keep working.)
    """
    image_filename = filename.replace('.mp4', f'_{frame_pos}_webcam0{webcam_no}.jpg')

    video = None
    try:
        video = cv2.VideoCapture(f'{detected_videos_path}{filename}')
        video.set(cv2.CAP_PROP_POS_FRAMES, frame_pos)
        ret, frame = video.read()

        # A failed read returns no frame; report it explicitly instead of
        # letting the colour conversion fail on an empty image.
        if not ret:
            raise ValueError(f'frame {frame_pos} could not be read')

        # OpenCV delivers BGR while PIL expects RGB.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Write to the folder received as argument rather than to a global
        # variable that merely happens to have a similar name.
        Image.fromarray(frame).save(f'{extraceted_images_path}{image_filename}')
    except Exception as e:
        print(f'It was not possible to open the file {filename} due to: {e}')
    finally:
        # Always free the capture handle, even when the extraction failed.
        if video is not None:
            video.release()
        
        

# Load History

In [None]:
# Webcam whose recordings are processed (1 or 2); it selects the folder below
# and is embedded in the names of the extracted images.
webcam_no = 2

# Folder containing the recordings as well as the detection history CSV file.
webcam_path = f'/media/lucaszampar/BACKUP/webcam0{webcam_no}/'

# Folder with the history CSV file of the respective webcam.
history_path = f'{webcam_path}history/'
# Folder where the extracted frames are saved.
extracted_images_path = f'{webcam_path}extracted_images/'
# Folder with the recordings detected by the preliminary model.
detected_videos_path = f'{webcam_path}detected_videos/'


In [None]:
history = load_history_file(history_path)
used_index = load_used_index(history_path)

# Rows already extracted in earlier runs are removed so that the same frame
# is never sampled twice.
history.drop(index=used_index['used_index'], inplace=True)

In [None]:
# Maps every row index of the history to its detections, stored as a list of
# (label, confidence level) pairs parsed from the `labels` and `scores` columns.
dict_labels_scores = {
    row_index: list(zip(
        convert_string_to_list(raw_labels),
        convert_string_to_list(raw_scores, convert_to_float=True, delimiter=" "),
    ))
    for row_index, raw_labels, raw_scores in zip(
        history.index, history['labels'], history['scores']
    )
}

# Pick indexes with highest probability

In [None]:
label = "chupim" # portuguese species name
score = 0.99 # confidence level score

# Selects the rows with at least one detection of the species whose confidence
# level is greater than or equal to `score`. Each row is listed once, even when
# several of its detections qualify; otherwise the same frame would appear more
# than once among the candidates and could be sampled repeatedly.
indices = [
    index
    for index, list_label_score in dict_labels_scores.items()
    if any(
        detected_label == label and confidence >= score
        for detected_label, confidence in list_label_score
    )
]

In [None]:
# The selected row indexes are stored in a Series labelled by its own values,
# which allows entries to be dropped later by row index.
indices = pd.Series(indices, index=indices)

# Save images

In [None]:
n = 200 # number of frames to randomly extract from the selected detections

In [None]:
# Randomly selects up to `n` of the candidate detections. The sample size is
# limited to the number of candidates because sampling without replacement
# raises a ValueError when more rows are requested than are available.
candidates = history.loc[indices]
samples = candidates.sample(min(n, len(candidates)), replace=False, random_state=42)

In [None]:
# The sampled rows are removed from both the history and the candidate
# indexes, so they cannot be selected again.
history.drop(index=samples.index, inplace=True)
indices.drop(index=samples.index, inplace=True)

In [None]:
# Every sampled frame is read from its recording and saved as an image.
for video_filename, video_frame_pos in zip(samples['filename'], samples['frame_pos']):
    extract_frame(video_filename, video_frame_pos, webcam_no,
                  detected_videos_path, extracted_images_path)

In [None]:
# The row indexes of the sampled frames are recorded, together with the
# species label, in a separate file so that later runs skip them.
samples_index = samples.index.to_frame(name='used_index').assign(label=label)

used_index = pd.concat([used_index, samples_index], axis=0)
used_index.to_csv(history_path+'used_index.csv', index=False)

The 5 lines above were executed whenever any extracted frame did not contain the species or contained some distortion.