In [1]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

In [49]:
# Taxonomy to record for each annotation label that should be kept.
# Labels are looked up after lower-casing, so the keys here are lower-case.
labels_mapping = {
  'grey reef shark': dict(
    family='Carcharhinidae',
    genus='Carcharhinus',
    species='Amblyrhynchos',
  ),
}

In [56]:
# Collect the bounding boxes of every object whose label appears in
# `labels_mapping` from the Pascal VOC XML files, and move the corresponding
# images into `new_folder` with an `rf3_` filename prefix.
subfolders = ['test', 'train', 'valid']
main_folder = '../raw/Seychelles.v34i.voc/'
new_folder = '../images/rf3/'

# os.rename does not create missing directories, so make sure the target exists.
os.makedirs(new_folder, exist_ok=True)

# One record per annotated object whose label is in `labels_mapping`.
data = []

# Every (lower-cased) label encountered, whether it is mapped or not.
names = set()

for subfolder in subfolders:
  folder_path = os.path.join(main_folder, subfolder)
  # os.listdir returns entries in arbitrary order; sorting keeps the row order
  # of the resulting table reproducible between runs.
  for filename in sorted(os.listdir(folder_path)):
      if not filename.endswith('.xml'):
          continue  # only the annotation files are parsed

      root = ET.parse(os.path.join(folder_path, filename)).getroot()

      # Image file name as recorded inside the annotation file
      jpg_filename = root.find('filename').text
      new_filename = f'rf3_{jpg_filename}'

      # Becomes True once at least one object in this file has a mapped label
      to_move = False

      for obj in root.findall('object'):
          name = obj.find('name').text.lower()
          names.add(name)
          if name not in labels_mapping:
              continue

          to_move = True
          taxonomy = labels_mapping[name]
          bndbox = obj.find('bndbox')

          # Coordinates are stored exactly as the text read from the XML.
          data.append({
              'Filename': new_filename,
              'Family': taxonomy['family'],
              'Genus': taxonomy['genus'],
              'Species': taxonomy['species'],
              'ymin': bndbox.find('ymin').text,
              'xmin': bndbox.find('xmin').text,
              'xmax': bndbox.find('xmax').text,
              'ymax': bndbox.find('ymax').text,
              'Augmentation': None,
              'Source': 'RF3'
          })

      if to_move:
          source_path = os.path.join(folder_path, jpg_filename)
          target_path = os.path.join(new_folder, new_filename)
          # When the cell is re-run the image has already been moved: the source
          # is gone and the target exists, so there is nothing left to do.
          # A source that is missing without a moved copy still raises, so a
          # genuinely absent image is not silently ignored.
          if os.path.exists(source_path) or not os.path.exists(target_path):
              os.rename(source_path, target_path)

# Create a DataFrame from the data
df = pd.DataFrame(data)

In [57]:
# Display the collected annotation records (one row per kept bounding box).
df

Unnamed: 0,Filename,Family,Genus,Species,ymin,xmin,xmax,ymax,Augmentation,Source
0,rf3_BR1_Full_mp4-1208_jpg.rf.bfeeef0088cee1fee...,Carcharhinidae,Carcharhinus,Amblyrhynchos,694,1244,1804,1081,,RF3
1,rf3_BR1_Full_mp4-1580_jpg.rf.aac9ee20ea97a8880...,Carcharhinidae,Carcharhinus,Amblyrhynchos,50,1,55,167,,RF3
2,rf3_BR1_Full_mp4-1589_jpg.rf.b38bc947d7bd8ef5e...,Carcharhinidae,Carcharhinus,Amblyrhynchos,113,91,281,369,,RF3
3,rf3_BR1_Full_mp4-1589_jpg.rf.cb1a9363286a61eea...,Carcharhinidae,Carcharhinus,Amblyrhynchos,84,254,922,647,,RF3
4,rf3_BR1_Full_mp4-1604_jpg.rf.34774560a45d9e873...,Carcharhinidae,Carcharhinus,Amblyrhynchos,487,431,612,620,,RF3
...,...,...,...,...,...,...,...,...,...,...
4502,rf3_BR1_Full_mp4-6579_jpg.rf.6fee8f954b78298ec...,Carcharhinidae,Carcharhinus,Amblyrhynchos,111,83,575,598,,RF3
4503,rf3_BR1_Full_mp4-6580_jpg.rf.d07743ea3182120b8...,Carcharhinidae,Carcharhinus,Amblyrhynchos,64,32,473,517,,RF3
4504,rf3_BR1_Full_mp4-6583_jpg.rf.25090228c1eccc283...,Carcharhinidae,Carcharhinus,Amblyrhynchos,1,1,255,315,,RF3
4505,rf3_BR1_Full_mp4-6583_jpg.rf.25090228c1eccc283...,Carcharhinidae,Carcharhinus,Amblyrhynchos,86,387,432,159,,RF3


In [58]:
# Number of non-null values in each column.
df.count()

Filename        4507
Family          4507
Genus           4507
Species         4507
ymin            4507
xmin            4507
xmax            4507
ymax            4507
Augmentation       0
Source          4507
dtype: int64

In [59]:
# Save the annotations as CSV; index=False leaves the row index out of the file.
df.to_csv('../annotations/RF3.csv', index=False)