In [1]:
import pandas as pd
from viame_annotation import Viame
import os

In [2]:
def viame_to_standard(csv_path, source):
    """Convert a VIAME annotation CSV into the standard annotation table.

    Every annotation row of the input becomes one output row. Extraction of
    the frame id, track id, taxonomy and bounding box is delegated to the
    ``Viame`` helper; this function only assembles the results.

    Args:
        csv_path: Path to the VIAME CSV file on disk.
        source: Source/video name. Used as the prefix of the generated
            ``Filename`` (``{source}_frame{frame_id}.jpg``) and stored in the
            ``Source`` column.

    Returns:
        pandas.DataFrame with the columns Filename, Family, Genus, Species,
        ymin, xmin, xmax, ymax, Augmentation, Source, track_id, frame_id
        (object dtype). The frame is empty, but still has these columns, when
        the input holds no annotation rows.
    """
    columns = ['Filename', 'Family', 'Genus', 'Species', 'ymin', 'xmin', 'xmax', 'ymax',
               'Augmentation', 'Source', 'track_id', 'frame_id']

    # The line right after the header is a metadata comment only in some
    # files. Skip it only when it really is a '#' comment line; skipping it
    # unconditionally would silently drop the first annotation otherwise.
    # NOTE(review): assumes metadata lines start with '#' - confirm against the exporter.
    with open(csv_path, encoding='utf-8', errors='replace') as csv_file:
        csv_file.readline()  # header line
        second_line = csv_file.readline()
    skiprows = [1] if second_line.lstrip().startswith('#') else None

    df = pd.read_csv(csv_path, skiprows=skiprows)
    viame = Viame()

    # Collect plain dicts and build the DataFrame once at the end
    records = []
    for _, row in df.iterrows():
        frame_id = viame.get_frame_id(row)
        track_id = viame.get_track_id(row)
        family, genus, species = viame.get_taxonomy(row)
        xmin, ymin, xmax, ymax = viame.get_bbox(row)

        records.append({
            'Filename': f"{source}_frame{frame_id}.jpg",
            'Family': family,
            'Genus': genus,
            'Species': species,
            'ymin': ymin,
            'xmin': xmin,
            'xmax': xmax,
            'ymax': ymax,
            'Augmentation': "none",
            'Source': source,
            'track_id': track_id,
            'frame_id': frame_id,
        })

    # dtype=object keeps the values exactly as extracted (no numeric
    # up-casting), matching what this function has always returned.
    return pd.DataFrame(records, columns=columns, dtype=object)

In [None]:
def standard_to_viame(standard_df, original_csv_path, video_folder, fps=30):
    """Convert a standard-format annotation table back into a VIAME-style table.

    NOTE(review): this converter was flagged as unverified ("check back") and
    still needs validating against a real VIAME file. In particular, pandas
    splits an unquoted header at every comma, so the keys
    '4-7: Img-bbox(TL_x,TL_y,BR_x,BR_y)' and
    '10-11+: Repeated Species,Confidence Pairs or Attributes' only land in the
    output if the original header contains those exact column names.

    Args:
        standard_df: DataFrame with at least the columns Filename, Genus,
            Species, xmin, ymin, xmax, ymax and track_id. ``Filename`` must
            look like ``<anything>_frame<number>.<ext>``.
        original_csv_path: Path to the original VIAME CSV; only its header is
            read, to obtain the output column names.
        video_folder: Value written to '2: Video or Image Identifier'.
        fps: Divisor applied to the frame number parsed from ``Filename`` to
            obtain '3: Unique Frame Identifier' (integer division). Defaults
            to 30, the value that used to be hard-coded.

    Returns:
        pandas.DataFrame whose columns are exactly those of the original CSV
        header. Values whose key is not one of those columns are dropped;
        columns that receive no value are left empty (NaN).
    """
    # Only the header is needed, so read zero data rows
    viame_columns = pd.read_csv(original_csv_path, nrows=0).columns

    # Collect one dict per annotation and build the frame once, instead of
    # enlarging a DataFrame row by row.
    records = []
    for _, row in standard_df.iterrows():
        # Frame number is the digits between '_frame' and the extension
        frame_number = int(row['Filename'].split('_frame')[1].split('.')[0])
        bbox = f"{row['xmin']},{row['ymin']},{row['xmax']},{row['ymax']}"

        records.append({
            '1: Detection or Track-id': "",  # Fill in or calculate as needed
            '2: Video or Image Identifier': video_folder,
            '3: Unique Frame Identifier': frame_number // fps,
            '4-7: Img-bbox(TL_x,TL_y,BR_x,BR_y)': bbox,
            '8: Detection or Length Confidence': "",  # Fill in or calculate as needed
            '9: Target Length (0 or -1 if invalid)': 0,  # Assumed default value
            '10-11+: Repeated Species,Confidence Pairs or Attributes': f"{row['Genus']} {row['Species']}",
            '# 1: Detection or Track-id': row['track_id'],
        })

    return pd.DataFrame(records, columns=viame_columns)

In [None]:
import csv
import re


def convert_time(time_str):
    """
    Translate a YOLO timestamp such as ``00h:02m:57s:510ms`` into the VIAME
    form ``00:02:57.510000``.

    The millisecond field is scaled to microseconds (510 -> 510000) and
    zero-padded to six digits; hours, minutes and seconds are zero-padded to
    two digits. Raises ValueError when the string does not start with the
    expected ``<h>h:<m>m:<s>s:<ms>ms`` pattern.
    """
    parsed = re.match(r'(\d+)h:(\d+)m:(\d+)s:(\d+)ms', time_str)
    if parsed is None:
        raise ValueError(f"Invalid time format: {time_str}")

    # All four captured fields are digit runs, so int() cannot fail here
    hh, mm, ss, millis = (int(field) for field in parsed.groups())
    return "{:02d}:{:02d}:{:02d}.{:06d}".format(hh, mm, ss, millis * 1000)

def convert_yolo_to_viame(input_csv, output_csv):
    """
    Convert a YOLO object-detection CSV into a VIAME-format CSV.

    The input must have the columns time, frame, track_id, label, confidence,
    xmin, ymin, xmax, ymax and track_metadata. For every input row two lines
    are written: a ``# track_metadata: ...`` comment line, then the detection
    line ``track_id, time, frame, xmin, ymin, xmax, ymax, confidence, -1,
    label, confidence``. The time is reformatted by ``convert_time``, the box
    coordinates are rounded to integers and the confidence is written with
    six decimals.

    Args:
        input_csv: Path of the YOLO CSV to read.
        output_csv: Path of the VIAME CSV to write (overwritten if present).

    Raises:
        ValueError: if a time, number or coordinate cannot be parsed.
        KeyError: if an expected column is missing from the input.
    """
    header = (
        "# 1: Detection or Track-id,2: Video or Image Identifier,3: Unique Frame Identifier,"
        "4-7: Img-bbox(TL_x,TL_y,BR_x,BR_y),8: Detection or Length Confidence,"
        "9: Target Length (0 or -1 if invalid),10-11+: Repeated Species,Confidence Pairs or Attributes\n"
    )

    # newline='' on the input as well: the csv module needs it to handle
    # line breaks inside quoted fields correctly.
    with open(input_csv, 'r', newline='') as in_file, open(output_csv, 'w', newline='') as out_file:
        out_file.write(header)

        for row in csv.DictReader(in_file):
            # Parse everything first so a malformed row raises before any
            # part of it has been written.
            viame_time = convert_time(row['time'])
            frame = int(float(row['frame']))
            track_id = row['track_id']
            species = row['label']
            confidence = float(row['confidence'])

            # Bounding box corners, rounded to whole pixels
            xmin = round(float(row['xmin']))
            ymin = round(float(row['ymin']))
            xmax = round(float(row['xmax']))
            ymax = round(float(row['ymax']))

            # Track metadata travels as a comment line above its detection
            out_file.write(f"# track_metadata: {row['track_metadata']}\n")
            out_file.write(f"{track_id},{viame_time},{frame},{xmin},{ymin},{xmax},{ymax},"
                           f"{confidence:.6f},-1,{species},{confidence:.6f}\n")

# USAGE

In [25]:
# Example: convert one YOLO output CSV into a VIAME annotation file

yolo_csv = '/vol/biomedic3/bglocker/ugproj/tk1420/sharktrack/ALv4-v8n_augs_best/C09_240_LGX1_conf0.25/internal_results/output.csv'
output_csv = '/vol/biomedic3/bglocker/ugproj/tk1420/SharkTrack-Dev/test_annotations_viame.csv'

# Delete any output left over from an earlier run before regenerating it
if os.path.exists(output_csv):
    os.remove(output_csv)

convert_yolo_to_viame(yolo_csv, output_csv)




In [12]:
# Example usage of the function
# base_dir = '/vol/biomedic3/bglocker/ugproj2324/fv220/datasets/validation/annotations_10fps'
# annotation_folder = os.path.join(base_dir, 'annotations_viame')
# output_folder = os.path.join(base_dir, 'annotations_standard')
base_dir = '/vol/biomedic3/bglocker/ugproj/tk1420/SharkTrack-Videos/difficult3/'
annotation_folder = base_dir
output_folder = base_dir

# Every conversion is written to this one fixed file inside output_folder.
output_name = 'annotations.csv'
output_path = os.path.join(output_folder, output_name)

for annotation in os.listdir(annotation_folder):
    if not annotation.endswith(".csv"):
        continue

    input_path = os.path.join(annotation_folder, annotation)
    # The output may live in the folder being scanned. Without this guard a
    # re-run would treat the already-converted file as a VIAME input and,
    # depending on listing order, overwrite the good result.
    if os.path.abspath(input_path) == os.path.abspath(output_path):
        continue

    # Source name is everything before the first dot of the file name
    annotation_name = annotation.split('.')[0]
    print(f"Converting {annotation}")
    standard_df = viame_to_standard(input_path, annotation_name)
    standard_df.to_csv(output_path, index=False)
    print(f"Converted {output_name}")

Converting annotations.viame.csv
Converted annotations.csv


In [6]:
# Spot-check a converted file: load it and display its first rows.
# NOTE(review): relies on `output_folder` as set by whichever cell ran before this one.
df = pd.read_csv(f"{output_folder}/sp_natgeo.csv")
df.head()

Unnamed: 0,Filename,Family,Genus,Species,ymin,xmin,xmax,ymax,Augmentation,Source,track_id,frame_id
0,sp_natgeo_frame30.jpg,,Carcharhinus,limbatus,251,1811,1921,340,none,sp_natgeo,1,1
1,sp_natgeo_frame60.jpg,,Carcharhinus,limbatus,221,1650,1919,379,none,sp_natgeo,1,2
2,sp_natgeo_frame90.jpg,,Carcharhinus,limbatus,253,1632,1749,410,none,sp_natgeo,1,3
3,sp_natgeo_frame120.jpg,,Carcharhinus,limbatus,279,1614,1878,391,none,sp_natgeo,1,4
4,sp_natgeo_frame150.jpg,,Carcharhinus,limbatus,276,1762,1919,364,none,sp_natgeo,1,5


## Move Annotations to the folders in phase2

In [9]:
import shutil

output_folder = '/vol/biomedic3/bglocker/ugproj2324/fv220/datasets/phase2_copy'
base_folder = '/vol/biomedic3/bglocker/ugproj2324/fv220/datasets/frame_extraction_raw/'
annotations_folders = [
    base_folder + 'sp/cleaned_annotations/annotations_standard',
    base_folder + 'shlife/cleaned_annotations/annotations_standard',
    base_folder + 'gfp/cleaned_annotations/annotations_standard'
]

# List the destination once (it does not change while copying) and keep only
# directories: a plain file that happens to share a source name cannot
# receive a copy.
existing_sources = {
    name for name in os.listdir(output_folder)
    if os.path.isdir(os.path.join(output_folder, name))
}

for annotation_folder in annotations_folders:
    for annotation in os.listdir(annotation_folder):
        if not annotation.endswith(".csv"):
            continue

        # Source name is everything before the first dot of the file name
        source_name = annotation.split('.')[0]
        if source_name in existing_sources:
            print(f"Source {source_name} already exists, copying")
            # Copy the annotation file into the matching source folder
            shutil.copy(os.path.join(annotation_folder, annotation), os.path.join(output_folder, source_name, annotation))
        else:
            print(f'not copying {source_name} as it does not exist in the output folder')

Source sp_palau already exists, copying
Source sp_palau3 already exists, copying
Source sp_palau4 already exists, copying
Source sp_palau5 already exists, copying
Source sp_palau2 already exists, copying
Source shlife_smooth1 already exists, copying
Source shlife_scalloped1 already exists, copying
Source shlife_bull6 already exists, copying
Source shlife_bull1 already exists, copying
Source shlife_grey1 already exists, copying
Source shlife_bull7 already exists, copying
Source shlife_silvertip1 already exists, copying
Source shlife_grey2 already exists, copying
Source shlife_bull4 already exists, copying
Source shlife_bull3 already exists, copying
Source shlife_smooth3 already exists, copying
Source shlife_scalloped4 already exists, copying
Source shlife_scalloped3 already exists, copying
Source shlife_scalloped2 already exists, copying
Source shlife_smooth2 already exists, copying
Source shlife_scalloped5 already exists, copying
Source shlife_bull2 already exists, copying
Source shlif

Source gfp_palau1 already exists, copying
Source gfp_rand4 already exists, copying
Source gfp_kiribati1 already exists, copying
Source gfp_bahamas2 already exists, copying
Source gfp_samoa1 already exists, copying
Source gfp_maldives1 already exists, copying
Source gfp_niue1 already exists, copying
Source gfp_bahamas1 already exists, copying
Source gfp_belize1 already exists, copying
Source gfp_caledonia1 already exists, copying
Source gfp_polynesia1 already exists, copying
Source gfp_solomon1 already exists, copying
Source gfp_rand9 already exists, copying
Source gfp_rand11 already exists, copying
Source gfp_rand7 already exists, copying
Source gfp_fiji1 already exists, copying
Source gfp_montserrat1 already exists, copying
Source gfp_png1 already exists, copying
Source gfp_barbados1 already exists, copying
Source gfp_tobago1 already exists, copying
Source gfp_tiger1 already exists, copying
Source gfp_cook1 already exists, copying
Source gfp_hawaii1 already exists, copying
Source gfp_

## Clamp Negative Bounding-Box Coordinates to Zero in Phase2

In [1]:
# For every CSV file in any subfolder of output_folder, clamp the xmin, ymin,
# xmax and ymax columns to be at least 0. Files are rewritten in place.
output_folder = '/vol/biomedic3/bglocker/ugproj2324/fv220/datasets/phase2'
bbox_columns = ['xmin', 'ymin', 'xmax', 'ymax']

for root, dirs, files in os.walk(output_folder):
    for file in files:
        if not file.endswith(".csv"):
            continue

        print(f"Processing {file}")
        csv_path = os.path.join(root, file)
        df = pd.read_csv(csv_path)
        # Vectorised clamp. Unlike a per-element max(0, x), this leaves
        # missing values missing instead of silently turning them into 0.
        df[bbox_columns] = df[bbox_columns].clip(lower=0)
        df.to_csv(csv_path, index=False)

NameError: name 'os' is not defined