In [2]:
## Create a 'demo' set for testing YOLO with a small batch (~100 images)

import pandas as pd
import random

In [None]:
# Sample size for the "demo" subset
X = 100  # Adjust to draw a different number of rows

# Source: the "trail" split metadata; destination: the "demo" subset drawn from it
input_file_path = '/Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/trail_split_dim.csv'
output_file_path = '/Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/demo.csv'

# Load the full "trail" table into a pandas DataFrame
df = pd.read_csv(input_file_path)

In [2]:
# Draw the "demo" subset: X rows sampled from df (pandas samples without
# replacement by default).
# Fail loudly when df is too small. Merely printing a message would leave
# `random_rows` unassigned (or holding a stale sample from an earlier run),
# and the cells below would then fail or silently use the wrong rows.
if len(df) < X:
    raise ValueError(f"Error: The DataFrame has fewer than {X} rows.")

# Randomly select X rows; the fixed seed makes the draw reproducible
random_rows = df.sample(n=X, random_state=42)

# Save the randomly selected rows to a new CSV file (row index omitted)
random_rows.to_csv(output_file_path, index=False)

print(f"Randomly selected {X} rows saved to {output_file_path}.")

Randomly selected 100 rows saved to /Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/demo.csv.


In [4]:
# Count how many of the sampled rows fall into each split (train/test/val)
type_counts = random_rows['group'].value_counts()

# Header and counts go out on separate lines from a single print call
print("Summary of 'group' column:", type_counts, sep="\n")

Summary of 'group' column:
group
train    70
val      16
test     14
Name: count, dtype: int64


In [9]:
# Now create .txt files listing the image paths for train/test/val
unique_groups = random_rows['group'].unique()

# Create separate text files for each group
for group in unique_groups:
    # Filter the DataFrame for the current group
    group_df = random_rows[random_rows['group'] == group]

    # Extract the "full_path" values for the current group
    filenames = group_df['full_path'].tolist()

    # Use a dedicated name for the per-group file. Reusing `output_file_path`
    # here would overwrite the demo.csv path set earlier, so re-running the
    # sampling cell afterwards would write the CSV over the last group's .txt.
    group_file_path = f'/Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/demo_{group}_filenames.txt'
    with open(group_file_path, 'w') as file:
        # One path per line, with no newline after the last entry
        file.write('\n'.join(filenames))

    print(f"Text file for group {group} created: {group_file_path}")


Text file for group test created: /Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/demo_test_filenames.txt
Text file for group train created: /Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/demo_train_filenames.txt
Text file for group val created: /Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/demo_val_filenames.txt


In [None]:
## Now also run scripts 3, 5, 7, and 8 to finish making this "demo" set

In [4]:
# Fix empty "both_train.txt", "both_val.txt", and "both_test.txt" files

# Keep this data under its own names. Reusing `df` / `input_file_path` /
# `output_file_path` would replace the "trail" DataFrame and the demo.csv path
# that the sampling cell reads, so re-running that cell afterwards would sample
# from the wrong table and write to the wrong file.
both_split_path = '/Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/both_split.csv'
both_df = pd.read_csv(both_split_path)

# Summarize the number of rows in each group
type_counts = both_df['group'].value_counts()

print("Summary of 'group' column:")
print(type_counts)

# Distinct group labels, in order of first appearance
unique_groups = both_df['group'].unique()

# Create separate text files for each group
for group in unique_groups:
    # Filter the DataFrame for the current group
    group_df = both_df[both_df['group'] == group]

    # Extract the "full_path" values for the current group
    filenames = group_df['full_path'].tolist()

    # Create a text file for the current group (written under a "_new" suffix)
    group_file_path = f'/Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/both_{group}_new.txt'
    with open(group_file_path, 'w') as file:
        # One path per line, with no newline after the last entry
        file.write('\n'.join(filenames))

    print(f"Text file for group {group} created: {group_file_path}")


Summary of 'group' column:
group
train    65358
val      13928
test     13864
Name: count, dtype: int64
Text file for group train created: /Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/both_train_new.txt
Text file for group test created: /Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/both_test_new.txt
Text file for group val created: /Users/caraappel/Documents/CV4E/oregon_critters/metadata_labels/both_val_new.txt
