In [4]:
# formatting bird classes properly
import numpy as np
# SECURITY NOTE(review): allow_pickle=True makes np.load unpickle stored objects, and
# unpickling can execute arbitrary code -- only load this file from a trusted source.
# .item() extracts the single stored object from the loaded array; it is used as a
# dict (via .items()) further down -- presumably a class-name -> ID mapping, TODO confirm.
bird_classes = np.load("../aml-2025-feathers-in-focus/class_names.npy", allow_pickle=True).item()

def split_lower(np_string_key: np.str_) -> str:
    """Return the lowercased text that follows the first dot of a class key.

    Args:
        np_string_key: A class key (``np.str_`` or plain ``str``); it is
            converted with ``str()`` before processing.

    Returns:
        Everything after the first ``.`` in lowercase, or the whole string in
        lowercase when it contains no dot.
    """
    s = str(np_string_key)
    # Split on the FIRST dot only: splitting on every dot and taking element 1
    # would silently truncate names that themselves contain a dot.
    _prefix, dot, name = s.partition('.')
    if dot:
        # If there's a dot, return the part after it, lowercased
        return name.lower()
    # If no dot, return the entire string, lowercased
    return s.lower()

def clean_bird_classes():
    """Build a lookup from cleaned class name to the value stored for that class.

    Walks the module-level ``bird_classes`` mapping in its own order, runs
    every key through ``split_lower`` and stores the untouched value under the
    cleaned key.

    Returns:
        dict: cleaned key -> original value. When two original keys clean to
        the same string, the one encountered later overwrites the earlier one.
    """
    name_to_id = {}
    for raw_name, class_id in bird_classes.items():
        # Only the key is normalised; the value is carried over as-is.
        name_to_id[split_lower(raw_name)] = class_id
    return name_to_id
# Build the cleaned name -> ID lookup once and reuse it for the preview below
# (previously the dictionary was built twice: once to print, once to keep).
clean_birds_dict = clean_bird_classes()
print(clean_birds_dict)

# Swap key and value in dictionary for submission
# (left disabled: the prediction cell looks IDs up by class name, so the
# mapping has to stay name -> ID)
# clean_birds_dict = dict(zip(clean_birds_dict.values(), clean_birds_dict.keys()))
# print(clean_birds_dict)

{'black_footed_albatross': 1, 'laysan_albatross': 2, 'sooty_albatross': 3, 'groove_billed_ani': 4, 'crested_auklet': 5, 'least_auklet': 6, 'parakeet_auklet': 7, 'rhinoceros_auklet': 8, 'brewer_blackbird': 9, 'red_winged_blackbird': 10, 'rusty_blackbird': 11, 'yellow_headed_blackbird': 12, 'bobolink': 13, 'indigo_bunting': 14, 'lazuli_bunting': 15, 'painted_bunting': 16, 'cardinal': 17, 'spotted_catbird': 18, 'gray_catbird': 19, 'yellow_breasted_chat': 20, 'eastern_towhee': 21, 'chuck_will_widow': 22, 'brandt_cormorant': 23, 'red_faced_cormorant': 24, 'pelagic_cormorant': 25, 'bronzed_cowbird': 26, 'shiny_cowbird': 27, 'brown_creeper': 28, 'american_crow': 29, 'fish_crow': 30, 'black_billed_cuckoo': 31, 'mangrove_cuckoo': 32, 'yellow_billed_cuckoo': 33, 'gray_crowned_rosy_finch': 34, 'purple_finch': 35, 'northern_flicker': 36, 'acadian_flycatcher': 37, 'great_crested_flycatcher': 38, 'least_flycatcher': 39, 'olive_sided_flycatcher': 40, 'scissor_tailed_flycatcher': 41, 'vermilion_flycat

In [1]:
# Importing huggingface base model
from typing import Any


from transformers import pipeline
import torch
import numpy as np
import pandas as pd
import os
from PIL import Image


# NOTE: this cell needs `split_lower` and `clean_birds_dict` from the class-name
# cell above; run that cell first (on a fresh kernel this cell alone raises NameError).
pipe = pipeline("image-classification", model="Emiel/cub-200-bird-classifier-swin")

test_images_dir = "../aml-2025-feathers-in-focus/test_images/test_images/"
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
# os.listdir() returns entries in arbitrary order, so sort the paths: otherwise the
# "first 10" subset below (and the processing order) can differ between runs.
test_images = sorted(
    os.path.join(test_images_dir, f)
    for f in os.listdir(test_images_dir)
    if f.lower().endswith(IMAGE_EXTENSIONS)
)

# For testing, limit to first 10 images (remove this line for full processing)
test_images = test_images[:10]

# Print a progress line every PROGRESS_EVERY images.
# 1 = every image (fine for the small test subset); raise it for the full run.
PROGRESS_EVERY = 1

print(f"Processing {len(test_images)} test images...")

# Collect one record per image and build the DataFrame once after the loop;
# concatenating a one-row frame per iteration copies the whole frame every time.
prediction_records = []
for i, test_image_path in enumerate(test_images):
    image_name = os.path.basename(test_image_path)
    if i % PROGRESS_EVERY == 0:
        print(f"Processing image {i+1}/{len(test_images)}: {image_name}")

    # The context manager closes the underlying file handle once the
    # prediction is done instead of leaving one open per image.
    with Image.open(test_image_path) as test_image:
        prediction_results = pipe(test_image)

    #choose only most likely class prediction
    prediction = split_lower(prediction_results[0]['label']) #TODO fix this
    if prediction not in clean_birds_dict:
        # Same exception type as the plain lookup, but it names the offending
        # label so a mismatch between model labels and class names is obvious.
        raise KeyError(
            f"Predicted label {prediction!r} for {image_name} has no entry in clean_birds_dict"
        )
    prediction_records.append({
        'image_number': image_name,
        'class_name': prediction,
        'image_id': clean_birds_dict[prediction],
    })

# Passing `columns` keeps the expected schema even when no images were found.
test_image_prediction_df = pd.DataFrame(
    prediction_records, columns=['image_number', 'class_name', 'image_id']
)

print(f"Processing complete. Results for {len(test_image_prediction_df)} images:")
print(test_image_prediction_df)




Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cpu


Processing 10 test images...
Processing image 1/10: 1.jpg


NameError: name 'split_lower' is not defined

In [None]:
# Adjust test_image_prediction_df for submission

def _id_sort_key(ids: pd.Series) -> pd.Series:
    """Sort key for the id column: numeric order when every id is a number.

    Falls back to the plain string values (lexicographic order) as soon as one
    id cannot be parsed as a number, so mixed file names still sort predictably.
    """
    numeric_ids = pd.to_numeric(ids, errors="coerce")
    return ids if numeric_ids.isna().any() else numeric_ids


# One non-mutating chain: the prediction frame is left untouched, so this cell
# can be re-run without first re-running the inference cell.
submission_df = (
    test_image_prediction_df
    #Drop class_name column, rename columns
    .drop(columns=["class_name"])
    .rename(columns={"image_number": "id", "image_id": "label"})
    .assign(
        # Remove the final file extension. The image loader accepts several
        # extensions (.png, .jpeg, ...), so stripping only a literal ".jpg"
        # would leave the others in the id.
        id=lambda df: df["id"].str.replace(r"\.[^.]+$", "", regex=True)
    )
    # Sort by image id. The ids are strings, so a plain sort would give
    # 1, 10, 100, ...; the key sorts them as numbers when possible.
    .sort_values(by="id", key=_id_sort_key, kind="stable", ignore_index=True)
)
print(submission_df)
submission_df.to_csv("submission.csv", index=False)


     id label
0     1   111
1    10    60
2   100   135
3  1000    98
4  1001   160
5  1002   129
6  1003   196
7  1004   169
8  1005   198
9  1006    33
