# Setup Dependencies

In [1]:
from transformers import pipeline
import kagglehub
import os
import pandas as pd
from utils import get_files_in_dir

  from .autonotebook import tqdm as notebook_tqdm


# Get Model and Data

In [2]:
model_name = "openai/clip-vit-large-patch14-336"
classifier = pipeline("zero-shot-image-classification", model = model_name, use_fast=True)

path = kagglehub.dataset_download("gpreda/happy-mammals-with-128x128-image-size")

print("Path to dataset files:", path)

image_paths =  get_files_in_dir(f"{path}/train_images_128")
labels_path="train.csv"

labels = pd.read_csv(labels_path, index_col="image")
print(labels.index)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda:0


Path to dataset files: /home/chris/.cache/kagglehub/datasets/gpreda/happy-mammals-with-128x128-image-size/versions/1
Index(['00021adfb725ed.jpg', '000562241d384d.jpg', '0007c33415ce37.jpg',
       '0007d9bca26a99.jpg', '00087baf5cef7a.jpg', '000a8f2d5c316a.jpg',
       '000be9acf46619.jpg', '000bef247c7a42.jpg', '000c3d63069748.jpg',
       '000c476c11bad5.jpg',
       ...
       'fff2dbac692435.jpg', 'fff4686bea76f0.jpg', 'fff47736026701.jpg',
       'fff54859cb0beb.jpg', 'fff603f5af8614.jpg', 'fff639a7a78b3f.jpg',
       'fff8b32daff17e.jpg', 'fff94675cc1aef.jpg', 'fffbc5dd642d8c.jpg',
       'fffdcd42312777.jpg'],
      dtype='object', name='image', length=51033)


# Collect all possible labels and setup results table

In [3]:
print(f"Columns: {labels.columns}")
possible_labels=labels["species"].unique()
print(f"Categorizing images into the following species: {", ".join(possible_labels)}")

results=pd.DataFrame(columns=["image_name","is_prediction_correct"])
results["is_prediction_correct"].astype(bool)

Columns: Index(['species', 'individual_id'], dtype='object')
Categorizing images into the following species: melon_headed_whale, humpback_whale, false_killer_whale, bottlenose_dolphin, beluga, minke_whale, fin_whale, blue_whale, gray_whale, southern_right_whale, common_dolphin, kiler_whale, pilot_whale, dusky_dolphin, killer_whale, long_finned_pilot_whale, sei_whale, spinner_dolphin, bottlenose_dolpin, cuviers_beaked_whale, spotted_dolphin, globis, brydes_whale, commersons_dolphin, white_sided_dolphin, short_finned_pilot_whale, rough_toothed_dolphin, pantropic_spotted_dolphin, pygmy_killer_whale, frasiers_dolphin


Series([], Name: is_prediction_correct, dtype: bool)

# Generate all predictions

In [6]:
print("\n\n")
for i in range(0,100):
    image_path=image_paths[i]
    image_name=os.path.basename(image_path)
    print(f"image name {image_name} image path {image_path}")
    
    if  image_name not in labels.index:
        raise KeyError(f"image {image_name} not in labels index")

    label=labels.loc[image_name,"species"]

    print(label)
    
    scores = classifier(image_path, candidate_labels = possible_labels)
    prediction = scores[0]['label']

    print(f"Image {image_name}: The highest score is {scores[0]['score']:.3f} for the label: {label} and prediction: {prediction}")
    
    is_prediction_correct=label==prediction

    result = pd.DataFrame([{"image_name": image_name, "is_prediction_correct": is_prediction_correct}])
    results = pd.concat([results, result])




image name 173edf821e72c4.jpg image path /home/chris/.cache/kagglehub/datasets/gpreda/happy-mammals-with-128x128-image-size/versions/1/train_images_128/173edf821e72c4.jpg
bottlenose_dolphin
Image 173edf821e72c4.jpg: The highest score is 0.311 for the label: bottlenose_dolphin and prediction: spotted_dolphin
image name e714ba12963261.jpg image path /home/chris/.cache/kagglehub/datasets/gpreda/happy-mammals-with-128x128-image-size/versions/1/train_images_128/e714ba12963261.jpg
blue_whale
Image e714ba12963261.jpg: The highest score is 0.698 for the label: blue_whale and prediction: minke_whale
image name c191adeee3dc98.jpg image path /home/chris/.cache/kagglehub/datasets/gpreda/happy-mammals-with-128x128-image-size/versions/1/train_images_128/c191adeee3dc98.jpg
melon_headed_whale
Image c191adeee3dc98.jpg: The highest score is 0.173 for the label: melon_headed_whale and prediction: dusky_dolphin
image name 7b6c757cee035f.jpg image path /home/chris/.cache/kagglehub/datasets/gpreda/happy-

# Accuracy Analysis

In [5]:
print("")
numCorrect=results["is_prediction_correct"].sum()
accuracy=numCorrect/len(results["is_prediction_correct"])
print(f"num correct: {numCorrect} Accuracy: {accuracy}")


num correct: 0 Accuracy: 0.0
