# Data Visualization

## Libraries

In [21]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import random
import json

## Constants

In [22]:
# Repository root, relative to the notebook's working directory.
PATH = "../../"

# Directory holding the dataset images.
IMAGES_PATH = f"{PATH}Dataset/images/"

# Input files: dataset CSV and raw model predictions.
csv_file = f"{PATH}Data/CSVs/dataset_caltech_yolo.csv"
predictions_file = f"{PATH}predictions.json"

# Output file: predictions after the species/family relabelling.
new_predictions_file = f"{PATH}new_predictions.json"

In [23]:
# Category id -> label. Ids 0-14 are the individual species; the remaining ids are
# coarser labels (three families plus the generic "animal" class).
species_mapping = {
    0: "bird",
    1: "cow",
    2: "domestic dog",
    3: "egyptian mongoose",
    4: "european badger",
    5: "european rabbit",
    6: "fallow deer",
    7: "genet",
    8: "horse",
    9: "human",
    10: "iberian hare",
    11: "iberian lynx",
    12: "red deer",
    13: "red fox",
    14: "wild boar",

    213: "canidae",
    510: "leporid",
    612: "cervid",

    1000: "animal"
}

# Label -> category id (covers species, family and "animal" labels alike).
inverted_species_mapping = dict(zip(species_mapping.values(), species_mapping.keys()))

# Species name -> family name; an empty string means the species has no family label.
family_mapping = {
    "bird": "",
    "cow": "",
    "domestic dog": "canidae",
    "egyptian mongoose": "",
    "european badger": "",
    "european rabbit": "leporid",
    "fallow deer": "cervid",
    "genet": "",
    "horse": "",
    "human": "",
    "iberian hare": "leporid",
    "iberian lynx": "",
    "red deer": "cervid",
    "red fox": "canidae",
    "wild boar": "",
}

# Family name -> list of member species names, in family_mapping order.
# Species without a family (empty string) are left out.
inverted_family_mapping = {}
for species_label, family_label in family_mapping.items():
    if not family_label:
        continue
    inverted_family_mapping.setdefault(family_label, []).append(species_label)

In [24]:
def get_species_names_from_family_names(family_name):
    """Return the list of species names grouped under ``family_name``.

    The list is the one stored in ``inverted_family_mapping`` (not a copy).
    Raises ``KeyError`` if no species maps to that family.
    """
    member_species = inverted_family_mapping[family_name]
    return member_species

def get_specie_id_from_specie_name(specie_name):
    """Return the category id registered for ``specie_name``.

    Looks the name up in ``inverted_species_mapping``; raises ``KeyError``
    for an unknown name.
    """
    specie_id = inverted_species_mapping[specie_name]
    return specie_id

def get_specie_name_from_specie_id(specie_id):
    """Return the label stored in ``species_mapping`` for ``specie_id``.

    Raises ``KeyError`` for an unknown id.
    """
    specie_name = species_mapping[specie_id]
    return specie_name

def get_family_name_from_specie_id(specie_id):
    """Return the family name of the species with id ``specie_id``.

    The result is an empty string when the species has no family in
    ``family_mapping``. Example: ``get_family_name_from_specie_id(12)``
    gives ``"cervid"``.

    Raises ``KeyError`` if the id is unknown or its label is not a key of
    ``family_mapping``.
    """
    specie_name = species_mapping[specie_id]
    return family_mapping[specie_name]

def get_family_id_from_specie_id(specie_id):
    """Return the category id of the family the species ``specie_id`` belongs to.

    Example: ``get_family_id_from_specie_id(12)`` gives ``612`` (cervid).

    Raises ``KeyError`` if the id is unknown, and also when the species has no
    family: its family name is then the empty string, which is not a key of
    ``inverted_species_mapping``.
    """
    specie_name = species_mapping[specie_id]
    family_name = family_mapping[specie_name]
    return inverted_species_mapping[family_name]

def get_family_name_from_family_id(family_id):
    """Return the label stored in ``species_mapping`` for ``family_id``.

    Family ids share the ``species_mapping`` table with species ids.
    Raises ``KeyError`` for an unknown id.
    """
    family_name = species_mapping[family_id]
    return family_name

def get_family_id_from_family_name(family_name):
    """Return the category id registered for ``family_name``.

    Looks the name up in ``inverted_species_mapping``; raises ``KeyError``
    for an unknown name.
    """
    family_id = inverted_species_mapping[family_name]
    return family_id

In [25]:
# Load the dataset CSV and the raw model predictions.
df = pd.read_csv(csv_file)

# JSON text is UTF-8; pass the encoding explicitly so decoding does not depend on
# the platform's locale default.
with open(predictions_file, "r", encoding="utf-8") as f:
    predictions = json.load(f)

In [26]:
# Confidence cut-off used by the relabelling loop below: a prediction whose top
# activation is under this value is coarsened, and a family label is accepted only
# when the summed activations of its member species reach this value.
threshold = 0.75

In [27]:
# Sample prediction record, kept for reference only: this bare expression is evaluated
# and discarded. It shows the keys read later ("image_id", "category_id", "score",
# "activations"); here "activations" has 15 values and "score" equals the entry at
# index 14, the record's category_id.
# NOTE(review): "bbox" is presumably [x, y, width, height] - confirm against the producer.
{"image_id": "80_20220907 (4008).JPG", "category_id": 14, "bbox": [677.8606567382812, 678.1602783203125, 209.16204833984375, 79.592041015625], "score": 0.6535040736198425, "activations": [3.7420848002511775e-06, 0.006725645158439875, 9.184383088722825e-05, 0.0006683067185804248, 0.0002859352680388838, 1.6009676983230747e-05, 1.9101062207482755e-05, 6.503734766738489e-05, 0.03393729031085968, 0.0005564815364778042, 6.1949035625730176e-06, 0.00018715407350100577, 0.00028540799394249916, 0.00114127853885293, 0.6535040736198425]};

In [28]:
# image_id of the prediction to inspect in this cell
target_image_id = "80_20220907 (4008).JPG" # alternative id: "79_20220701 (284).JPG"

def classify_species_family(item, species_threshold=0.75):
    """Label one prediction, falling back to its family when confidence is low.

    Args:
        item: prediction dict; only ``item['activations']`` is read, a list
            indexed by species id.
        species_threshold: minimum top activation needed to keep the species label.

    Returns:
        The species name with the highest activation. If that activation is below
        ``species_threshold`` and the species has a non-empty entry in
        ``family_mapping``, the family name is returned instead.

    Raises:
        ValueError: if the activations list is empty.
        KeyError: if the winning index is not a key of ``species_mapping``.
    """
    scores = item['activations']
    best_score = max(scores)
    # list.index returns the first position holding the maximum
    best_label = species_mapping[scores.index(best_score)]

    if best_score < species_threshold:
        family_label = family_mapping[best_label]
        # Species without a family keep their own name even at low confidence
        return family_label if family_label != "" else best_label

    return best_label

# Find the first prediction whose image_id matches the target (None if there is none)
target_item = next(
    (candidate for candidate in predictions if candidate['image_id'] == target_image_id),
    None,
)

if target_item is None:
    print(f"No se encontró ningún item con image_id igual a '{target_image_id}'.")
else:
    # Plain argmax species label, before any family fallback
    target_activations = target_item['activations']
    original_classification = species_mapping[target_activations.index(max(target_activations))]

    final_classification = classify_species_family(target_item)

    # When the fallback changed the label, show the argmax species in parentheses
    if final_classification != original_classification:
        final_classification = f"{final_classification} ({original_classification})"

    print("Clasificación final:", final_classification)

No se encontró ningún item con image_id igual a '80_20220907 (4008).JPG'.


In [29]:
# Coarsen low-confidence predictions in place.
#
# For every prediction whose top activation is below `threshold`:
#   * if its species belongs to a family and the summed activations of that family's
#     member species reach `threshold`, relabel it with the family id and that sum;
#   * otherwise relabel it as the generic "animal" class with score 1.
# In both cases the previous label and top activation are kept under
# 'original_category_id' and 'original_score'. Confident predictions are left untouched.

# Id of the generic fallback class, read from the mapping instead of hard-coding 1000.
animal_index = get_specie_id_from_specie_name("animal")

for prediction in predictions:
    species_index = prediction['category_id']
    species_name = species_mapping.get(species_index)

    # Only base species (the keys of family_mapping) are eligible. This skips unknown
    # ids and also entries already relabelled to a family or "animal" id, so running
    # this cell twice does not overwrite 'original_category_id' with a coarsened id.
    if species_name not in family_mapping:
        continue

    activations = prediction['activations']
    original_score = max(activations)

    if original_score < threshold:
        # Default outcome: no family is confident enough, fall back to "animal".
        new_category_id = animal_index
        new_score = 1

        family_name = family_mapping[species_name]
        if family_name:
            # Family score = sum of the activations of every species in the family.
            family_score = sum(
                activations[get_specie_id_from_specie_name(member_name)]
                for member_name in get_species_names_from_family_names(family_name)
            )
            if family_score >= threshold:
                new_category_id = get_family_id_from_family_name(family_name)
                new_score = family_score

        prediction['original_category_id'] = species_index
        prediction['category_id'] = new_category_id
        prediction['original_score'] = original_score
        prediction['score'] = new_score

In [30]:
# Write the relabelled predictions to the output JSON file (4-space indented)
with open(new_predictions_file, 'w') as out_file:
    json.dump(predictions, fp=out_file, indent=4)