# Data Visualization

## Libraries

In [160]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import random
import json

## Constants

In [161]:
# Base directory (relative) that every other path below is built from.
PATH = "../../"
# Directory containing the dataset images.
IMAGES_PATH = f"{PATH}Dataset/images/"

# Dataset CSV loaded into a DataFrame further down.
csv_file = f"{PATH}Data/CSVs/dataset_caltech_yolo.csv"

# Predictions JSON that is read as input.
predictions_file = f"{PATH}predictions.json"

# Destination JSON for the predictions after they have been modified.
new_predictions_file = f"{PATH}new_predictions.json"

In [162]:
# Class id -> class name.
# Ids 0-14 are individual species. The three large ids are family-level
# classes; each one looks like the concatenation of its two member species ids
# (e.g. 2 "domestic dog" + 13 "red fox" -> 213 "canidae") -- TODO confirm that
# this is the intended convention.
species_mapping = {
    # species-level classes
    0: "bird",
    1: "cow",
    2: "domestic dog",
    3: "egyptian mongoose",
    4: "european badger",
    5: "european rabbit",
    6: "fallow deer",
    7: "genet",
    8: "horse",
    9: "human",
    10: "iberian hare",
    11: "iberian lynx",
    12: "red deer",
    13: "red fox",
    14: "wild boar",

    # family-level classes
    213: "canidae",
    510: "leporid",
    612: "cervid",
}

# Class name -> class id (names are unique, so the inversion is lossless).
inverted_species_mapping = {
    class_name: class_id for class_id, class_name in species_mapping.items()
}

# Species name -> family name. An empty string marks a species that has no
# family-level class.
family_mapping = {
    "bird": "",
    "cow": "",
    "domestic dog": "canidae",
    "egyptian mongoose": "",
    "european badger": "",
    "european rabbit": "leporid",
    "fallow deer": "cervid",
    "genet": "",
    "horse": "",
    "human": "",
    "iberian hare": "leporid",
    "iberian lynx": "",
    "red deer": "cervid",
    "red fox": "canidae",
    "wild boar": "",
}

# Family name -> list of member species names, in the order the species appear
# in family_mapping. Species without a family are left out.
inverted_family_mapping = {}
for species_name, family_name in family_mapping.items():
    if not family_name:
        continue
    inverted_family_mapping.setdefault(family_name, []).append(species_name)

In [163]:
def get_species_names_from_family_names(family_name):
    """Return the list of species names that belong to ``family_name``.

    The list is the one stored in ``inverted_family_mapping`` (not a copy).

    Raises:
        KeyError: if ``family_name`` is not a key of ``inverted_family_mapping``.
    """
    member_species = inverted_family_mapping[family_name]
    return member_species

def get_specie_id_from_specie_name(specie_name):
    """Return the class id stored for ``specie_name``.

    Raises:
        KeyError: if ``specie_name`` is not a key of ``inverted_species_mapping``.
    """
    specie_id = inverted_species_mapping[specie_name]
    return specie_id

def get_specie_name_from_specie_id(specie_id):
    """Return the class name stored for ``specie_id``.

    Raises:
        KeyError: if ``specie_id`` is not a key of ``species_mapping``.
    """
    specie_name = species_mapping[specie_id]
    return specie_name

def get_family_name_from_specie_id(specie_id):
    """Return the family name of the species with id ``specie_id``.

    The result is the empty string when the species has no family in
    ``family_mapping``. Example: ``get_family_name_from_specie_id(12)``
    gives ``"cervid"``.

    Raises:
        KeyError: if ``specie_id`` is not in ``species_mapping``, or the name it
            maps to is not a key of ``family_mapping``.
    """
    specie_name = species_mapping[specie_id]
    family_name = family_mapping[specie_name]
    return family_name

def get_family_id_from_specie_id(specie_id):  # e.g. get_family_id_from_specie_id(12) -> 612
    """Return the family-level class id of the species with id ``specie_id``.

    Returns:
        The id that ``inverted_species_mapping`` holds for the species' family,
        or ``None`` when the species has no family (its ``family_mapping``
        entry is the empty string).

    Raises:
        KeyError: if ``specie_id`` is not in ``species_mapping``, or the name it
            maps to is not a key of ``family_mapping``.
    """
    family_name = family_mapping[species_mapping[specie_id]]
    if not family_name:
        # Previously this fell through to inverted_species_mapping[""] and
        # raised an opaque KeyError('') for every species without a family.
        return None
    return inverted_species_mapping[family_name]

def get_family_name_from_family_id(family_id):
    """Return the name stored for ``family_id``.

    Family ids live in ``species_mapping`` next to the species ids, so this is
    a plain lookup there; no check is made that the id is a family-level one.

    Raises:
        KeyError: if ``family_id`` is not a key of ``species_mapping``.
    """
    family_name = species_mapping[family_id]
    return family_name

In [164]:
# Read both inputs into memory: the dataset CSV as a DataFrame and the
# predictions JSON as whatever structure the file contains.
df = pd.read_csv(csv_file)

with open(predictions_file) as predictions_handle:
    predictions = json.load(predictions_handle)

In [165]:
# Relabel low-confidence predictions with their family-level class.
#
# For every prediction whose highest activation is below the threshold and
# whose species has a family, 'category_id' is replaced by the family id and
# the species id it replaced is stored in 'original_category_id'.
# `predictions` is modified in place.
SPECIES_THRESHOLD = 0.75  # same value as classify_species_family's default

for prediction in predictions:
    species_index = prediction['category_id']
    species = species_mapping.get(species_index)
    if species is None:
        # category_id not present in species_mapping: leave the entry untouched.
        continue

    if max(prediction['activations']) < SPECIES_THRESHOLD:
        # Family names are not keys of family_mapping, so an entry that was
        # already relabelled yields None here and is not relabelled again when
        # the cell is re-run.
        family = family_mapping.get(species)
        if family:
            prediction['original_category_id'] = species_index
            # Reuse the existing reverse lookup instead of scanning
            # species_mapping for every prediction.
            prediction['category_id'] = inverted_species_mapping[family]

In [166]:
# Write the predictions (including any in-place changes) to the new JSON file,
# indented with 4 spaces.
with open(new_predictions_file, mode='w') as output_handle:
    json.dump(predictions, output_handle, indent=4)

In [167]:
# image_id of the prediction to look up and classify in this cell
target_image_id = "79_20220701 (284).JPG"

def classify_species_family(item, species_threshold=0.75):
    """Classify ``item`` as a species, or as its family when confidence is low.

    The position of the highest value in ``item['activations']`` is used as a
    key into ``species_mapping``. If that highest value is below
    ``species_threshold`` and the species has a non-empty entry in
    ``family_mapping``, the family name is returned instead.

    Args:
        item: mapping with an ``'activations'`` sequence.
        species_threshold: activation below which the family name is preferred.

    Returns:
        The species name, or the family name for a low-confidence prediction
        of a species that has a family.
    """
    activations = item['activations']
    top_activation = max(activations)
    # index() returns the first position holding the maximum value.
    label = species_mapping[activations.index(top_activation)]

    if top_activation < species_threshold:
        family_name = family_mapping[label]
        if family_name != "":
            label = family_name

    return label

# Find the first prediction whose image_id matches the target.
# The original loop iterated `predictions_data`, a name that is never defined
# in this notebook (it only existed as leftover kernel state); the loaded list
# is called `predictions`.
target_item = next(
    (item for item in predictions if item['image_id'] == target_image_id),
    None,
)

if target_item is not None:
    # Species-or-family label after applying the confidence threshold.
    final_classification = classify_species_family(target_item)

    # Species with the highest activation, ignoring the threshold.
    activations = target_item['activations']
    original_classification = species_mapping[activations.index(max(activations))]

    # When the label was replaced by a family, show the species in parentheses.
    if final_classification != original_classification:
        final_classification = f"{final_classification} ({original_classification})"

    print("Clasificación final:", final_classification)
else:
    print(f"No se encontró ningún item con image_id igual a '{target_image_id}'.")

Clasificación final: cervid (fallow deer)
