In [None]:
# Quick check of how str.split breaks a comma-separated string into tokens.
"1,2,3,4".split(sep=",")

['1', '2', '3', '4']

In [1]:
import json
import logging
from pathlib import Path

import pandas as pd

In [2]:
# Relative location of the raw `pokemon` data directory. Forward slashes are
# used because pathlib accepts them on Windows too, whereas backslashes are
# not treated as separators on POSIX systems.
ruta = Path('../data/raw/pokemon')

In [None]:
# Relative location of the raw `species` data directory. Forward slashes are
# used because pathlib accepts them on Windows too, whereas backslashes are
# not treated as separators on POSIX systems.
ruta_raw = Path('../data/raw/species')

def load_species(raw_species_path, generations):
    """Load raw Pokémon species records from per-generation JSON files.

    Each generation is read from ``<raw_species_path>/<generation>.json`` and
    its contents are appended to one combined list. A generation whose file is
    missing or does not contain valid JSON is logged and skipped, so the
    remaining generations are still loaded.

    Args:
        raw_species_path (str | Path): Directory containing the raw species files.
        generations (Iterable[str]): Generation names to load; each name is
            used as the file stem.

    Returns:
        list: Records from every generation that could be loaded, in the order
            the generations were given. Empty when nothing could be loaded.
    """
    species = []
    for generation in generations:
        species_file = Path(raw_species_path) / f"{generation}.json"
        try:
            with open(species_file, "r", encoding="utf-8") as file:
                pokemon_gens = json.load(file)
        except FileNotFoundError:
            logging.error(f"Archivo no encontrado: {species_file}")
            continue
        except json.JSONDecodeError:
            logging.error(f"Error al decodificar JSON en el archivo: {species_file}")
            continue
        species.extend(pokemon_gens)
        logging.info(f"Se han cargado {len(pokemon_gens)} especies de la generación {generation}.")
    # Return only once every requested generation has been attempted; returning
    # from inside the loop would drop all generations after the first success.
    return species

def transform_species(raw_species_path, generations):
    """Build a table of Pokémon species attributes from the raw JSON files.

    Records are obtained through ``load_species``. Entries that are not
    dictionaries are logged and skipped; entries without a truthy ``id`` or
    ``name`` are skipped as well.

    Args:
        raw_species_path (str | Path): Directory containing the raw species files.
        generations (Iterable[str]): Generation names to load.

    Returns:
        pandas.DataFrame: One row per valid species with the columns
            ``species_id``, ``name``, ``color``, ``habitat``, ``generation``,
            ``previous_evolution``, ``is_mythical``, ``is_baby``,
            ``is_legendary`` and ``nat_pokedex_entry``. The columns are present
            even when no species could be extracted.

    Raises:
        KeyError: If a species dictionary lacks one of the required fields
            (``color``, ``generation``, ``is_mythical``, ``is_baby``,
            ``is_legendary`` or ``pokedex_numbers``).
    """
    columns = [
        "species_id",
        "name",
        "color",
        "habitat",
        "generation",
        "previous_evolution",
        "is_mythical",
        "is_baby",
        "is_legendary",
        "nat_pokedex_entry",
    ]
    # Treat a falsy result from the loader (e.g. None) as "nothing to process"
    # instead of failing when iterating over it.
    pokemon_species = load_species(raw_species_path, generations) or []
    species_data = []
    for species in pokemon_species:
        if not isinstance(species, dict):
            logging.error("Error al procesar la especie: El objeto no es un diccionario.")
            continue
        if not species.get("id") or not species.get("name"):
            continue
        # Both fields may be absent or null in the raw record; they map to None.
        habitat = species.get("habitat")
        evolves_from = species.get("evolves_from_species")
        species_data.append({
            "species_id": species["id"],
            "name": species["name"],
            "color": species["color"]["name"],
            "habitat": habitat["name"] if habitat else None,
            "generation": species["generation"]["name"],
            "previous_evolution": evolves_from["name"] if evolves_from else None,
            "is_mythical": species["is_mythical"],
            "is_baby": species["is_baby"],
            "is_legendary": species["is_legendary"],
            # Entry number in the "national" pokedex, or None when not listed there.
            "nat_pokedex_entry": next(
                (
                    entry["entry_number"]
                    for entry in species["pokedex_numbers"]
                    if entry["pokedex"]["name"] == "national"
                ),
                None,
            ),
        })
    # Passing the column list keeps the schema stable for an empty result, so
    # callers can still sort or select by column name.
    return pd.DataFrame(species_data, columns=columns)

In [13]:
# Transform the raw generation-i species files, order the table by species id
# and preview the first rows.
loaded_species = transform_species(ruta_raw, ['generation-i'])
df_species = pd.DataFrame(loaded_species)
df_species = df_species.sort_values(by='species_id')
df_species.head(20)

Unnamed: 0,species_id,name,color,habitat,generation,previous_evolution,is_mythical,is_baby,is_legendary,nat_pokedex_entry
0,1,bulbasaur,green,grassland,generation-i,,False,False,False,1
149,2,ivysaur,green,grassland,generation-i,bulbasaur,False,False,False,2
28,3,venusaur,green,grassland,generation-i,ivysaur,False,False,False,3
1,4,charmander,red,mountain,generation-i,,False,False,False,4
29,5,charmeleon,red,mountain,generation-i,charmander,False,False,False,5
30,6,charizard,red,mountain,generation-i,charmeleon,False,False,False,6
2,7,squirtle,blue,waters-edge,generation-i,,False,False,False,7
31,8,wartortle,blue,waters-edge,generation-i,squirtle,False,False,False,8
32,9,blastoise,blue,waters-edge,generation-i,wartortle,False,False,False,9
3,10,caterpie,green,forest,generation-i,,False,False,False,10
