# Pipeline para ETL de Pokemons

### Configuração do notebook.

In [17]:
import os

import pandas as pd
import requests
from neo4j import GraphDatabase

# Connection settings are read from the environment so that credentials do not
# have to be stored in the notebook. The literals are local-development
# fallbacks that keep "Restart & Run All" working when no variable is set.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
# SECURITY: this fallback password is visible to anyone who can read the
# notebook. Set NEO4J_PASSWORD in the environment, rotate the password, and
# remove the fallback before sharing.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "mary123.M")

### Configuração de conexão com o banco

In [18]:
class Neo4jConnection:
    """
    Thin wrapper around a Neo4j driver.

    The instance can also be used as a context manager, in which case the
    driver is closed when the ``with`` block exits.

    Args:
        uri (str): Bolt URI of the database.
        user (str): Name of the admin user.
        password (str): Password of that user.
    """
    def __init__(self, uri, user, password) -> None:
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never swallow an exception raised inside the ``with`` block.
        return False

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def execute_query(self, query, parameters=None):
        """
        Run a query in a fresh session and return its records.

        Args:
            query (str): Cypher statement to run.
            parameters (dict | None): Query parameters passed to ``session.run``.

        Returns:
            list: The records produced by the query (empty for queries that
            return nothing).
        """
        with self.driver.session() as session:
            result = session.run(query, parameters)
            # The result is bound to the session, so it has to be read before
            # the ``with`` block closes the session. Returning the bare result
            # would hand the caller an object that can no longer be read.
            return list(result)
    
connection = Neo4jConnection(NEO4J_URI,NEO4J_USER,NEO4J_PASSWORD)
# Creating the driver does not contact the server, so check that the database
# is reachable and the credentials are accepted before reporting success.
connection.driver.verify_connectivity()
print("Conexão estabelicida com sucesso!")

Conexão estabelicida com sucesso!


### Extração dos 151 pokemons da primeira geração via requests.

In [19]:
POKE_API_BASE_URL = "https://pokeapi.co/api/v2"

def fetch_pokemon_data(limit=151, timeout=10):
    """
    Request the Pokémon listing from the API.

    Args:
        limit (int): Number of Pokémon to request.
        timeout (float | tuple): Seconds to wait for the server, forwarded to
            ``requests.get``. Without it a stalled connection would block the
            notebook indefinitely.

    Returns:
        The decoded JSON body of the response. The caller reads the list of
        Pokémon from its ``'results'`` key.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status`` when the response
            status signals an error.
    """
    response = requests.get(
        f"{POKE_API_BASE_URL}/pokemon",
        params={"limit": limit},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

# Download the listing once; later cells iterate over the entries under 'results'.
pokemon_data = fetch_pokemon_data()
pokemon_list = pokemon_data["results"]

pd.DataFrame(data=pokemon_list)

Unnamed: 0,name,url
0,bulbasaur,https://pokeapi.co/api/v2/pokemon/1/
1,ivysaur,https://pokeapi.co/api/v2/pokemon/2/
2,venusaur,https://pokeapi.co/api/v2/pokemon/3/
3,charmander,https://pokeapi.co/api/v2/pokemon/4/
4,charmeleon,https://pokeapi.co/api/v2/pokemon/5/
...,...,...
146,dratini,https://pokeapi.co/api/v2/pokemon/147/
147,dragonair,https://pokeapi.co/api/v2/pokemon/148/
148,dragonite,https://pokeapi.co/api/v2/pokemon/149/
149,mewtwo,https://pokeapi.co/api/v2/pokemon/150/


### Transformação dos Dados

In [20]:
def fetch_pokemons_details(url, timeout=10):
    """
    Fetch the details of a single Pokémon from its own URL.

    Args:
        url (str): URL that identifies the Pokémon.
        timeout (float | tuple): Seconds to wait for the server, forwarded to
            ``requests.get``. Without it a stalled connection would block the
            notebook indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status`` when the response
            status signals an error.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()

# Fetch the first listed Pokémon to see which fields the detail payload has.
details = fetch_pokemons_details(url=pokemon_list[0]["url"])
details.keys()

dict_keys(['abilities', 'base_experience', 'cries', 'forms', 'game_indices', 'height', 'held_items', 'id', 'is_default', 'location_area_encounters', 'moves', 'name', 'order', 'past_abilities', 'past_types', 'species', 'sprites', 'stats', 'types', 'weight'])

### Carregamento dos dados

In [21]:
# Turn each Pokémon's detail payload into graph records: one node per Pokémon,
# type and ability, plus one relationship from the Pokémon to each of them.
nodes = []
relationships = []

for pokemon in pokemon_list:
    details = fetch_pokemons_details(pokemon['url'])
    pokemon_name = details['name']
    types = [t['type']['name'] for t in details['types']]
    abilities = [a['ability']['name'] for a in details['abilities']]

    # Node for the Pokémon itself
    nodes.append({'name': pokemon_name, 'type': 'Poke'})

    # Nodes and relationships for its types
    for poke_type in types:
        nodes.append({'name': poke_type, 'type': 'Type'})
        relationships.append({'from': pokemon_name, 'to': poke_type, 'type': 'PERTENCE_A'})

    # Nodes and relationships for its abilities
    for ability in abilities:
        nodes.append({'name': ability, 'type': 'Ability'})
        relationships.append({'from': pokemon_name, 'to': ability, 'type': 'TEM_HABILIDADE'})

# Drop duplicates before loading. Tuple keys keep the fields separate; a key
# built by joining them with "_" would treat ("a_b", "c") and ("a", "b_c") as
# the same record. The results are stored as lists, not dict views, so later
# cells can index them and take their length.
unique_nodes = list({(n['name'], n['type']): n for n in nodes}.values())
unique_relationships = list({
    (r['from'], r['to'], r['type']): r for r in relationships
}.values())

# Show the transformed data
pd.DataFrame(unique_nodes), pd.DataFrame(unique_relationships)

(            name     type
 0      bulbasaur     Poke
 1          grass     Type
 2         poison     Type
 3       overgrow  Ability
 4    chlorophyll  Ability
 ..           ...      ...
 277    dragonair     Poke
 278    dragonite     Poke
 279   multiscale  Ability
 280       mewtwo     Poke
 281          mew     Poke
 
 [282 rows x 2 columns],
           from           to            type
 0    bulbasaur        grass      PERTENCE_A
 1    bulbasaur       poison      PERTENCE_A
 2    bulbasaur     overgrow  TEM_HABILIDADE
 3    bulbasaur  chlorophyll  TEM_HABILIDADE
 4      ivysaur        grass      PERTENCE_A
 ..         ...          ...             ...
 608     mewtwo      psychic      PERTENCE_A
 609     mewtwo     pressure  TEM_HABILIDADE
 610     mewtwo      unnerve  TEM_HABILIDADE
 611        mew      psychic      PERTENCE_A
 612        mew  synchronize  TEM_HABILIDADE
 
 [613 rows x 3 columns])

In [22]:
def _validated_identifier(value, kind):
    """Return ``value`` if it is a plain identifier, otherwise raise ValueError."""
    # Labels and relationship types cannot be passed as Cypher parameters, so
    # they are interpolated into the query text and must be checked first.
    if not (isinstance(value, str) and value.isidentifier()):
        raise ValueError(f"Invalid {kind} for a Cypher query: {value!r}")
    return value


def load_data_to_neo4j(connection, nodes, relationships):
    """
    Write nodes and relationships to Neo4j using MERGE statements.

    Nodes are grouped by label and relationships by type. Each group is sent
    as a single UNWIND batch, so the number of queries equals the number of
    distinct labels plus the number of distinct relationship types.

    Args:
        connection: Object exposing ``execute_query(query, parameters)``.
        nodes: Iterable of dicts with the keys ``'name'`` and ``'type'``;
            ``'type'`` is used as the node label.
        relationships: Iterable of dicts with the keys ``'from'``, ``'to'``
            and ``'type'``; ``'type'`` is used as the relationship type.

    Raises:
        ValueError: If a label or relationship type is not a plain identifier.
            Nothing is written in that case, because validation happens
            before the first query is sent.
    """
    # Group node names by label, keeping first-seen order.
    names_by_label = {}
    for node in nodes:
        label = _validated_identifier(node["type"], "node label")
        names_by_label.setdefault(label, []).append(node["name"])

    # Group relationship endpoints by relationship type.
    pairs_by_type = {}
    for rel in relationships:
        rel_type = _validated_identifier(rel["type"], "relationship type")
        pairs_by_type.setdefault(rel_type, []).append(
            {"source": rel["from"], "target": rel["to"]}
        )

    # Load nodes
    for label, names in names_by_label.items():
        query = f"""
        UNWIND $names AS name
        MERGE (n:{label} {{name: name}})
        """
        connection.execute_query(query, {"names": names})

    # Load relationships
    # NOTE(review): endpoints are matched by name only, without a label, so a
    # name shared by nodes of different labels matches all of them - confirm
    # this is intended.
    for rel_type, pairs in pairs_by_type.items():
        query = f"""
        UNWIND $pairs AS pair
        MATCH (a {{name: pair.source}}), (b {{name: pair.target}})
        MERGE (a)-[:{rel_type}]->(b)
        """
        connection.execute_query(query, {"pairs": pairs})

# Push the deduplicated nodes and relationships into the database
load_data_to_neo4j(
    connection,
    nodes=unique_nodes,
    relationships=unique_relationships,
)
print("Dados carregados com sucesso!")


Dados carregados com sucesso!
