In [None]:
from functions import get_data_api
from functions import convert_date
from functions import replace_urls_with_names
from functions import bd_save
from functions import load_bd
from functions import list_tables_bd
import pandas as pd
import matplotlib.pyplot as plt
import json

<h1 align="center"><strong>ETAPA DE EXTRAÇÃO</strong></h1>


### ***CHARACTERS***

In [None]:
def get_all_characters():
    """Collect every record from the paginated character endpoint.

    Starts at ``https://rickandmortyapi.com/api/character`` and keeps
    requesting the URL stored under ``info -> next`` of each response until
    that value is falsy, accumulating each response's ``results`` entries.

    Returns:
        list: the ``results`` entries of every page, in the order fetched.
    """
    source_label = 'Characters'
    # NOTE(review): this label has a leading space, while get_all_episodes
    # passes "Extração" without one - confirm which form get_data_api expects.
    stage_label = ' Extração'

    records = []
    next_page = 'https://rickandmortyapi.com/api/character'

    while next_page:
        payload = get_data_api(next_page, source_label, stage_label)
        # The response itself says where the following page lives.
        next_page = payload['info']['next']
        records += payload['results']

    return records

# Walk every page of the character endpoint and keep the raw records.
characters = get_all_characters()

# Quick sanity check: how many records were collected.
print(len(characters))

In [None]:
# Load the raw character records into a DataFrame.
df_characters = pd.DataFrame(characters)

# Print the column/dtype/non-null summary of the raw frame.
df_characters.info()

# Preview the first two rows.
df_characters.head(2)

### ***LOCATIONS***

In [None]:
def get_all_locations():
    """Collect every record from the paginated location endpoint.

    Starts at ``https://rickandmortyapi.com/api/location`` and keeps
    requesting the URL stored under ``info -> next`` of each response until
    that value is falsy, accumulating each response's ``results`` entries.

    Returns:
        list: the ``results`` entries of every page, in the order fetched.
    """
    source_label = 'Locations'
    # NOTE(review): this label has a leading space, while get_all_episodes
    # passes "Extração" without one - confirm which form get_data_api expects.
    stage_label = ' Extração'

    records = []
    next_page = 'https://rickandmortyapi.com/api/location'

    while next_page:
        payload = get_data_api(next_page, source_label, stage_label)
        # The response itself says where the following page lives.
        next_page = payload['info']['next']
        records += payload['results']

    return records

# Walk every page of the location endpoint and keep the raw records.
locations = get_all_locations()

# Quick sanity check: how many records were collected.
print(len(locations))

In [None]:
# Load the raw location records into a DataFrame.
df_locations = pd.DataFrame(locations)

# Print the column/dtype/non-null summary of the raw frame.
df_locations.info()

# Preview the first two rows.
df_locations.head(2)

### ***EPISODES***

In [None]:
def get_all_episodes():
    """Collect every record from the paginated episode endpoint.

    Starts at ``https://rickandmortyapi.com/api/episode`` and keeps
    requesting the URL stored under ``info -> next`` of each response until
    that value is falsy, accumulating each response's ``results`` entries.

    Returns:
        list: the ``results`` entries of every page, in the order fetched.
    """
    source_label = "Episodes"
    # NOTE(review): get_all_characters / get_all_locations pass this label
    # with a leading space (' Extração') - confirm which form is intended.
    stage_label = "Extração"

    records = []
    next_page = 'https://rickandmortyapi.com/api/episode'

    while next_page:
        payload = get_data_api(next_page, source_label, stage_label)
        # The response itself says where the following page lives.
        next_page = payload['info']['next']
        records += payload['results']

    return records

# Walk every page of the episode endpoint and keep the raw records.
episodes = get_all_episodes()

# Quick sanity check: how many records were collected.
print(len(episodes))

In [None]:
# Load the raw episode records into a DataFrame.
df_episodes = pd.DataFrame(episodes)

# Print the column/dtype/non-null summary of the raw frame.
df_episodes.info()

# Preview the first two rows.
df_episodes.head(2)

<h1 align="center"><strong>ETAPA DE TRANSFORMAÇÃO</strong></h1>


### ***CHARACTERS***

In [None]:

# Build the transformed frame from the raw `characters` list in one chain.
# The previous version mutated/reassigned `df_characters` step by step, so
# running the cell a second time raised KeyError (the "origin", "location",
# "created" and "episode" columns had already been dropped). Starting from the
# raw records makes the cell safe to re-run and yields the same columns in the
# same order.
df_characters = (
    pd.DataFrame(characters)
    .assign(
        # Empty type strings become the explicit label "unknown".
        type=lambda df: df["type"].replace("", "unknown"),
        # "origin" and "location" hold dicts; pull their "name"/"url" entries
        # out into flat columns.
        name_origin=lambda df: df["origin"].apply(lambda origin: origin.get("name")),
        url_origin=lambda df: df["origin"].apply(lambda origin: origin.get("url")),
        location_name=lambda df: df["location"].apply(lambda location: location.get("name")),
        url_location=lambda df: df["location"].apply(lambda location: location.get("url")),
        # Length of each row's "episode" entry.
        total_episodes=lambda df: df["episode"].apply(len),
    )
    # Downstream cells look characters up by this column name.
    .rename(columns={"url": "character_url"})
    .drop(columns=["origin", "location", "created", "episode"])
)

df_characters.head()

### ***LOCATIONS***

In [None]:
# Preview the first two location rows.
df_locations.head(2)



In [None]:
# Build the transformed frame from the raw `locations` list in one chain.
# The previous version renamed in place and reassigned `df_locations`, so a
# second run of the cell raised KeyError on the already-dropped "created"
# column and would have operated on an already-stringified `residents_name`.
# Starting from the raw records makes the cell safe to re-run and yields the
# same columns in the same order.
df_locations = (
    pd.DataFrame(locations)
    # Rename and drop columns.
    .rename(columns={"residents": "residents_name", "url": "location_url"})
    .drop(columns=["created"])
    .assign(
        # Swap each row's resident URLs for character names, looked up in
        # df_characters via its "character_url" and "name" columns.
        residents_name=lambda df: df["residents_name"].apply(
            lambda urls: replace_urls_with_names(df_characters, "character_url", "name", urls)
        ),
        # Count residents while the value is still a sized collection
        # (assign evaluates keywords in order, so this sees the new column).
        total_residents=lambda df: df["residents_name"].apply(len),
    )
    # Make sure the values are strings so the database step does not fail.
    # NOTE(review): str() stores the Python text form of the value; a JSON
    # dump would be easier to parse when reading back - confirm with consumers.
    .assign(residents_name=lambda df: df["residents_name"].apply(str))
)

df_locations.head(2)

### ***EPISODES***

In [None]:
# Preview the first two episode rows.
df_episodes.head(2)

In [None]:
# Build the transformed frame from the raw `episodes` list in one chain.
# The previous version renamed in place and reassigned `df_episodes`, so a
# second run of the cell re-applied convert_date to already converted values
# and raised KeyError on the already-dropped "created" column. Starting from
# the raw records makes the cell safe to re-run and yields the same columns in
# the same order.
df_episodes = (
    pd.DataFrame(episodes)
    .rename(columns={"characters": "characters_name", "url": "episode_url"})
    # Convert each air_date value with the project's convert_date helper.
    .assign(air_date=lambda df: df["air_date"].apply(convert_date))
    .drop(columns=["created"])
    .assign(
        # Swap each row's character URLs for character names, looked up in
        # df_characters via its "character_url" and "name" columns.
        characters_name=lambda df: df["characters_name"].apply(
            lambda urls: replace_urls_with_names(df_characters, "character_url", "name", urls)
        ),
        # Count characters while the value is still a sized collection
        # (assign evaluates keywords in order, so this sees the new column).
        total_characters_in_the_episode=lambda df: df["characters_name"].apply(len),
    )
    # Make sure the values are strings before the database step.
    .assign(characters_name=lambda df: df["characters_name"].apply(str))
)

df_episodes.head(8)


<h1 align="center"><strong>ETAPA DE BANCO DE DADOS</strong></h1>


In [None]:
# Database file and table that receive the transformed characters frame.
# Both are plain notebook globals, so later cells see whichever value was
# assigned last.
path_db = 'rick-and-morty-api.db'
table_name = 'characters'

# Hand df_characters to bd_save for that file/table. `base` and `stage` are
# labels forwarded to the helper; their use is defined in the `functions`
# module, which is not visible here.
bd_save(df_characters, path_db, table_name, base='CharacterBase', stage='Banco de Dados')

In [None]:
# Database file and table that receive the transformed locations frame.
# Both are plain notebook globals, so later cells see whichever value was
# assigned last.
path_db = 'rick-and-morty-api.db'
table_name = 'locations'

# Hand df_locations to bd_save for that file/table. `base` and `stage` are
# labels forwarded to the helper; their use is defined in the `functions`
# module, which is not visible here.
bd_save(df_locations, path_db, table_name, base='LocationBase', stage='Banco de Dados')


In [None]:
# Database file and table that receive the transformed episodes frame.
# Both are plain notebook globals, so later cells see whichever value was
# assigned last.
path_db = 'rick-and-morty-api.db'
table_name = 'episodes'

# Hand df_episodes to bd_save for that file/table. `base` and `stage` are
# labels forwarded to the helper; their use is defined in the `functions`
# module, which is not visible here.
bd_save(df_episodes, path_db, table_name, base='EpisodeBase', stage='Banco de Dados')

In [None]:
# Label for the listing below (the text reads "Tables in the database:").
print("Tabelas no banco de dados:")

# Query the database file named by `path_db`, a global set by the bd_save
# cells above, so one of them must have run first.
list_tables_bd(path_db, base='Rick and Morty API', stage='Banco de Dados')

In [None]:
path_db = 'rick-and-morty-api.db'
# Name the table explicitly. This cell used to read whatever `table_name` the
# most recently executed bd_save cell had left behind, so its result depended
# on execution order. 'episodes' is the value a top-to-bottom run produces and
# matches base='Episodes' below.
table_name = 'episodes'

# TODO: ask why this call raises an error - it should trigger the alert instead.

load_bd(table_name, path_db, base='Episodes', stage='Banco de Dados')

<h1 align="center"><strong>ANÁLISE DOS DADOS</strong></h1>


In [None]:
# Preview the transformed locations frame (first five rows).
df_locations.head()


In [None]:
# Preview the transformed characters frame (first five rows).
df_characters.head()

In [None]:
# Preview the transformed episodes frame (first five rows).
df_episodes.head()

### Personagens com mais aparições

In [None]:
# Highest episode count reached by any character.
max_episodes = df_characters['total_episodes'].max()

# Keep every character tied at that maximum (there may be more than one).
most_frequent_characters = df_characters.loc[
    df_characters['total_episodes'].eq(max_episodes)
]

most_frequent_characters


In [None]:
# Relationship between status and species: one row per species, one column
# per status, each cell holding the number of characters (0 when a
# combination never occurs).
status_species = (
    df_characters
    .groupby(['species', 'status'])
    .size()
    .unstack('status', fill_value=0)
)

status_species



In [None]:

# Stacked bar chart: one bar per species, segmented by status.
# The colour list is matched to the status columns by position.
# NOTE(review): assumes exactly three status columns in the table's column
# order - confirm against status_species.columns.
ax = status_species.plot(
    kind='bar',
    stacked=True,
    figsize=(10, 4),
    color=['#90EE90', '#F08080', '#D3D3D3'],
)
# Label the chart through the Axes that DataFrame.plot returned.
ax.set_title('Relação entre Status e Espécie')
ax.set_xlabel('Espécie')
ax.set_ylabel('Número de Personagens')
ax.legend(title='Status')
plt.show()
