In [1]:
import APIConnector
import json
from datetime import datetime

In [2]:
# Capture the moment this run started; the formatted value is interpolated
# into every landing-zone path used by the cells below.
today = datetime.today()
datetime_path = f"{today:%Y-%m-%d %H-%M-%S}"

In [3]:
# Build the API client, rooted at the PokeAPI v2 base URL
API_caller = APIConnector.APIIngestor(base_url="https://pokeapi.co/api/v2/")

# Set up the backoff strategy on the client's HTTP adapter.
# NOTE(review): status_list is presumably the set of HTTP codes that trigger a
# retry - confirm against APIConnector.
API_caller.set_HTTP_adapter(
    max_retry=4,
    status_list=[429, 500, 502, 503, 504],
    backoff_constant=2,
)

**Ingesting Pokemon Data**

In [None]:
# Fetch the Pokemon listing and keep the "results" array of the JSON body.
# This request previously used limit=-1, unlike every other listing cell in
# this notebook, which pass a large positive limit. A negative limit is not a
# documented value for the API's pagination (TODO confirm), so use the same
# large positive limit here to request the whole listing in one response.
pokemon_list = API_caller.send_get_request('pokemon/?limit=10000000').json()["results"]
print(type(pokemon_list))
# Report the size instead of dumping the whole listing into the cell output
print(len(pokemon_list))
# Get the first item of the retrieved Pokemon list
print(pokemon_list[0])
# Get the names of all Pokemon
pokemons = [entry['name'] for entry in pokemon_list]

<class 'list'>


In [6]:
# Hand the Pokemon listing to write_to_file once per format: Parquet first, then JSON
for file_format in ('parquet', 'json'):
    API_caller.write_to_file(
        pokemon_list,
        f'landing-zone/{datetime_path}/{file_format}/lists',
        'all_pokemons_list',
        file_format,
    )

print(f'completed extraction for data on : {datetime_path}')

completed extraction for data on : 2022-10-03 09-43-29


In [7]:
# For every Pokemon name: request its detail record, serialise it to a JSON
# string, and pass it (as a one-element list) to write_to_file for each format.
for pokemon in pokemons:
    pokemon_object = API_caller.send_get_request(f'pokemon/{pokemon}').json()
    ingestion_array = [json.dumps(pokemon_object)]
    for file_format in ('parquet', 'json'):
        API_caller.write_to_file(
            ingestion_array,
            f'landing-zone/{datetime_path}/{file_format}/items/pokemons',
            f'{pokemon}',
            file_format,
        )

**Ingesting Types Data**


In [33]:
# Request the type listing and keep the "results" array of the JSON body.
# NOTE(review): the very large limit is presumably meant to return every type
# in a single page - confirm against the API's pagination rules.
type_response = API_caller.send_get_request('type/?limit=10000000')
type_list = type_response.json()["results"]

# Inspect what came back: container type, then the full listing
print(type(type_list))
print(type_list)
# First entry of the retrieved types list
print(type_list[0])

# Names of all types, used to request each type individually
types = [entry['name'] for entry in type_list]

<class 'list'>
[{'name': 'normal', 'url': 'https://pokeapi.co/api/v2/type/1/'}, {'name': 'fighting', 'url': 'https://pokeapi.co/api/v2/type/2/'}, {'name': 'flying', 'url': 'https://pokeapi.co/api/v2/type/3/'}, {'name': 'poison', 'url': 'https://pokeapi.co/api/v2/type/4/'}, {'name': 'ground', 'url': 'https://pokeapi.co/api/v2/type/5/'}, {'name': 'rock', 'url': 'https://pokeapi.co/api/v2/type/6/'}, {'name': 'bug', 'url': 'https://pokeapi.co/api/v2/type/7/'}, {'name': 'ghost', 'url': 'https://pokeapi.co/api/v2/type/8/'}, {'name': 'steel', 'url': 'https://pokeapi.co/api/v2/type/9/'}, {'name': 'fire', 'url': 'https://pokeapi.co/api/v2/type/10/'}, {'name': 'water', 'url': 'https://pokeapi.co/api/v2/type/11/'}, {'name': 'grass', 'url': 'https://pokeapi.co/api/v2/type/12/'}, {'name': 'electric', 'url': 'https://pokeapi.co/api/v2/type/13/'}, {'name': 'psychic', 'url': 'https://pokeapi.co/api/v2/type/14/'}, {'name': 'ice', 'url': 'https://pokeapi.co/api/v2/type/15/'}, {'name': 'dragon', 'url': '

In [34]:
# Hand the type listing to write_to_file once per format: Parquet first, then JSON
for file_format in ('parquet', 'json'):
    API_caller.write_to_file(
        type_list,
        f'landing-zone/{datetime_path}/{file_format}/lists',
        'all_types_list',
        file_format,
    )

print(f'completed extraction for data on : {datetime_path}')

completed extraction for data on : 2022-10-03 09-43-29


In [36]:
# For every type name: request its detail record, serialise it to a JSON
# string, and pass it (as a one-element list) to write_to_file for each format.
for type1 in types:
    type_object = API_caller.send_get_request(f'type/{type1}').json()
    ingestion_array = [json.dumps(type_object)]
    for file_format in ('parquet', 'json'):
        API_caller.write_to_file(
            ingestion_array,
            f'landing-zone/{datetime_path}/{file_format}/items/types',
            f'{type1}',
            file_format,
        )

**Ingesting Generations Data**

In [4]:
# Pin the landing-zone folder to a fixed timestamp. This replaces whatever
# datetime_path was assigned earlier in the session, so every later cell writes
# under this folder.
# NOTE(review): presumably added to resume an earlier run - on a fresh
# top-to-bottom run it overrides the timestamp computed at the start; confirm
# whether it should stay.
datetime_path = "2022-10-03 09-43-29"

In [27]:
# Request the generation listing and keep the "results" array of the JSON body.
# NOTE(review): the very large limit is presumably meant to return every
# generation in a single page - confirm against the API's pagination rules.
generation_response = API_caller.send_get_request('generation/?limit=10000000000')
generation_list = generation_response.json()["results"]

# Inspect what came back: container type, then the full listing
print(type(generation_list))
print(generation_list)
# First entry of the retrieved generations list
print(generation_list[0])

# Names of all generations, used to request each generation individually
generations = [entry['name'] for entry in generation_list]

<class 'list'>
[{'name': 'generation-i', 'url': 'https://pokeapi.co/api/v2/generation/1/'}, {'name': 'generation-ii', 'url': 'https://pokeapi.co/api/v2/generation/2/'}, {'name': 'generation-iii', 'url': 'https://pokeapi.co/api/v2/generation/3/'}, {'name': 'generation-iv', 'url': 'https://pokeapi.co/api/v2/generation/4/'}, {'name': 'generation-v', 'url': 'https://pokeapi.co/api/v2/generation/5/'}, {'name': 'generation-vi', 'url': 'https://pokeapi.co/api/v2/generation/6/'}, {'name': 'generation-vii', 'url': 'https://pokeapi.co/api/v2/generation/7/'}, {'name': 'generation-viii', 'url': 'https://pokeapi.co/api/v2/generation/8/'}]
{'name': 'generation-i', 'url': 'https://pokeapi.co/api/v2/generation/1/'}


In [28]:
# Hand the generation listing to write_to_file once per format: Parquet first, then JSON
for file_format in ('parquet', 'json'):
    API_caller.write_to_file(
        generation_list,
        f'landing-zone/{datetime_path}/{file_format}/lists',
        'all_generations_list',
        file_format,
    )

print(f'completed extraction for data on : {datetime_path}')

completed extraction for data on : 2022-10-03 09-43-29


In [30]:
# For every generation name: request its detail record, serialise it to a JSON
# string, and pass it (as a one-element list) to write_to_file for each format.
for generation in generations:
    generation_object = API_caller.send_get_request(f'generation/{generation}').json()
    ingestion_array = [json.dumps(generation_object)]
    for file_format in ('parquet', 'json'):
        API_caller.write_to_file(
            ingestion_array,
            f'landing-zone/{datetime_path}/{file_format}/items/generations',
            f'{generation}',
            file_format,
        )

**Ingesting Versions Data**

In [37]:
# Request the version listing and keep the "results" array of the JSON body.
# NOTE(review): the very large limit is presumably meant to return every
# version in a single page - confirm against the API's pagination rules.
version_response = API_caller.send_get_request('version/?limit=10000000')
version_list = version_response.json()["results"]

# Inspect what came back: container type, then the full listing
print(type(version_list))
print(version_list)
# First entry of the retrieved versions list
print(version_list[0])

# Names of all versions, used to request each version individually
versions = [entry['name'] for entry in version_list]

<class 'list'>
[{'name': 'red', 'url': 'https://pokeapi.co/api/v2/version/1/'}, {'name': 'blue', 'url': 'https://pokeapi.co/api/v2/version/2/'}, {'name': 'yellow', 'url': 'https://pokeapi.co/api/v2/version/3/'}, {'name': 'gold', 'url': 'https://pokeapi.co/api/v2/version/4/'}, {'name': 'silver', 'url': 'https://pokeapi.co/api/v2/version/5/'}, {'name': 'crystal', 'url': 'https://pokeapi.co/api/v2/version/6/'}, {'name': 'ruby', 'url': 'https://pokeapi.co/api/v2/version/7/'}, {'name': 'sapphire', 'url': 'https://pokeapi.co/api/v2/version/8/'}, {'name': 'emerald', 'url': 'https://pokeapi.co/api/v2/version/9/'}, {'name': 'firered', 'url': 'https://pokeapi.co/api/v2/version/10/'}, {'name': 'leafgreen', 'url': 'https://pokeapi.co/api/v2/version/11/'}, {'name': 'diamond', 'url': 'https://pokeapi.co/api/v2/version/12/'}, {'name': 'pearl', 'url': 'https://pokeapi.co/api/v2/version/13/'}, {'name': 'platinum', 'url': 'https://pokeapi.co/api/v2/version/14/'}, {'name': 'heartgold', 'url': 'https://po

In [38]:
# Hand the version listing to write_to_file once per format: Parquet first, then JSON
for file_format in ('parquet', 'json'):
    API_caller.write_to_file(
        version_list,
        f'landing-zone/{datetime_path}/{file_format}/lists',
        'all_versions_list',
        file_format,
    )

print(f'completed extraction for data on : {datetime_path}')

completed extraction for data on : 2022-10-03 09-43-29


In [39]:
# For every version name: request its detail record, serialise it to a JSON
# string, and pass it (as a one-element list) to write_to_file for each format.
for version in versions:
    version_object = API_caller.send_get_request(f'version/{version}').json()
    ingestion_array = [json.dumps(version_object)]
    for file_format in ('parquet', 'json'):
        API_caller.write_to_file(
            ingestion_array,
            f'landing-zone/{datetime_path}/{file_format}/items/versions',
            f'{version}',
            file_format,
        )

**Ingesting Version_Group Data**

In [40]:
# Request the version-group listing and keep the "results" array of the JSON body.
# NOTE(review): the very large limit is presumably meant to return every
# version group in a single page - confirm against the API's pagination rules.
version_group_response = API_caller.send_get_request('version-group/?limit=10000000')
version_group_list = version_group_response.json()["results"]

# Inspect what came back: container type, entry count, then the full listing
print(type(version_group_list))
print(len(version_group_list))
print(version_group_list)
# First entry of the retrieved version groups list
print(version_group_list[0])

# Names of all version groups, used to request each one individually
version_groups = [entry['name'] for entry in version_group_list]

<class 'list'>
24
[{'name': 'red-blue', 'url': 'https://pokeapi.co/api/v2/version-group/1/'}, {'name': 'yellow', 'url': 'https://pokeapi.co/api/v2/version-group/2/'}, {'name': 'gold-silver', 'url': 'https://pokeapi.co/api/v2/version-group/3/'}, {'name': 'crystal', 'url': 'https://pokeapi.co/api/v2/version-group/4/'}, {'name': 'ruby-sapphire', 'url': 'https://pokeapi.co/api/v2/version-group/5/'}, {'name': 'emerald', 'url': 'https://pokeapi.co/api/v2/version-group/6/'}, {'name': 'firered-leafgreen', 'url': 'https://pokeapi.co/api/v2/version-group/7/'}, {'name': 'diamond-pearl', 'url': 'https://pokeapi.co/api/v2/version-group/8/'}, {'name': 'platinum', 'url': 'https://pokeapi.co/api/v2/version-group/9/'}, {'name': 'heartgold-soulsilver', 'url': 'https://pokeapi.co/api/v2/version-group/10/'}, {'name': 'black-white', 'url': 'https://pokeapi.co/api/v2/version-group/11/'}, {'name': 'colosseum', 'url': 'https://pokeapi.co/api/v2/version-group/12/'}, {'name': 'xd', 'url': 'https://pokeapi.co/ap

In [41]:
# Hand the version-group listing to write_to_file once per format: Parquet first, then JSON
for file_format in ('parquet', 'json'):
    API_caller.write_to_file(
        version_group_list,
        f'landing-zone/{datetime_path}/{file_format}/lists',
        'all_version_groups_list',
        file_format,
    )

print(f'completed extraction for data on : {datetime_path}')

completed extraction for data on : 2022-10-03 09-43-29


In [42]:
# For every version-group name: request its detail record, serialise it to a
# JSON string, and pass it (as a one-element list) to write_to_file for each format.
for version_group in version_groups:
    version_group_object = API_caller.send_get_request(f'version-group/{version_group}').json()
    ingestion_array = [json.dumps(version_group_object)]
    for file_format in ('parquet', 'json'):
        API_caller.write_to_file(
            ingestion_array,
            f'landing-zone/{datetime_path}/{file_format}/items/version_groups',
            f'{version_group}',
            file_format,
        )

**Ingesting Stats Data**

In [43]:
# Request the stat listing and keep the "results" array of the JSON body.
# NOTE(review): the very large limit is presumably meant to return every stat
# in a single page - confirm against the API's pagination rules.
stat_response = API_caller.send_get_request('stat/?limit=100000000000')
stat_list = stat_response.json()["results"]

# Inspect what came back: container type, entry count, then the full listing
print(type(stat_list))
print(len(stat_list))
print(stat_list)
# First entry of the retrieved stats list
print(stat_list[0])

# Names of all stats, used to request each stat individually
stats = [entry['name'] for entry in stat_list]

<class 'list'>
8
[{'name': 'hp', 'url': 'https://pokeapi.co/api/v2/stat/1/'}, {'name': 'attack', 'url': 'https://pokeapi.co/api/v2/stat/2/'}, {'name': 'defense', 'url': 'https://pokeapi.co/api/v2/stat/3/'}, {'name': 'special-attack', 'url': 'https://pokeapi.co/api/v2/stat/4/'}, {'name': 'special-defense', 'url': 'https://pokeapi.co/api/v2/stat/5/'}, {'name': 'speed', 'url': 'https://pokeapi.co/api/v2/stat/6/'}, {'name': 'accuracy', 'url': 'https://pokeapi.co/api/v2/stat/7/'}, {'name': 'evasion', 'url': 'https://pokeapi.co/api/v2/stat/8/'}]
{'name': 'hp', 'url': 'https://pokeapi.co/api/v2/stat/1/'}


In [44]:
# Hand the stat listing to write_to_file once per format: Parquet first, then JSON
for file_format in ('parquet', 'json'):
    API_caller.write_to_file(
        stat_list,
        f'landing-zone/{datetime_path}/{file_format}/lists',
        'all_stats_list',
        file_format,
    )

print(f'completed extraction for data on : {datetime_path}')

completed extraction for data on : 2022-10-03 09-43-29


In [45]:
# For every stat name: request its detail record, serialise it to a JSON
# string, and pass it (as a one-element list) to write_to_file for each format.
for stat in stats:
    stat_object = API_caller.send_get_request(f'stat/{stat}').json()
    ingestion_array = [json.dumps(stat_object)]
    for file_format in ('parquet', 'json'):
        API_caller.write_to_file(
            ingestion_array,
            f'landing-zone/{datetime_path}/{file_format}/items/stats',
            f'{stat}',
            file_format,
        )

**Ingesting Items Data**

In [None]:
# Request the item listing and keep the "results" array of the JSON body.
# NOTE(review): the very large limit is presumably meant to return every item
# in a single page - confirm against the API's pagination rules.
item_response = API_caller.send_get_request('item/?limit=100000000000')
item_list = item_response.json()["results"]

# Inspect what came back: container type, entry count, then the full listing
print(type(item_list))
print(len(item_list))
print(item_list)
# First entry of the retrieved items list
print(item_list[0])

# Names of all items, used to request each item individually
items = [entry['name'] for entry in item_list]

In [None]:
# Hand the item listing to write_to_file once per format: Parquet first, then JSON
for file_format in ('parquet', 'json'):
    API_caller.write_to_file(
        item_list,
        f'landing-zone/{datetime_path}/{file_format}/lists',
        'all_items_list',
        file_format,
    )

print(f'completed extraction for data on : {datetime_path}')

In [None]:
# For every item name: request its detail record, serialise it to a JSON
# string, and pass it (as a one-element list) to write_to_file for each format.
for item in items:
    item_object = API_caller.send_get_request(f'item/{item}').json()
    ingestion_array = [json.dumps(item_object)]
    for file_format in ('parquet', 'json'):
        API_caller.write_to_file(
            ingestion_array,
            f'landing-zone/{datetime_path}/{file_format}/items/items',
            f'{item}',
            file_format,
        )

**Ingesting Species Data**

In [None]:
# Request the Pokemon species listing and keep the "results" array of the JSON body.
# NOTE(review): the very large limit is presumably meant to return every
# species in a single page - confirm against the API's pagination rules.
pokemon_species_response = API_caller.send_get_request('pokemon-species/?limit=100000000000')
pokemon_species_list = pokemon_species_response.json()["results"]

# Inspect what came back: container type, entry count, then the full listing
print(type(pokemon_species_list))
print(len(pokemon_species_list))
print(pokemon_species_list)
# First entry of the retrieved species list
print(pokemon_species_list[0])

# Names of all species, used to request each species individually
pokemon_species = [entry['name'] for entry in pokemon_species_list]

In [None]:
# Hand the species listing to write_to_file once per format: Parquet first, then JSON
for file_format in ('parquet', 'json'):
    API_caller.write_to_file(
        pokemon_species_list,
        f'landing-zone/{datetime_path}/{file_format}/lists',
        'all_pokemon_species_list',
        file_format,
    )

print(f'completed extraction for data on : {datetime_path}')

In [None]:
# For every species name: request its detail record, serialise it to a JSON
# string, and pass it (as a one-element list) to write_to_file for each format.
for pokemon_species1 in pokemon_species:
    pokemon_species_object = API_caller.send_get_request(f'pokemon-species/{pokemon_species1}').json()
    ingestion_array = [json.dumps(pokemon_species_object)]
    for file_format in ('parquet', 'json'):
        API_caller.write_to_file(
            ingestion_array,
            f'landing-zone/{datetime_path}/{file_format}/items/pokemon_species',
            f'{pokemon_species1}',
            file_format,
        )

**Ingesting Abilities Data**

In [None]:
# Request the ability listing and keep the "results" array of the JSON body.
# NOTE(review): the very large limit is presumably meant to return every
# ability in a single page - confirm against the API's pagination rules.
ability_response = API_caller.send_get_request('ability/?limit=100000000000')
ability_list = ability_response.json()["results"]

# Inspect what came back: container type, entry count, then the full listing
print(type(ability_list))
print(len(ability_list))
print(ability_list)
# First entry of the retrieved abilities list
print(ability_list[0])

# Names of all abilities, used to request each ability individually
abilities = [entry['name'] for entry in ability_list]

In [None]:
# Hand the ability listing to write_to_file once per format: Parquet first, then JSON
for file_format in ('parquet', 'json'):
    API_caller.write_to_file(
        ability_list,
        f'landing-zone/{datetime_path}/{file_format}/lists',
        'all_abilities_list',
        file_format,
    )

print(f'completed extraction for data on : {datetime_path}')

In [None]:
# For every ability name: request its detail record, serialise it to a JSON
# string, and pass it (as a one-element list) to write_to_file for each format.
for ability in abilities:
    ability_object = API_caller.send_get_request(f'ability/{ability}').json()
    ingestion_array = [json.dumps(ability_object)]
    for file_format in ('parquet', 'json'):
        API_caller.write_to_file(
            ingestion_array,
            f'landing-zone/{datetime_path}/{file_format}/items/abilities',
            f'{ability}',
            file_format,
        )