# **Creating a Pokemon dataset using an API**

### **Step 1: Setting up.**

In [1]:
# Importing Libraries.
import requests as rq
import pandas as pd
import numpy as np

# Base URL of the PokeAPI (v2); api_response() appends the endpoint path to it.
api_url = 'https://pokeapi.co/api/v2/'

def api_response(endpoint, timeout=30):
  """Request one PokeAPI resource and return its decoded JSON body.

  Args:
    endpoint: Path appended to ``api_url``, e.g. ``'pokemon/1'``.
    timeout: Seconds to wait for the server before giving up, so that a
      stalled connection cannot hang the notebook indefinitely.

  Returns:
    The parsed JSON payload of the response.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If no response arrives within ``timeout`` seconds.
  """
  response = rq.get(f'{api_url}{endpoint}', timeout=timeout)
  # Surface the real HTTP status (e.g. 404) instead of a confusing
  # JSON decoding error or KeyError further down the line.
  response.raise_for_status()
  return response.json()

### **Step 2: Gathering desired data.**

##### Since APIs limit the number of requests you can make within a certain time period, we will need to limit the amount of pokemon data we extract and, if necessary, add time delays between requests. This will also make testing the data for accuracy much faster, since we can load in small chunks of data at a time. For this reason we will be separating the requests by pokemon generation.

##### Locating Pokemon names information.

In [2]:
# Maps a pokemon generation number to the pokedex IDs that belong to that generation.
def pokemon_gen_range(generation):
  """Return the list of pokedex IDs of every pokemon in ``generation``.

  Generations 1 through 8 are known; any other value raises ``KeyError``.
  """
  # First pokedex ID of each generation, followed by one past the last ID of gen 8.
  first_ids = (1, 152, 252, 387, 494, 650, 722, 810, 899)
  ids_by_generation = {
    f'gen_{number}': list(range(start, stop))
    for number, (start, stop) in enumerate(zip(first_ids, first_ids[1:]), start=1)
  }
  return ids_by_generation[f'gen_{generation}']

# Builds a list holding one field of one endpoint for every pokemon of a generation.
def list_generator(endpoint, resource, gen):
  """Return the ``resource`` field of ``endpoint/<id>`` for each pokedex ID in ``gen``.

  The result follows the ID order produced by ``pokemon_gen_range(gen)``;
  one API request is made per ID.
  """
  return [
    api_response(f'{endpoint}/{pokedex_id}')[f'{resource}']
    for pokedex_id in pokemon_gen_range(gen)
  ]

##### Request Pokemon names. (run once)

In [3]:
# Request the pokemon names separately for each of the eight generations.
(
  gen1_names, gen2_names, gen3_names, gen4_names,
  gen5_names, gen6_names, gen7_names, gen8_names,
) = [list_generator('pokemon', 'name', gen_number) for gen_number in range(1, 9)]

In [4]:
# Names of all pokemon, generation 1 first and generation 8 last.
poke_names = [
  name
  for gen_names in (
    gen1_names, gen2_names, gen3_names, gen4_names,
    gen5_names, gen6_names, gen7_names, gen8_names,
  )
  for name in gen_names
]

##### Locating Pokemon abilities information.

In [5]:
# Enter a pokemon generation, and get ordered lists of all the pokemon's abilities for that generation.
def ability_list_generator(generation):
  """Collect the abilities of every pokemon in one generation.

  Each pokemon is requested once. Its first listed ability goes into the
  first list. Of the remaining entries, the first one not flagged
  ``is_hidden`` is recorded as the second ability and the first one flagged
  ``is_hidden`` as the hidden ability. A slot the pokemon does not have is
  recorded as 0, so every list receives exactly one entry per pokemon and
  the three lists always stay aligned.

  Args:
    generation: Generation number accepted by ``pokemon_gen_range``.

  Returns:
    A tuple ``(ability_1, ability_2, hidden_ability)`` of equal-length lists
    in pokedex order.
  """
  ability_1 = []
  ability_2 = []
  hidden_ability = []
  for pokemon_number in pokemon_gen_range(generation):
    # A single request per pokemon; the payload is reused for every lookup.
    abilities = api_response(f'pokemon/{pokemon_number}/')['abilities']
    first = abilities[0]['ability']['name']
    second = 0
    hidden = 0
    # Classify by the is_hidden flag instead of by list length, so unexpected
    # shapes can never leave one of the lists without an entry.
    for entry in abilities[1:]:
      name = entry['ability']['name']
      if entry['is_hidden']:
        if hidden == 0:
          hidden = name
      elif second == 0:
        second = name
    ability_1.append(first)
    ability_2.append(second)
    hidden_ability.append(hidden)
  return ability_1, ability_2, hidden_ability

##### Request Pokemon abilities. (run once)

In [6]:
# Request the ability lists separately for each of the eight generations.
(
  gen1_abilities, gen2_abilities, gen3_abilities, gen4_abilities,
  gen5_abilities, gen6_abilities, gen7_abilities, gen8_abilities,
) = [ability_list_generator(gen_number) for gen_number in range(1, 9)]

In [7]:
# Abilities of all pokemon across every generation.
# zip() regroups the per-generation (ability_1, ability_2, hidden_ability) tuples
# into one column per ability slot; each column is then flattened in generation order.
ability_1, ability_2, hidden_ability = [
  [entry for per_gen in column for entry in per_gen]
  for column in zip(
    gen1_abilities, gen2_abilities, gen3_abilities, gen4_abilities,
    gen5_abilities, gen6_abilities, gen7_abilities, gen8_abilities,
  )
]

##### Locating Pokemon types information.

In [8]:
# Enter a pokemon generation, get back ordered lists of all the pokemon's types for that generation.
def type_list_generator(generation):
  """Collect the primary and secondary type of every pokemon in one generation.

  Each pokemon is requested once. A pokemon without a second type gets 0 in
  the second list, so both lists always receive exactly one entry per
  pokemon and stay aligned.

  Args:
    generation: Generation number accepted by ``pokemon_gen_range``.

  Returns:
    A tuple ``(type_1, type_2)`` of equal-length lists in pokedex order.
  """
  type_1 = []
  type_2 = []
  for pokemon_number in pokemon_gen_range(generation):
    # A single request per pokemon; the payload is reused for both lookups.
    types = api_response(f'pokemon/{pokemon_number}/')['types']
    type_1.append(types[0]['type']['name'])
    type_2.append(types[1]['type']['name'] if len(types) > 1 else 0)
  return type_1, type_2

##### Request Pokemon types. (run once)

In [9]:
# Request the type lists separately for each of the eight generations.
(
  gen1_types, gen2_types, gen3_types, gen4_types,
  gen5_types, gen6_types, gen7_types, gen8_types,
) = [type_list_generator(gen_number) for gen_number in range(1, 9)]

In [10]:
# Types of all pokemon across every generation.
# zip() regroups the per-generation (type_1, type_2) tuples into one column per
# type slot; each column is then flattened in generation order.
type_1, type_2 = [
  [entry for per_gen in column for entry in per_gen]
  for column in zip(
    gen1_types, gen2_types, gen3_types, gen4_types,
    gen5_types, gen6_types, gen7_types, gen8_types,
  )
]

##### Locating Pokemon stats information.

In [11]:
# Given a stat index and a pokemon generation, returns that stat's base value for every pokemon in the generation.
# Stat index: HP ==> 0, Attack ==> 1, Defense ==> 2, Special Attack ==> 3, Special Defense ==> 4, Speed ==> 5
def stat_list_generator(stat_index, generation):
  """Return the ``base_stat`` at position ``stat_index`` for each pokemon in ``generation``.

  One API request is made per pokemon; the result follows the ID order
  produced by ``pokemon_gen_range(generation)``.
  """
  return [
    api_response(f'pokemon/{pokedex_id}/')['stats'][stat_index]['base_stat']
    for pokedex_id in pokemon_gen_range(generation)
  ]

##### Request HP information. (run once)

In [12]:
# Request the HP values (stat index 0) separately for each of the eight generations.
(
  gen1_HP_list, gen2_HP_list, gen3_HP_list, gen4_HP_list,
  gen5_HP_list, gen6_HP_list, gen7_HP_list, gen8_HP_list,
) = [stat_list_generator(0, gen_number) for gen_number in range(1, 9)]

In [13]:
# HP values of all pokemon, generation 1 first and generation 8 last.
HP_list = [
  value
  for gen_values in (
    gen1_HP_list, gen2_HP_list, gen3_HP_list, gen4_HP_list,
    gen5_HP_list, gen6_HP_list, gen7_HP_list, gen8_HP_list,
  )
  for value in gen_values
]

##### Request Attack information. (run once)

In [14]:
# Request the Attack values (stat index 1) separately for each of the eight generations.
(
  gen1_atk_list, gen2_atk_list, gen3_atk_list, gen4_atk_list,
  gen5_atk_list, gen6_atk_list, gen7_atk_list, gen8_atk_list,
) = [stat_list_generator(1, gen_number) for gen_number in range(1, 9)]

In [15]:
# Attack values of all pokemon, generation 1 first and generation 8 last.
atk_list = [
  value
  for gen_values in (
    gen1_atk_list, gen2_atk_list, gen3_atk_list, gen4_atk_list,
    gen5_atk_list, gen6_atk_list, gen7_atk_list, gen8_atk_list,
  )
  for value in gen_values
]

##### Request Defense information. (run once)

In [16]:
# Request the Defense values (stat index 2) separately for each of the eight generations.
(
  gen1_dfns_list, gen2_dfns_list, gen3_dfns_list, gen4_dfns_list,
  gen5_dfns_list, gen6_dfns_list, gen7_dfns_list, gen8_dfns_list,
) = [stat_list_generator(2, gen_number) for gen_number in range(1, 9)]

In [17]:
# Defense values of all pokemon, generation 1 first and generation 8 last.
dfns_list = [
  value
  for gen_values in (
    gen1_dfns_list, gen2_dfns_list, gen3_dfns_list, gen4_dfns_list,
    gen5_dfns_list, gen6_dfns_list, gen7_dfns_list, gen8_dfns_list,
  )
  for value in gen_values
]

##### Request Special Attack information. (run once)

In [18]:
# Request the Special Attack values (stat index 3) separately for each of the eight generations.
(
  gen1_spa_list, gen2_spa_list, gen3_spa_list, gen4_spa_list,
  gen5_spa_list, gen6_spa_list, gen7_spa_list, gen8_spa_list,
) = [stat_list_generator(3, gen_number) for gen_number in range(1, 9)]

In [19]:
# Special Attack values of all pokemon, generation 1 first and generation 8 last.
spa_list = [
  value
  for gen_values in (
    gen1_spa_list, gen2_spa_list, gen3_spa_list, gen4_spa_list,
    gen5_spa_list, gen6_spa_list, gen7_spa_list, gen8_spa_list,
  )
  for value in gen_values
]

##### Request Special Defense information. (run once)

In [20]:
# Request the Special Defense values (stat index 4) separately for each of the eight generations.
(
  gen1_spdf_list, gen2_spdf_list, gen3_spdf_list, gen4_spdf_list,
  gen5_spdf_list, gen6_spdf_list, gen7_spdf_list, gen8_spdf_list,
) = [stat_list_generator(4, gen_number) for gen_number in range(1, 9)]

In [21]:
# Special Defense values of all pokemon, generation 1 first and generation 8 last.
spdf_list = [
  value
  for gen_values in (
    gen1_spdf_list, gen2_spdf_list, gen3_spdf_list, gen4_spdf_list,
    gen5_spdf_list, gen6_spdf_list, gen7_spdf_list, gen8_spdf_list,
  )
  for value in gen_values
]

##### Request Speed information. (run once)

In [22]:
# Request the Speed values (stat index 5) separately for each of the eight generations.
(
  gen1_speed_list, gen2_speed_list, gen3_speed_list, gen4_speed_list,
  gen5_speed_list, gen6_speed_list, gen7_speed_list, gen8_speed_list,
) = [stat_list_generator(5, gen_number) for gen_number in range(1, 9)]

In [23]:
# Speed values of all pokemon, generation 1 first and generation 8 last.
speed_list = [
  value
  for gen_values in (
    gen1_speed_list, gen2_speed_list, gen3_speed_list, gen4_speed_list,
    gen5_speed_list, gen6_speed_list, gen7_speed_list, gen8_speed_list,
  )
  for value in gen_values
]

##### Request Legendary and Mythical pokemon information. (run once)

In [24]:
# Request the 'is_legendary' flag from the pokemon-species endpoint, separately for each of the eight generations.
(
  gen1_legendary_list, gen2_legendary_list, gen3_legendary_list, gen4_legendary_list,
  gen5_legendary_list, gen6_legendary_list, gen7_legendary_list, gen8_legendary_list,
) = [list_generator('pokemon-species', 'is_legendary', gen_number) for gen_number in range(1, 9)]

# Request the 'is_mythical' flag from the pokemon-species endpoint, separately for each of the eight generations.
(
  gen1_mythical_list, gen2_mythical_list, gen3_mythical_list, gen4_mythical_list,
  gen5_mythical_list, gen6_mythical_list, gen7_mythical_list, gen8_mythical_list,
) = [list_generator('pokemon-species', 'is_mythical', gen_number) for gen_number in range(1, 9)]

In [25]:
# Legendary flags of all pokemon, generation 1 first and generation 8 last.
legendary_list = [
  flag
  for gen_flags in (
    gen1_legendary_list, gen2_legendary_list, gen3_legendary_list, gen4_legendary_list,
    gen5_legendary_list, gen6_legendary_list, gen7_legendary_list, gen8_legendary_list,
  )
  for flag in gen_flags
]
# Mythical flags of all pokemon, in the same order.
mythical_list = [
  flag
  for gen_flags in (
    gen1_mythical_list, gen2_mythical_list, gen3_mythical_list, gen4_mythical_list,
    gen5_mythical_list, gen6_mythical_list, gen7_mythical_list, gen8_mythical_list,
  )
  for flag in gen_flags
]

### **Step 3: Transforming, cleaning, organizing, and exporting dataset to CSV.**

In [29]:
# Turns all previously gathered lists of information into a dictionary, then a dataframe.
dataframe_dict = {
    'ID': list(range(1, 899)),
    'Name': poke_names,
    'Type 1': type_1,
    'Type 2': type_2,
    'Ability 1': ability_1,
    'Ability 2': ability_2,
    'Hidden Ability': hidden_ability,
    'HP': HP_list,
    'Attack': atk_list,
    'Defense': dfns_list,
    'Special Attack': spa_list,
    'Special Defense': spdf_list,
    'Speed': speed_list,
    'Legendary': legendary_list,
    'Mythical': mythical_list
}
pokemon_df = pd.DataFrame(dataframe_dict)

# Making the pseudo-legendary column: a base stat total of exactly 600 on a
# pokemon that is neither legendary nor mythical.
# The six stat columns are selected by name rather than by position, so the
# boolean 'Legendary' column sitting right after 'Speed' is never summed in.
# The column keeps its existing 'Psuedo-Legendary' spelling so the schema of
# the resulting dataset does not change.
stat_columns = ['HP', 'Attack', 'Defense', 'Special Attack', 'Special Defense', 'Speed']
base_stat_total = pokemon_df[stat_columns].sum(axis=1)
pokemon_df['Psuedo-Legendary'] = (
    base_stat_total.eq(600)
    & pokemon_df['Legendary'].eq(False)
    & pokemon_df['Mythical'].eq(False)
)

# Replacing the 0 placeholders (used for "no such type/ability") with NaN values.
cols = ['Type 2', 'Ability 2', 'Hidden Ability']
pokemon_df[cols] = pokemon_df[cols].replace(0, np.nan)
display(pokemon_df)

# Exporting dataset to CSV
# pokemon_df.to_csv('pokemon-data.csv')

Unnamed: 0,ID,Name,Type 1,Type 2,Ability 1,Ability 2,Hidden Ability,HP,Attack,Defense,Special Attack,Special Defense,Speed,Legendary,Mythical,Psuedo-Legendary
0,1,bulbasaur,grass,poison,overgrow,,chlorophyll,45,49,49,65,65,45,False,False,False
1,2,ivysaur,grass,poison,overgrow,,chlorophyll,60,62,63,80,80,60,False,False,False
2,3,venusaur,grass,poison,overgrow,,chlorophyll,80,82,83,100,100,80,False,False,False
3,4,charmander,fire,,blaze,,solar-power,39,52,43,60,50,65,False,False,False
4,5,charmeleon,fire,,blaze,,solar-power,58,64,58,80,65,80,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
893,894,regieleki,electric,,transistor,,,80,100,50,100,50,200,True,False,False
894,895,regidrago,dragon,,dragons-maw,,,200,100,50,100,50,80,True,False,False
895,896,glastrier,ice,,chilling-neigh,,,100,145,130,65,110,30,True,False,False
896,897,spectrier,ghost,,grim-neigh,,,100,65,60,145,80,130,True,False,False
