This notebook is used to parse the individual pokemon data.

# Libraries

In [1]:
import requests
import json
import pandas as pd
import numpy as np

# Pulling and Cleaning Data from the PokemonAPI

## Grab the Data

Pull the pokemon data from the API

In [2]:
# Fetch the listing of pokemon from the API.
# NOTE(review): limit=1000 caps the listing; compare it with the 'count' field
# of the response to confirm that no needed entry is cut off.
query = requests.get('https://pokeapi.co/api/v2/pokemon?limit=1000&offset=0', timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
query.raise_for_status()
query_data = json.loads(query.text)
print(query_data.keys())

dict_keys(['count', 'next', 'previous', 'results'])


In [3]:
# Keep only the list of entries from the listing response; the wrapper fields
# ('count', 'next', 'previous') are not needed further down.
pokemon_data = query_data["results"]

## Data Exploration

This section is just for looking at different parts of the JSON response to learn how everything is structured.

In [4]:
# Look at the first entry of the listing to see which fields it carries.
test = pokemon_data[0]
print(test.keys())

print(test["name"])

# Follow the entry's URL to fetch that pokemon's own page and list its fields.
pokemon_query = requests.get(test["url"])
pokemon_query_data = json.loads(pokemon_query.text)
print(pokemon_query_data.keys())

dict_keys(['name', 'url'])
bulbasaur
dict_keys(['abilities', 'base_experience', 'forms', 'game_indices', 'height', 'held_items', 'id', 'is_default', 'location_area_encounters', 'moves', 'name', 'order', 'past_types', 'species', 'sprites', 'stats', 'types', 'weight'])


In [5]:
# Show the "id" field of the pokemon page fetched during exploration.
pokemon_query_data["id"]

1

## Clean the Data
Build the DataFrame. I am not yet sure how to handle the evolutionary data or the image data, so for now those columns are omitted from the table.

In [6]:
# Variables that will make the cleaning a lot easier

# Column order of the final table. The base-stat columns use the same names as
# the stats in the API data, because the cleaning loop looks them up by name.
columns_needed = ["name","pokedex_id","attack", "special-attack", "defense", "special-defense", "speed", "hp", "type1", "type2", "list_of_moves", "ability_name", "height", "weight", "generation"]

# Game version name -> generation number. The cleaning loop uses the first game
# a pokemon is listed in to decide its generation and skips pokemon whose first
# game is not a key here. Remakes are listed under the generation they were
# released in (FireRed/LeafGreen: 3, HeartGold/SoulSilver: 4).
game_gen = {
    "red" : 1,
    "blue" : 1,
    "yellow" : 1,
    "gold" : 2,
    "silver" : 2,
    "crystal" : 2,
    "ruby": 3,
    "sapphire": 3,
    "firered": 3,
    "leafgreen": 3,
    "emerald": 3,
    "diamond": 4,
    "pearl": 4,
    "heartgold": 4,
    "soulsilver": 4,
    "platinum": 4,
    "black": 5,
    "white": 5,
    "black-2": 5,
    "white-2": 5
}

# Generation identifier -> generation number.
gens = {'generation-i':1, 'generation-ii':2, 'generation-iii':3, 'generation-iv':4, 'generation-v':5}
# Presumably version-group names for the two most recent game pairs covered;
# they are not used by the cells in this notebook -- confirm before relying on them.
latest_game = "black-2-white-2"
second_latest_game = "black-white"

# Lists of the move and ability names to keep, taken from the "name" column of
# each lookup CSV.
moves_table = pd.read_csv("move_names.csv")
move_names = moves_table["name"].tolist()

abilities_table = pd.read_csv("ability_data.csv")
ability_names = abilities_table["name"].tolist()

In [7]:
# Display the column list as a quick sanity check.
columns_needed

['name',
 'pokedex_id',
 'attack',
 'special-attack',
 'defense',
 'special-defense',
 'speed',
 'hp',
 'type1',
 'type2',
 'list_of_moves',
 'ability_name',
 'height',
 'weight',
 'generation']

In [8]:
def parse_pokemon(payload, columns, game_generations, allowed_moves, allowed_abilities):
    """Convert the parsed JSON of one pokemon page into a row for the table.

    Parameters
    ----------
    payload : dict
        Parsed JSON of a single pokemon page from the API.
    columns : list of str
        Column names of the table. Every value is stored under one of them.
    game_generations : dict
        Maps a game version name to its generation number.
    allowed_moves, allowed_abilities : container of str
        Names that may appear in the move / ability lists; all others are dropped.

    Returns
    -------
    dict or None
        The row keyed by column name, or None when the pokemon has no game
        indices or its first listed game is not a key of ``game_generations``.

    Raises
    ------
    KeyError, IndexError, TypeError
        If the payload lacks a field that is read here.
    ValueError
        If a value has no matching column, or the pokemon has no types.
    """
    # check that the pokemon is from one of the generations we care about
    game_indices = payload["game_indices"]
    if not game_indices:
        return None
    first_game = game_indices[0]["version"]["name"]
    if first_game not in game_generations:
        return None

    # Every column starts at 0 and is overwritten below.
    row = dict.fromkeys(columns, 0)

    def put(column, value):
        # Refuse to write to a column the table does not have, so that a
        # mismatch is reported instead of being silently dropped.
        if column not in row:
            raise ValueError(str(column) + " index not found")
        row[column] = value

    put("generation", game_generations[first_game])
    put("name", payload["name"])
    put("pokedex_id", payload["id"])
    put("height", payload["height"])
    put("weight", payload["weight"])

    # Base stats: each stat is stored in the column that carries its name.
    for stat in payload["stats"]:
        put(stat["stat"]["name"], stat["base_stat"])

    # Types: type2 is NaN for pokemon with a single type.
    types = payload["types"]
    if not types:
        raise ValueError(str(payload["name"]) + " has no types")
    put("type1", types[0]["type"]["name"])
    put("type2", types[1]["type"]["name"] if len(types) > 1 else np.nan)

    # Moves, as (name, -1) pairs.
    # TODO: record how the pokemon learns each move; the -1 is presumably a
    # placeholder for that information.
    put("list_of_moves", [
        (move["move"]["name"], -1)
        for move in payload["moves"]
        if move["move"]["name"] in allowed_moves
    ])

    # Abilities, as (name, is_hidden) pairs.
    put("ability_name", [
        (ability["ability"]["name"], ability["is_hidden"])
        for ability in payload["abilities"]
        if ability["ability"]["name"] in allowed_abilities
    ])

    return row


# Sets make the per-move / per-ability membership checks constant time.
allowed_moves = set(move_names)
allowed_abilities = set(ability_names)

rows = []
with requests.Session() as session:  # one session reuses the connection across requests
    for pokemon in pokemon_data:
        # pull the page that has all the data we need
        pokemon_query = session.get(pokemon["url"], timeout=30)
        # Fail loudly on an HTTP error instead of parsing an error page as JSON.
        pokemon_query.raise_for_status()
        pokemon_query_data = json.loads(pokemon_query.text)

        try:
            row = parse_pokemon(
                pokemon_query_data,
                columns_needed,
                game_gen,
                allowed_moves,
                allowed_abilities,
            )
        except (KeyError, IndexError, TypeError, ValueError) as inst:
            # Stop at the first malformed record; the rows parsed so far are kept.
            print("error parsing", pokemon["name"], "-", repr(inst))
            break

        if row is not None:
            rows.append(row)

# Build the table once from the collected rows instead of growing it row by row.
pokemon_df = pd.DataFrame(rows, columns=columns_needed)

# Results

In [9]:
# Display the assembled table, one row per pokemon that was kept.
pokemon_df

Unnamed: 0,name,pokedex_id,attack,special-attack,defense,special-defense,speed,hp,type1,type2,list_of_moves,ability_name,height,weight,generation
0,bulbasaur,1,49,65,49,65,45,45,grass,poison,"[(razor-wind, -1), (swords-dance, -1), (cut, -...","[(overgrow, False), (chlorophyll, True)]",7,69,1
1,ivysaur,2,62,80,63,80,60,60,grass,poison,"[(swords-dance, -1), (cut, -1), (bind, -1), (v...","[(overgrow, False), (chlorophyll, True)]",10,130,1
2,venusaur,3,82,100,83,100,80,80,grass,poison,"[(swords-dance, -1), (cut, -1), (bind, -1), (v...","[(overgrow, False), (chlorophyll, True)]",20,1000,1
3,charmander,4,52,60,43,50,65,39,fire,,"[(mega-punch, -1), (fire-punch, -1), (thunder-...","[(blaze, False), (solar-power, True)]",6,85,1
4,charmeleon,5,64,80,58,65,80,58,fire,,"[(mega-punch, -1), (fire-punch, -1), (thunder-...","[(blaze, False), (solar-power, True)]",11,190,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
668,thundurus-therian,10020,105,145,70,80,101,79,electric,flying,"[(thunder-punch, -1), (fly, -1), (body-slam, -...","[(volt-absorb, False)]",30,610,5
669,landorus-therian,10021,145,105,90,80,91,89,ground,flying,"[(swords-dance, -1), (fly, -1), (body-slam, -1...","[(intimidate, False)]",13,680,5
670,kyurem-black,10022,170,120,100,90,95,125,dragon,ice,"[(cut, -1), (fly, -1), (ice-beam, -1), (blizza...","[(teravolt, False)]",33,3250,5
671,kyurem-white,10023,120,170,90,100,95,125,dragon,ice,"[(cut, -1), (fly, -1), (ice-beam, -1), (blizza...","[(turboblaze, False)]",36,3250,5


In [10]:
# Save the table to disk. With pandas' default settings the row index is
# written as well, as the first (unnamed) column.
pokemon_df.to_csv("pokemon_data.csv")