# Pokemon Classification

### Import Libraries

In [1]:
import numpy as np
import pandas as pd
from pandas import isnull
import config
import json
import requests
from collections import defaultdict, Counter
import time
import random

import os
import re
import emoji
from nltk.corpus import stopwords
from string import punctuation
from wordcloud import WordCloud 
from matplotlib import pyplot as plt

from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer


### API Request
Source Documentation [https://pokeapi.co/docs/v2#pokemon, https://pokeapi.co/docs/v2#pokemon-colors]

The data being pulled using the pokemon api are the pokemon statistics and characteristics. 


In [None]:
api_url = "https://pokeapi.co/api/v2/pokemon/"
params = {"limit": "1281"} # The total amount of pokemon
pokemon_api = []
res = requests.request("GET", api_url, params=params)
data = res.json()
print("Pokemon and Pokemon APIs have been pulled")
for pokemon in data["results"]:
    for k, v in pokemon.items():
        pokemon_api.append(v)
print("List of Pokemon and Pokemon API have been created")

In [None]:
# Separate Names and APIs into two lists
pokemon_name = pokemon_api[::2]
pokemon_API = pokemon_api[1::2]

# Merge into a Dictionary with Pokemon Names as Key and APIs as Value
pokemon_all = {}
for key in pokemon_name:
    for value in pokemon_API:
        pokemon_all[key] = value
        pokemon_API.remove(value)
        break
print("Dictionary of All Pokemon and Their APIs is: ", pokemon_all)

In [None]:
# List of all pokemon as well as the api url call
pokemon_all

In [None]:
# Check to see if there are 1281 pokemon
print("The number of pokemon called should be 1281: ", len(pokemon_all))

In [None]:
# Dictionary set up for Pokemon Statistics
pokemon_data = defaultdict(list)

for pokemon, api in pokemon_all.items() :
    # request the page and sleep
    r = requests.request("GET", str(api))
    time.sleep(5 + 10*random.random())

    # Add to ensure that request was successful
    #print("If 200, request was successful: ", r.status_code)

    d = r.json()
    pokemon_data[pokemon].append(d)


In [None]:
# API to pull Pokemon Colors

color_api = "https://pokeapi.co/api/v2/pokemon-color/"
res1 = requests.request("GET", color_api)
colors = []
data1 = res1.json()
for color in data1["results"]:
    for k, v in color.items():
        colors.append(v)

# Separate Names and APIs into two lists
pokemon_color = colors[::2]
pokemon_color_API = colors[1::2]

In [None]:
#Merge into a Dictionary with Pokemon Colors as Key and APIs as Value
pokemon_colors = {}
for key in pokemon_color:
    for value in pokemon_color_API:
        pokemon_colors[key] = value
        pokemon_color_API.remove(value)
        break
print("Dictionary of All Pokemon Colors and Their APIs is: ", pokemon_colors)

In [None]:
# Dictionary set up for Pokemon Colors
pokemon_colors_data = defaultdict(list)

for color, api in pokemon_colors.items() :
    # request the page and sleep
    r = requests.request("GET", str(api))
    time.sleep(5 + 10*random.random())

    # Add to ensure that request was successful
    #print("If 200, request was successful: ", r.status_code)

    d = r.json()
    pokemon_colors_data[color].append(d)

In [None]:
print(len(pokemon_colors_data))

Export Pokemon Statistics API data to txt file to store data as the API call took about 7.5 hours.

Export of Pokemon Colors API data to txt file to store data. API call time approx. 2 minutes.

In [None]:
with open('Pokemon_Data.txt', 'w') as Pokemon_Data:
     Pokemon_Data.write(json.dumps(pokemon_data))

In [None]:
with open('Pokemon_Colors.txt', 'w') as Pokemon_Colors:
     Pokemon_Colors.write(json.dumps(pokemon_colors_data))

### Load Data

In [2]:
f = open('Pokemon_data.txt')
data = json.load(f)
print(len(data))

1281


In [3]:
f1 = open('Pokemon_colors.txt')
data1 = json.load(f1)
print(len(data1))

10


### Data Ingestion and Pre-Processing

In [4]:
# Some punctuation variations
punctuation = set(punctuation) # speeds up comparison
tw_punct = punctuation - {'''#{}[]'''}

# Stopwords
sw = stopwords.words("english")

# Two useful regex
whitespace_pattern = re.compile(r"\s+")
hashtag_pattern = re.compile(r"^#[0-9a-zA-Z]+")


# and now our functions
def descriptive_stats(tokens, num_words = 5, verbose=True) :
    """
        Given a list of tokens, print number of tokens, number of unique tokens, 
        number of characters, lexical diversity (https://en.wikipedia.org/wiki/Lexical_diversity), 
        and num_tokens most common tokens. Return a list with the number of tokens, number
        of unique tokens, lexical diversity, and number of characters. 
    
    """
    
    # Fill in the correct values here. 
    num_tokens = len(tokens)
    num_unique_tokens = len(Counter(tokens).keys())
    lexical_diversity = round((num_unique_tokens/num_tokens),2) # Rounded to 2 Digits to match format of existing decimal rounding below
    num_characters = sum(len(i) for i in tokens)
    most_common = Counter(tokens).most_common(num_words)
    if verbose :        
        print(f"There are {num_tokens} tokens in the data.")
        print(f"There are {num_unique_tokens} unique tokens in the data.")
        print(f"There are {num_characters} characters in the data.")
        print(f"The lexical diversity is {lexical_diversity:.3f} in the data.")
    
        # print the five most common tokens
        print(f"The most common tokens are {most_common}.")
        
    return

# Removing URL's
def remove_URL(text):
    return re.sub(r"'url ' : 'http\S+", "", text)

def remove_stop(tokens) :
    tokens = [file for file in tokens if file not in sw]
    return(tokens)
 
def remove_punctuation(text, punct_set=tw_punct) : 
    return("".join([ch for ch in text if ch not in punct_set]))

def tokenize(text) : 
    """ Splitting on whitespace rather than the book's tokenize function. That 
        function will drop tokens like '#hashtag' or '2A', which we need for Twitter. """
    
    text = [file.lower().strip() for file in text.split()]

    return(text)

def prepare(text, pipeline) : 
    tokens = str(text)
    
    for transform in pipeline : 
        tokens = transform(tokens)
        
    return(tokens)

In [5]:
pokemon_list = []
for pokemon in data:
    for features in data[pokemon]:
        #print(a) # a is the whole section of combined features
        for feature, description in features.items():
            # b is the dictionaries such as ability, weight, stats that are pulled
            #print(c) # Farthest I can break down for now.
            poke_dict = {'pokemon': pokemon, 'feature': feature, 'description': description}
            pokemon_list.append(poke_dict)

In [6]:
my_pipeline = [str.lower,  remove_URL,tokenize, remove_punctuation, tokenize]
cleaned_data = []
for row in pokemon_list :
    text = " ".join(prepare(row, pipeline = my_pipeline))
    if text :
        cleaned_data.append(text)

In [7]:
df = pd.DataFrame.from_dict(pokemon_list)
df

Unnamed: 0,pokemon,feature,description
0,bulbasaur,abilities,"[{'ability': {'name': 'overgrow', 'url': 'http..."
1,bulbasaur,base_experience,64
2,bulbasaur,forms,"[{'name': 'bulbasaur', 'url': 'https://pokeapi..."
3,bulbasaur,game_indices,"[{'game_index': 153, 'version': {'name': 'red'..."
4,bulbasaur,height,7
...,...,...,...
23053,miraidon-glide-mode,species,"{'name': 'miraidon', 'url': 'https://pokeapi.c..."
23054,miraidon-glide-mode,sprites,"{'back_default': None, 'back_female': None, 'b..."
23055,miraidon-glide-mode,stats,"[{'base_stat': 100, 'effort': 0, 'stat': {'nam..."
23056,miraidon-glide-mode,types,"[{'slot': 1, 'type': {'name': 'electric', 'url..."


In [8]:
# Transpose Dataframe for Pokemon Statistics

tdf = df.pivot(columns = 'feature', values = 'description', index = 'pokemon')
tdf

feature,abilities,base_experience,forms,game_indices,height,held_items,id,is_default,location_area_encounters,moves,name,order,past_types,species,sprites,stats,types,weight
pokemon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
abomasnow,"[{'ability': {'name': 'snow-warning', 'url': '...",173,"[{'name': 'abomasnow', 'url': 'https://pokeapi...","[{'game_index': 460, 'version': {'name': 'diam...",22,"[{'item': {'name': 'never-melt-ice', 'url': 'h...",460,True,https://pokeapi.co/api/v2/pokemon/460/encounters,"[{'move': {'name': 'mega-punch', 'url': 'https...",abomasnow,585,[],"{'name': 'abomasnow', 'url': 'https://pokeapi....",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 90, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1355
abomasnow-mega,"[{'ability': {'name': 'snow-warning', 'url': '...",208,"[{'name': 'abomasnow-mega', 'url': 'https://po...",[],27,"[{'item': {'name': 'never-melt-ice', 'url': 'h...",10060,False,https://pokeapi.co/api/v2/pokemon/10060/encoun...,"[{'move': {'name': 'ice-punch', 'url': 'https:...",abomasnow-mega,586,[],"{'name': 'abomasnow', 'url': 'https://pokeapi....","{'back_default': None, 'back_female': None, 'b...","[{'base_stat': 90, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1850
abra,"[{'ability': {'name': 'synchronize', 'url': 'h...",62,"[{'name': 'abra', 'url': 'https://pokeapi.co/a...","[{'game_index': 148, 'version': {'name': 'red'...",9,"[{'item': {'name': 'twisted-spoon', 'url': 'ht...",63,True,https://pokeapi.co/api/v2/pokemon/63/encounters,"[{'move': {'name': 'mega-punch', 'url': 'https...",abra,103,[],"{'name': 'abra', 'url': 'https://pokeapi.co/ap...",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 25, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'psychic', 'url'...",195
absol,"[{'ability': {'name': 'pressure', 'url': 'http...",163,"[{'name': 'absol', 'url': 'https://pokeapi.co/...","[{'game_index': 376, 'version': {'name': 'ruby...",12,"[{'item': {'name': 'life-orb', 'url': 'https:/...",359,True,https://pokeapi.co/api/v2/pokemon/359/encounters,"[{'move': {'name': 'scratch', 'url': 'https://...",absol,478,[],"{'name': 'absol', 'url': 'https://pokeapi.co/a...",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 65, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'dark', 'url': '...",470
absol-mega,"[{'ability': {'name': 'magic-bounce', 'url': '...",198,"[{'name': 'absol-mega', 'url': 'https://pokeap...",[],12,"[{'item': {'name': 'life-orb', 'url': 'https:/...",10057,False,https://pokeapi.co/api/v2/pokemon/10057/encoun...,"[{'move': {'name': 'scratch', 'url': 'https://...",absol-mega,479,[],"{'name': 'absol', 'url': 'https://pokeapi.co/a...","{'back_default': None, 'back_female': None, 'b...","[{'base_stat': 65, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'dark', 'url': '...",490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zygarde-10,"[{'ability': {'name': 'aura-break', 'url': 'ht...",243,"[{'name': 'zygarde-10', 'url': 'https://pokeap...",[],12,[],10181,False,https://pokeapi.co/api/v2/pokemon/10181/encoun...,"[{'move': {'name': 'bind', 'url': 'https://pok...",zygarde-10,859,[],"{'name': 'zygarde', 'url': 'https://pokeapi.co...","{'back_default': None, 'back_female': None, 'b...","[{'base_stat': 54, 'effort': 3, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'dragon', 'url':...",335
zygarde-10-power-construct,"[{'ability': {'name': 'power-construct', 'url'...",243,"[{'name': 'zygarde-10-power-construct', 'url':...",[],12,[],10118,False,https://pokeapi.co/api/v2/pokemon/10118/encoun...,"[{'move': {'name': 'bind', 'url': 'https://pok...",zygarde-10-power-construct,860,[],"{'name': 'zygarde', 'url': 'https://pokeapi.co...",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 54, 'effort': 3, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'dragon', 'url':...",335
zygarde-50,"[{'ability': {'name': 'aura-break', 'url': 'ht...",300,"[{'name': 'zygarde-50', 'url': 'https://pokeap...",[],50,[],718,True,https://pokeapi.co/api/v2/pokemon/718/encounters,"[{'move': {'name': 'bind', 'url': 'https://pok...",zygarde-50,858,[],"{'name': 'zygarde', 'url': 'https://pokeapi.co...","{'back_default': None, 'back_female': None, 'b...","[{'base_stat': 108, 'effort': 3, 'stat': {'nam...","[{'slot': 1, 'type': {'name': 'dragon', 'url':...",3050
zygarde-50-power-construct,"[{'ability': {'name': 'power-construct', 'url'...",300,"[{'name': 'zygarde-50-power-construct', 'url':...",[],50,[],10119,False,https://pokeapi.co/api/v2/pokemon/10119/encoun...,"[{'move': {'name': 'bind', 'url': 'https://pok...",zygarde-50-power-construct,861,[],"{'name': 'zygarde', 'url': 'https://pokeapi.co...",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 108, 'effort': 3, 'stat': {'nam...","[{'slot': 1, 'type': {'name': 'dragon', 'url':...",3050


In [9]:
# Reset Index as ID for the dataframe

tdf.reset_index(inplace=True)
tdf.set_index('id', inplace = True)
tdf.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1281 entries, 460 to 10120
Data columns (total 18 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   pokemon                   1281 non-null   object
 1   abilities                 1281 non-null   object
 2   base_experience           1126 non-null   object
 3   forms                     1281 non-null   object
 4   game_indices              1281 non-null   object
 5   height                    1281 non-null   object
 6   held_items                1281 non-null   object
 7   is_default                1281 non-null   object
 8   location_area_encounters  1281 non-null   object
 9   moves                     1281 non-null   object
 10  name                      1281 non-null   object
 11  order                     1281 non-null   object
 12  past_types                1281 non-null   object
 13  species                   1281 non-null   object
 14  sprites                   

In [10]:
pokemon_colors_list = []
for colors in data1:
    for features in data1[colors]:
        #print(a) # a is the whole section of combined features
        for feature, description in features.items():
            # b is the dictionaries such as ability, weight, stats that are pulled
            #print(c) # Farthest I can break down for now.
            poke_dict1 = {'color': colors, 'features': feature, 'descriptions': description}
            pokemon_colors_list.append(poke_dict1)

In [11]:
# Transpose Dataframe

df1 = pd.DataFrame.from_dict(pokemon_colors_list)
cdf = df1.pivot(columns = 'features', values = 'descriptions', index = 'color')
cdf

features,id,name,names,pokemon_species
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
black,1,black,"[{'language': {'name': 'ja-Hrkt', 'url': 'http...","[{'name': 'murkrow', 'url': 'https://pokeapi.c..."
blue,2,blue,"[{'language': {'name': 'ja-Hrkt', 'url': 'http...","[{'name': 'squirtle', 'url': 'https://pokeapi...."
brown,3,brown,"[{'language': {'name': 'ja-Hrkt', 'url': 'http...","[{'name': 'weedle', 'url': 'https://pokeapi.co..."
gray,4,gray,"[{'language': {'name': 'ja-Hrkt', 'url': 'http...","[{'name': 'machop', 'url': 'https://pokeapi.co..."
green,5,green,"[{'language': {'name': 'ja-Hrkt', 'url': 'http...","[{'name': 'bulbasaur', 'url': 'https://pokeapi..."
pink,6,pink,"[{'language': {'name': 'ja-Hrkt', 'url': 'http...","[{'name': 'slowpoke', 'url': 'https://pokeapi...."
purple,7,purple,"[{'language': {'name': 'ja-Hrkt', 'url': 'http...","[{'name': 'rattata', 'url': 'https://pokeapi.c..."
red,8,red,"[{'language': {'name': 'ja-Hrkt', 'url': 'http...","[{'name': 'charmander', 'url': 'https://pokeap..."
white,9,white,"[{'language': {'name': 'ja-Hrkt', 'url': 'http...","[{'name': 'seel', 'url': 'https://pokeapi.co/a..."
yellow,10,yellow,"[{'language': {'name': 'ja-Hrkt', 'url': 'http...","[{'name': 'sandshrew', 'url': 'https://pokeapi..."


In [12]:
# Drop Unnecessary Columns

cdf = cdf.drop(columns = ['id', 'names'])
cdf = cdf.reset_index(drop = True)
cdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   name             10 non-null     object
 1   pokemon_species  10 non-null     object
dtypes: object(2)
memory usage: 288.0+ bytes


In [13]:
cdf

features,name,pokemon_species
0,black,"[{'name': 'murkrow', 'url': 'https://pokeapi.c..."
1,blue,"[{'name': 'squirtle', 'url': 'https://pokeapi...."
2,brown,"[{'name': 'weedle', 'url': 'https://pokeapi.co..."
3,gray,"[{'name': 'machop', 'url': 'https://pokeapi.co..."
4,green,"[{'name': 'bulbasaur', 'url': 'https://pokeapi..."
5,pink,"[{'name': 'slowpoke', 'url': 'https://pokeapi...."
6,purple,"[{'name': 'rattata', 'url': 'https://pokeapi.c..."
7,red,"[{'name': 'charmander', 'url': 'https://pokeap..."
8,white,"[{'name': 'seel', 'url': 'https://pokeapi.co/a..."
9,yellow,"[{'name': 'sandshrew', 'url': 'https://pokeapi..."


### Descriptive Statistics

In [14]:
# Dictionary
descriptive_stats(cleaned_data)

There are 23058 tokens in the data.
There are 23058 unique tokens in the data.
There are 139245926 characters in the data.
The lexical diversity is 1.000 in the data.
The most common tokens are [("{'pokemon':'bulbasaur','feature':'abilities','description':[{'ability':{'name':'overgrow','url':'https://pokeapi.co/api/v2/ability/65/'},'is_hidden':false,'slot':1},{'ability':{'name':'chlorophyll','url':'https://pokeapi.co/api/v2/ability/34/'},'is_hidden':true,'slot':3}]}", 1), ("{'pokemon':'bulbasaur','feature':'base_experience','description':64}", 1), ("{'pokemon':'bulbasaur','feature':'forms','description':[{'name':'bulbasaur','url':'https://pokeapi.co/api/v2/pokemon-form/1/'}]}", 1), ("{'pokemon':'bulbasaur','feature':'game_indices','description':[{'game_index':153,'version':{'name':'red','url':'https://pokeapi.co/api/v2/version/1/'}},{'game_index':153,'version':{'name':'blue','url':'https://pokeapi.co/api/v2/version/2/'}},{'game_index':153,'version':{'name':'yellow','url':'https://pokea

In [15]:
# DataFrame
descriptive_stats(tdf)

There are 1281 tokens in the data.
There are 18 unique tokens in the data.
There are 152 characters in the data.
The lexical diversity is 0.010 in the data.
The most common tokens are [('pokemon', 1), ('abilities', 1), ('base_experience', 1), ('forms', 1), ('game_indices', 1)].


### Data Cleaning

`Moves`, `Abilities`, and `Types` features have a list of dictionaries within each pokemon type. For getting it ready to model, cleaning is done to separate out the data to have a list of terms.


In [16]:
tdf

feature,pokemon,abilities,base_experience,forms,game_indices,height,held_items,is_default,location_area_encounters,moves,name,order,past_types,species,sprites,stats,types,weight
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
460,abomasnow,"[{'ability': {'name': 'snow-warning', 'url': '...",173,"[{'name': 'abomasnow', 'url': 'https://pokeapi...","[{'game_index': 460, 'version': {'name': 'diam...",22,"[{'item': {'name': 'never-melt-ice', 'url': 'h...",True,https://pokeapi.co/api/v2/pokemon/460/encounters,"[{'move': {'name': 'mega-punch', 'url': 'https...",abomasnow,585,[],"{'name': 'abomasnow', 'url': 'https://pokeapi....",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 90, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1355
10060,abomasnow-mega,"[{'ability': {'name': 'snow-warning', 'url': '...",208,"[{'name': 'abomasnow-mega', 'url': 'https://po...",[],27,"[{'item': {'name': 'never-melt-ice', 'url': 'h...",False,https://pokeapi.co/api/v2/pokemon/10060/encoun...,"[{'move': {'name': 'ice-punch', 'url': 'https:...",abomasnow-mega,586,[],"{'name': 'abomasnow', 'url': 'https://pokeapi....","{'back_default': None, 'back_female': None, 'b...","[{'base_stat': 90, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1850
63,abra,"[{'ability': {'name': 'synchronize', 'url': 'h...",62,"[{'name': 'abra', 'url': 'https://pokeapi.co/a...","[{'game_index': 148, 'version': {'name': 'red'...",9,"[{'item': {'name': 'twisted-spoon', 'url': 'ht...",True,https://pokeapi.co/api/v2/pokemon/63/encounters,"[{'move': {'name': 'mega-punch', 'url': 'https...",abra,103,[],"{'name': 'abra', 'url': 'https://pokeapi.co/ap...",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 25, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'psychic', 'url'...",195
359,absol,"[{'ability': {'name': 'pressure', 'url': 'http...",163,"[{'name': 'absol', 'url': 'https://pokeapi.co/...","[{'game_index': 376, 'version': {'name': 'ruby...",12,"[{'item': {'name': 'life-orb', 'url': 'https:/...",True,https://pokeapi.co/api/v2/pokemon/359/encounters,"[{'move': {'name': 'scratch', 'url': 'https://...",absol,478,[],"{'name': 'absol', 'url': 'https://pokeapi.co/a...",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 65, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'dark', 'url': '...",470
10057,absol-mega,"[{'ability': {'name': 'magic-bounce', 'url': '...",198,"[{'name': 'absol-mega', 'url': 'https://pokeap...",[],12,"[{'item': {'name': 'life-orb', 'url': 'https:/...",False,https://pokeapi.co/api/v2/pokemon/10057/encoun...,"[{'move': {'name': 'scratch', 'url': 'https://...",absol-mega,479,[],"{'name': 'absol', 'url': 'https://pokeapi.co/a...","{'back_default': None, 'back_female': None, 'b...","[{'base_stat': 65, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'dark', 'url': '...",490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10181,zygarde-10,"[{'ability': {'name': 'aura-break', 'url': 'ht...",243,"[{'name': 'zygarde-10', 'url': 'https://pokeap...",[],12,[],False,https://pokeapi.co/api/v2/pokemon/10181/encoun...,"[{'move': {'name': 'bind', 'url': 'https://pok...",zygarde-10,859,[],"{'name': 'zygarde', 'url': 'https://pokeapi.co...","{'back_default': None, 'back_female': None, 'b...","[{'base_stat': 54, 'effort': 3, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'dragon', 'url':...",335
10118,zygarde-10-power-construct,"[{'ability': {'name': 'power-construct', 'url'...",243,"[{'name': 'zygarde-10-power-construct', 'url':...",[],12,[],False,https://pokeapi.co/api/v2/pokemon/10118/encoun...,"[{'move': {'name': 'bind', 'url': 'https://pok...",zygarde-10-power-construct,860,[],"{'name': 'zygarde', 'url': 'https://pokeapi.co...",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 54, 'effort': 3, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'dragon', 'url':...",335
718,zygarde-50,"[{'ability': {'name': 'aura-break', 'url': 'ht...",300,"[{'name': 'zygarde-50', 'url': 'https://pokeap...",[],50,[],True,https://pokeapi.co/api/v2/pokemon/718/encounters,"[{'move': {'name': 'bind', 'url': 'https://pok...",zygarde-50,858,[],"{'name': 'zygarde', 'url': 'https://pokeapi.co...","{'back_default': None, 'back_female': None, 'b...","[{'base_stat': 108, 'effort': 3, 'stat': {'nam...","[{'slot': 1, 'type': {'name': 'dragon', 'url':...",3050
10119,zygarde-50-power-construct,"[{'ability': {'name': 'power-construct', 'url'...",300,"[{'name': 'zygarde-50-power-construct', 'url':...",[],50,[],False,https://pokeapi.co/api/v2/pokemon/10119/encoun...,"[{'move': {'name': 'bind', 'url': 'https://pok...",zygarde-50-power-construct,861,[],"{'name': 'zygarde', 'url': 'https://pokeapi.co...",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 108, 'effort': 3, 'stat': {'nam...","[{'slot': 1, 'type': {'name': 'dragon', 'url':...",3050


Clean Types into list of Types

In [17]:
pokemon_types = []
for val in tdf['types']:
    #print(val)
    res = [sub['type'] for sub in val]
    #print(res) # list of dictionaries of moves
    results = [sub['name'] for sub in res]
    #print(results) # Woo! Output of Moves
    pokemon_types.append(results)

In [18]:
types_df = pd.DataFrame(columns = ["types"])
poke_types = []
for row in pokemon_types[:1281]:
    row_dict = {'types': row}
    poke_types.append(row_dict)

types_df = pd.DataFrame.from_dict(poke_types)
types_df.reset_index(drop=True)

Unnamed: 0,types
0,"[grass, ice]"
1,"[grass, ice]"
2,[psychic]
3,[dark]
4,[dark]
...,...
1276,"[dragon, ground]"
1277,"[dragon, ground]"
1278,"[dragon, ground]"
1279,"[dragon, ground]"


Clean Abilities Column from List of Dictionaries into list of abilities

In [19]:
pokemon_abilities = []
for val in tdf['abilities']:
    #print(val)
    res = [sub['ability'] for sub in val]
    #print(res) # list of dictionaries of moves
    results = [sub['name'] for sub in res]
    #print(results) # Woo! Output of Moves
    pokemon_abilities.append(results)

In [20]:
abilities_df = pd.DataFrame(columns = ["abilities"])
poke_abilities = []
for row in pokemon_abilities[:1281]:
    row_dict = {'abilities': row}
    poke_abilities.append(row_dict)

abilities_df = pd.DataFrame.from_dict(poke_abilities)
abilities_df.reset_index(drop=True)

Unnamed: 0,abilities
0,"[snow-warning, soundproof]"
1,[snow-warning]
2,"[synchronize, inner-focus, magic-guard]"
3,"[pressure, super-luck, justified]"
4,[magic-bounce]
...,...
1276,[aura-break]
1277,[power-construct]
1278,[aura-break]
1279,[power-construct]


Drop Old Abilities Column to prepare for Cleaned Abilities to Dataframe

In [21]:
tdf = tdf.drop(columns=['abilities'])

Clean moves into list of moves

In [22]:
pokemon_moves = []
for val in tdf['moves']:
    #print(val)
    res = [sub['move'] for sub in val]
    #print(res) # list of dictionaries of moves
    results = [sub['name'] for sub in res]
    #print(results) # Woo! Output of Moves
    pokemon_moves.append(results)

In [23]:
moves_df = pd.DataFrame(columns = ["moves"])
poke_moves = []
for row in pokemon_moves[:1281]:
    row_dict = {'moves': row}
    poke_moves.append(row_dict)

moves_df = pd.DataFrame.from_dict(poke_moves)
moves_df.reset_index(drop=True)

Unnamed: 0,moves
0,"[mega-punch, ice-punch, swords-dance, mega-kic..."
1,"[ice-punch, swords-dance, leer, mist, ice-beam..."
2,"[mega-punch, fire-punch, ice-punch, thunder-pu..."
3,"[scratch, razor-wind, swords-dance, cut, headb..."
4,"[scratch, razor-wind, swords-dance, cut, doubl..."
...,...
1276,"[bind, body-slam, bite, hyper-beam, earthquake..."
1277,"[bind, body-slam, bite, hyper-beam, earthquake..."
1278,"[bind, body-slam, bite, hyper-beam, strength, ..."
1279,"[bind, body-slam, bite, hyper-beam, earthquake..."


Clean Game Indices

In [24]:
pokemon_game = []
for val in tdf['game_indices']:
    #print(val)
    res = [sub['version'] for sub in val]
    #print(res) # list of dictionaries of moves
    results = [sub['name'] for sub in res]
    #print(results) # Woo! Output of Moves
    pokemon_game.append(results)

In [25]:
game_df = pd.DataFrame(columns = ["pokemon_games"])
poke_game = []
for row in pokemon_game[:1281]:
    row_dict = {'pokemon_games': row}
    poke_game.append(row_dict)

game_df = pd.DataFrame.from_dict(poke_game)
game_df.reset_index(drop=True)

Unnamed: 0,pokemon_games
0,"[diamond, pearl, platinum, heartgold, soulsilv..."
1,[]
2,"[red, blue, yellow, gold, silver, crystal, rub..."
3,"[ruby, sapphire, emerald, firered, leafgreen, ..."
4,[]
...,...
1276,[]
1277,[]
1278,[]
1279,[]


In [26]:
tdf = tdf.reset_index(drop=True)

In [27]:
clean_df = pd.merge(tdf, abilities_df, left_index = True, right_index = True)
clean_df = clean_df.drop(columns=['moves', 'types', 'game_indices'])
clean_df = pd.merge(clean_df, moves_df, left_index = True, right_index = True)
clean_df = pd.merge(clean_df, types_df, left_index = True, right_index = True)
clean_df = pd.merge(clean_df, game_df, left_index = True, right_index = True)

In [28]:
clean_df = clean_df.drop(columns=['forms', 'held_items', 'is_default', 'location_area_encounters', 'past_types', 'species', 'sprites', 'stats'])
clean_df

Unnamed: 0,pokemon,base_experience,height,name,order,weight,abilities,moves,types,pokemon_games
0,abomasnow,173,22,abomasnow,585,1355,"[snow-warning, soundproof]","[mega-punch, ice-punch, swords-dance, mega-kic...","[grass, ice]","[diamond, pearl, platinum, heartgold, soulsilv..."
1,abomasnow-mega,208,27,abomasnow-mega,586,1850,[snow-warning],"[ice-punch, swords-dance, leer, mist, ice-beam...","[grass, ice]",[]
2,abra,62,9,abra,103,195,"[synchronize, inner-focus, magic-guard]","[mega-punch, fire-punch, ice-punch, thunder-pu...",[psychic],"[red, blue, yellow, gold, silver, crystal, rub..."
3,absol,163,12,absol,478,470,"[pressure, super-luck, justified]","[scratch, razor-wind, swords-dance, cut, headb...",[dark],"[ruby, sapphire, emerald, firered, leafgreen, ..."
4,absol-mega,198,12,absol-mega,479,490,[magic-bounce],"[scratch, razor-wind, swords-dance, cut, doubl...",[dark],[]
...,...,...,...,...,...,...,...,...,...,...
1276,zygarde-10,243,12,zygarde-10,859,335,[aura-break],"[bind, body-slam, bite, hyper-beam, earthquake...","[dragon, ground]",[]
1277,zygarde-10-power-construct,243,12,zygarde-10-power-construct,860,335,[power-construct],"[bind, body-slam, bite, hyper-beam, earthquake...","[dragon, ground]",[]
1278,zygarde-50,300,50,zygarde-50,858,3050,[aura-break],"[bind, body-slam, bite, hyper-beam, strength, ...","[dragon, ground]",[]
1279,zygarde-50-power-construct,300,50,zygarde-50-power-construct,861,3050,[power-construct],"[bind, body-slam, bite, hyper-beam, earthquake...","[dragon, ground]",[]


In [29]:
clean_df = clean_df.drop(columns = ['pokemon_games']) # Dropped since too many empty lists.

#### Cleaning of Pokemon Colors and Species

In [30]:
pokemon = []
for val in cdf['pokemon_species']:
    # print(val.type) # Val is list
    res = [sub['name'] for sub in val]
    #print(res)
    pokemon.append(res)

In [31]:
black = pokemon[0]
blue = pokemon[1]
brown = pokemon[2]
gray = pokemon[3] 
green = pokemon[4]
pink = pokemon[5]
purple = pokemon[6]
red = pokemon[7]
white = pokemon[8]
yellow = pokemon[9]

In [32]:
pokemon_colors = pd.DataFrame((list(zip(black, blue, brown, gray, green, pink, purple, red, white, yellow))), columns = ['black', 'blue', 'brown', 'gray', 'green', 'pink', 'purple', 'red', 'white', 'yellow'])
pokemon_colors

Unnamed: 0,black,blue,brown,gray,green,pink,purple,red,white,yellow
0,murkrow,squirtle,weedle,machop,bulbasaur,slowpoke,rattata,charmander,seel,sandshrew
1,unown,nidoran-f,pidgey,magnemite,caterpie,exeggcute,ekans,paras,togepi,meowth
2,sneasel,oddish,spearow,onix,bellsprout,lickitung,nidoran-m,krabby,mareep,psyduck
3,houndour,poliwag,vulpix,rhyhorn,scyther,porygon,zubat,voltorb,smeargle,ponyta
4,mawile,tentacool,diglett,misdreavus,chikorita,mew,venonat,goldeen,lugia,drowzee
5,spoink,tangela,mankey,pineco,spinarak,cleffa,grimer,magikarp,wingull,zapdos
6,seviper,horsea,growlithe,qwilfish,natu,igglybuff,shellder,ledyba,ralts,moltres
7,shuppet,lapras,abra,remoraid,larvitar,hoppip,gastly,yanma,zangoose,cyndaquil
8,duskull,omanyte,geodude,skarmory,celebi,snubbull,koffing,slugma,absol,pichu
9,chatot,articuno,farfetchd,poochyena,treecko,corsola,ditto,delibird,pachirisu,sunkern


In [33]:
# Transform Pokemon Colors Dataframe to have two columns, pokemon and color

pokemon_colors = pokemon_colors.reset_index(inplace = False)

pc_df = pokemon_colors.melt(id_vars='index', var_name='color', value_name='pokemon')
print(pc_df.sample(5)) # Check to ensure that the melt was done correctly and colors still match with the pokemon

     index  color    pokemon
0        0  black    murkrow
8        8  black    duskull
417     33  white  pheromosa
179     35   gray    grubbin
175     31   gray     kyurem


Merge Pokemon Colors to Pokemon Statistics Dataframe

In [39]:
final_df = pd.merge(clean_df, pc_df, on = 'pokemon')

MergeError: Passing 'suffixes' which cause duplicate columns {'index_x', 'color_x'} is not allowed.

In [40]:
clean_df

Unnamed: 0,pokemon,base_experience,height,name,order,weight,abilities,moves,types,index_x,color_x,index_y,color_y,index,color
0,abra,62,9,abra,103,195,"[synchronize, inner-focus, magic-guard]","[mega-punch, fire-punch, ice-punch, thunder-pu...",[psychic],7,brown,7,brown,7,brown
1,absol,163,12,absol,478,470,"[pressure, super-luck, justified]","[scratch, razor-wind, swords-dance, cut, headb...",[dark],8,white,8,white,8,white
2,aerodactyl,180,18,aerodactyl,232,590,"[rock-head, pressure, unnerve]","[razor-wind, wing-attack, whirlwind, fly, head...","[rock, flying]",10,purple,10,purple,10,purple
3,aipom,72,8,aipom,285,115,"[run-away, pickup, skill-link]","[double-slap, mega-punch, fire-punch, ice-punc...",[normal],12,purple,12,purple,12,purple
4,alcremie,173,3,alcremie,1053,5,"[sweet-veil, aroma-veil]","[tackle, hyper-beam, solar-beam, psychic, reco...",[fairy],46,white,46,white,46,white
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
462,zekrom,340,29,zekrom,769,3450,[teravolt],"[thunder-punch, cut, fly, hyper-beam, strength...","[dragon, electric]",16,black,16,black,16,black
463,zeraora,300,15,zeraora,988,445,[volt-absorb],"[mega-punch, pay-day, fire-punch, thunder-punc...",[electric],37,yellow,37,yellow,37,yellow
464,zigzagoon,56,4,zigzagoon,361,175,"[pickup, gluttony, quick-feet]","[cut, sand-attack, headbutt, tackle, body-slam...",[normal],24,brown,24,brown,24,brown
465,zorua,66,7,zorua,692,125,[illusion],"[scratch, swords-dance, cut, take-down, leer, ...",[dark],25,gray,25,gray,25,gray


In [41]:
clean_df = clean_df.drop(columns = ['index_x', 'color_x', 'index_y', 'color_y', 'index'])
clean_df

Unnamed: 0,pokemon,base_experience,height,name,order,weight,abilities,moves,types,color
0,abra,62,9,abra,103,195,"[synchronize, inner-focus, magic-guard]","[mega-punch, fire-punch, ice-punch, thunder-pu...",[psychic],brown
1,absol,163,12,absol,478,470,"[pressure, super-luck, justified]","[scratch, razor-wind, swords-dance, cut, headb...",[dark],white
2,aerodactyl,180,18,aerodactyl,232,590,"[rock-head, pressure, unnerve]","[razor-wind, wing-attack, whirlwind, fly, head...","[rock, flying]",purple
3,aipom,72,8,aipom,285,115,"[run-away, pickup, skill-link]","[double-slap, mega-punch, fire-punch, ice-punc...",[normal],purple
4,alcremie,173,3,alcremie,1053,5,"[sweet-veil, aroma-veil]","[tackle, hyper-beam, solar-beam, psychic, reco...",[fairy],white
...,...,...,...,...,...,...,...,...,...,...
462,zekrom,340,29,zekrom,769,3450,[teravolt],"[thunder-punch, cut, fly, hyper-beam, strength...","[dragon, electric]",black
463,zeraora,300,15,zeraora,988,445,[volt-absorb],"[mega-punch, pay-day, fire-punch, thunder-punc...",[electric],yellow
464,zigzagoon,56,4,zigzagoon,361,175,"[pickup, gluttony, quick-feet]","[cut, sand-attack, headbutt, tackle, body-slam...",[normal],brown
465,zorua,66,7,zorua,692,125,[illusion],"[scratch, swords-dance, cut, take-down, leer, ...",[dark],gray


In [1]:
clean_df.to_csv('Cleaned_Data.csv')

NameError: name 'clean_df' is not defined