# Imports

In [12]:
import pandas as pd
import numpy as np
import os
import glob
import unicodedata

# Data Loading

In [13]:
# Read 'pokemon.csv' (path relative to the working directory) into a DataFrame
# and print its (rows, columns) shape as a quick sanity check.
pokemon_data = pd.read_csv('pokemon.csv')
print(pokemon_data.shape)

(801, 41)


In [14]:
# Show every column label of the loaded frame.
pokemon_data.columns

Index(['abilities', 'against_bug', 'against_dark', 'against_dragon',
       'against_electric', 'against_fairy', 'against_fight', 'against_fire',
       'against_flying', 'against_ghost', 'against_grass', 'against_ground',
       'against_ice', 'against_normal', 'against_poison', 'against_psychic',
       'against_rock', 'against_steel', 'against_water', 'attack',
       'base_egg_steps', 'base_happiness', 'base_total', 'capture_rate',
       'classfication', 'defense', 'experience_growth', 'height_m', 'hp',
       'japanese_name', 'name', 'percentage_male', 'pokedex_number',
       'sp_attack', 'sp_defense', 'speed', 'type1', 'type2', 'weight_kg',
       'generation', 'is_legendary'],
      dtype='object')

In [15]:
# Number of rows per value of the 'generation' column (value_counts sorts by count, descending).
pokemon_data['generation'].value_counts()

5    156
1    151
3    135
4    107
2    100
7     80
6     72
Name: generation, dtype: int64

In [16]:
# Distinct values found in the 'type1' column.
pokemon_data['type1'].unique()

array(['grass', 'fire', 'water', 'bug', 'normal', 'poison', 'electric',
       'ground', 'fairy', 'fighting', 'psychic', 'rock', 'ghost', 'ice',
       'dragon', 'dark', 'steel', 'flying'], dtype=object)

# Information Retrieval

In [17]:
# Snapshot of the process's working directory at the time this cell runs.
# get_image_path() prefixes its glob patterns with this value and strips it
# from the matches, so it must stay in sync with where 'References/' lives.
current_working_directory = os.getcwd()

In [18]:
def remove_accents(input_str):
    """Return ``input_str`` with its combining marks stripped.

    The text is first decomposed with Unicode NFKD normalization; every
    character that ``unicodedata.combining`` reports as a combining mark is
    then dropped and the remaining characters are joined back together.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    kept_chars = filter(lambda ch: not unicodedata.combining(ch), decomposed)
    return u"".join(kept_chars)

def to_normal_text(text):
    """Turn a display name into the slug used for asset file names.

    The text is lower-cased and passed through ``remove_accents``; then, in a
    single pass, spaces become ``-``; colons, dots and apostrophes are removed;
    and the gender signs become ``-f`` / ``-m``.
    """
    # One translation table replaces the chain of single-character replaces;
    # none of the replacement outputs is itself a key, so the result is the same.
    substitutions = str.maketrans({
        ' ': '-',
        ':': '',
        '.': '',
        "'": '',
        '♀': '-f',
        '♂': '-m',
    })
    return remove_accents(text.lower()).translate(substitutions)

def get_image_path(pokemon_name, return_None=True):
    """Locate the asset image for a Pokémon name.

    The name is converted with ``to_normal_text`` and looked up under
    ``References/Pokemons-Go/assets`` in this order:

    1. ``<slug>.png`` then ``<slug>.jpg`` as exact file names, checked
       relative to the process's current directory. A hit is returned as a
       relative path with no leading slash.
    2. ``<slug>*.png`` then ``<slug>*.jpg`` globbed under
       ``current_working_directory``. A pattern is accepted only when exactly
       one file matches; the hit is returned with that directory prefix
       stripped, so it starts with ``/`` and uses forward slashes.

    Args:
        pokemon_name: Display name, e.g. ``'Zygarde'``.
        return_None: Kept for backward compatibility. It has no effect:
            ``None`` is returned either way when nothing is found.

    Returns:
        The path string described above, or ``None`` when no exact file exists
        and neither glob pattern matches exactly one file.
    """
    assets_dir = 'References/Pokemons-Go/assets'
    extensions = ('png', 'jpg')
    pokemon_name = to_normal_text(pokemon_name)

    # Exact file name first: png wins over jpg.
    for ext in extensions:
        exact_path = f'{assets_dir}/{pokemon_name}.{ext}'
        if os.path.exists(exact_path):
            return exact_path

    # Fall back to "<slug>*" for files carrying a suffix (e.g. zygarde-50.png).
    # Both the directory and the slug are escaped so glob metacharacters in
    # either one ('[', '*', '?') are matched literally instead of silently
    # producing no results. `recursive=True` was dropped: it only affects
    # patterns that contain '**'.
    root_pattern = glob.escape(current_working_directory)
    name_pattern = glob.escape(pokemon_name)
    for ext in extensions:
        matches = glob.glob(f"{root_pattern}/{assets_dir}/{name_pattern}*.{ext}")
        if len(matches) == 1:
            # NOTE(review): unlike the exact-match branch this keeps a leading
            # '/'. Preserved on purpose because callers may rely on it.
            return matches[0][len(current_working_directory):].replace('\\', '/')

    return None

In [19]:
# Spot-check the lookup on a name whose asset file carries a suffix: the
# recorded output below resolves to 'zygarde-50.png' via the glob fallback.
get_image_path('Zygarde')

'/References/Pokemons-Go/assets/zygarde-50.png'

In [20]:
# Summarise image coverage: how many entries the assets folder holds, and
# which names from the dataset get_image_path() cannot resolve to a file.
num_images = len(os.listdir('References/Pokemons-Go/assets'))
image_info = {'total_images': num_images, 'no_img_pokemons': []}

for pok_name in pokemon_data['name']:
    if get_image_path(pok_name) is not None:
        continue  # an image was found, nothing to record
    image_info['no_img_pokemons'].append(pok_name)

In [21]:
# Display the collected statistics: entry count and the names without an image.
image_info

{'total_images': 831, 'no_img_pokemons': []}

# Preprocessing 

In [27]:
# Per-column count of missing values; only columns with at least one null are reported.
null_cols = pokemon_data.isnull().sum()
for col, n_missing in null_cols.items():
    if n_missing <= 0:
        continue  # fully populated column
    print(f"{col}  -->  {n_missing} nulls")

height_m  -->  20 nulls
percentage_male  -->  98 nulls
type2  -->  384 nulls
weight_kg  -->  20 nulls
