In [309]:
import csv
import numpy as np
import pandas as pd

In [310]:
raw_dataframe = pd.read_csv("pokemon_raw.csv", delimiter=",")

Find if we can rely on pokedex_number to be a primary key for the table, by checking to see if any of the values are duplicated.

In [311]:
for index, row in enumerate(raw_dataframe['pokedex_number'].duplicated()):
    if row == True:
        print(f"Duplicate found {index}.")

Obtain a clean list of all unique types from both type columns, while also exluding empty columns. This also allows to check for spelling mistakes in the outputted list.

In [312]:
types_dataframe = raw_dataframe[['pokedex_number', 'type1', 'type2']];

type1_uniques = types_dataframe["type1"].unique().tolist()
type2_uniques = types_dataframe["type2"].unique().tolist()

unique_types = type1_uniques + type2_uniques
unique_types = list(set(unique_types))

for index, entry in enumerate(unique_types):
    if type(entry) == float:
        del unique_types[index]

unique_types

['ground',
 'fire',
 'dragon',
 'grass',
 'psychic',
 'fighting',
 'poison',
 'rock',
 'steel',
 'water',
 'fairy',
 'flying',
 'ice',
 'ghost',
 'normal',
 'dark',
 'electric',
 'bug']

Check to see if every entry has at least one type by checking for null values in each column.

In [313]:
types_dataframe.isnull().sum()

pokedex_number      0
type1               0
type2             384
dtype: int64

Split the two type columns into boolean identifiers so that the data can be more easily parsed into models, add these new columns to raw_dataframe rather than to the type_dataframe slice.

In [314]:
for pokemon_type in unique_types:
    raw_dataframe[f"type_{pokemon_type.lower()}"] = False

for index, row in raw_dataframe.iterrows():
    if row['type1'] in unique_types:
        raw_dataframe.loc[index, f"type_{row['type1'].lower()}"] = True

    if type(row['type2']) == float or row['type2'] not in unique_types: continue

    raw_dataframe.loc[index, f"type_{row['type2'].lower()}"] = True

raw_dataframe = raw_dataframe.drop('type1', axis=1)
raw_dataframe = raw_dataframe.drop('type2', axis=1)

print(raw_dataframe)


                       abilities  against_bug  against_dark  against_dragon  \
0    ['Overgrow', 'Chlorophyll']         1.00           1.0             1.0   
1    ['Overgrow', 'Chlorophyll']         1.00           1.0             1.0   
2    ['Overgrow', 'Chlorophyll']         1.00           1.0             1.0   
3       ['Blaze', 'Solar Power']         0.50           1.0             1.0   
4       ['Blaze', 'Solar Power']         0.50           1.0             1.0   
..                           ...          ...           ...             ...   
796              ['Beast Boost']         0.25           1.0             0.5   
797              ['Beast Boost']         1.00           1.0             0.5   
798              ['Beast Boost']         2.00           0.5             2.0   
799              ['Prism Armor']         2.00           2.0             1.0   
800               ['Soul-Heart']         0.25           0.5             0.0   

     against_electric  against_fairy  against_fight