In [2]:
# Libraries
import requests
import os
import pandas as pd

# Not used yet, but imported for possible later use
import json

In [13]:
# Load CSV based on generation number; report (rather than raise) on a missing file
def load_poke_csv(gen_val, data_dir='../data'):
    """Read the stats CSV for one Pokemon generation into a DataFrame.

    The file is expected at ``<data_dir>/gen<gen_val>_pokemon_stats.csv``.

    Parameters
    ----------
    gen_val : int or str
        Generation identifier interpolated into the file name.
    data_dir : str, optional
        Directory that holds the per-generation CSV files. Defaults to
        ``'../data'``, the location this notebook has always read from.

    Returns
    -------
    pandas.DataFrame or None
        The parsed CSV, or ``None`` when no matching file exists. Callers
        must check for ``None`` before using the result.
    """
    csv_path = os.path.join(data_dir, f'gen{gen_val}_pokemon_stats.csv')
    try:
        return pd.read_csv(csv_path, sep=',')
    except FileNotFoundError:
        # Best-effort behaviour kept on purpose: a missing file is reported
        # as an invalid generation number and signalled with None.
        print(f"Invalid generation number: {gen_val}")
        return None

In [62]:
# Read the generation 1 stats file into the working DataFrame
gen1_df = load_poke_csv(gen_val=1)

In [63]:
# Exploratory review: show the ten rows with the highest HP,
# ordered from highest to lowest.
gen1_df.sort_values(by='hp', ascending=False).iloc[:10]

Unnamed: 0,hp,attack,defense,special-attack,special-defense,speed,type_1,type_2,Pokemon,totalstat
112,250,5,5,35,105,50,normal,,chansey,450
142,160,110,65,65,110,30,normal,,snorlax,540
39,140,70,45,85,50,45,normal,fairy,wigglytuff,435
130,130,85,80,85,95,60,water,ice,lapras,535
133,130,65,60,110,95,65,water,,vaporeon,525
38,115,45,20,45,25,20,normal,fairy,jigglypuff,270
149,106,110,90,154,90,130,psychic,,mewtwo,680
88,105,105,75,65,100,50,poison,,muk,500
111,105,130,120,45,45,40,ground,rock,rhydon,485
114,105,95,80,40,80,90,normal,,kangaskhan,490


In [64]:
# NaN handling demo: give Pokemon without a secondary type an explicit
# 'N/A' placeholder instead of a missing value.
# NOTE(review): 'N/A' is one of the strings pandas.read_csv treats as missing
# by default, so this placeholder will likely come back as NaN if the frame is
# written to CSV and reloaded -- confirm that is acceptable.
gen1_df = gen1_df.assign(type_2=gen1_df['type_2'].fillna('N/A'))
gen1_df.head()

Unnamed: 0,hp,attack,defense,special-attack,special-defense,speed,type_1,type_2,Pokemon,totalstat
0,45,49,49,65,65,45,grass,poison,bulbasaur,318
1,60,62,63,80,80,60,grass,poison,ivysaur,405
2,80,82,83,100,100,80,grass,poison,venusaur,525
3,39,52,43,60,50,65,fire,,charmander,309
4,58,64,58,80,65,80,fire,,charmeleon,405


In [65]:
# Case-changing demo: type names become upper case, Pokemon names title case.
gen1_df = gen1_df.assign(
    type_1=gen1_df['type_1'].str.upper(),
    type_2=gen1_df['type_2'].str.upper(),
    Pokemon=gen1_df['Pokemon'].str.title(),
)
gen1_df.tail()

Unnamed: 0,hp,attack,defense,special-attack,special-defense,speed,type_1,type_2,Pokemon,totalstat
146,41,64,45,50,50,50,DRAGON,,Dratini,300
147,61,84,65,70,70,70,DRAGON,,Dragonair,420
148,91,134,95,100,100,80,DRAGON,FLYING,Dragonite,600
149,106,110,90,154,90,130,PSYCHIC,,Mewtwo,680
150,100,100,100,100,100,100,PSYCHIC,,Mew,600


In [66]:
# Filtering demo: keep only rows whose totalstat is at least 500
# (display only; gen1_df itself is not modified here).
gen1_df[gen1_df['totalstat'].ge(500)].head()

Unnamed: 0,hp,attack,defense,special-attack,special-defense,speed,type_1,type_2,Pokemon,totalstat
2,80,82,83,100,100,80,GRASS,POISON,Venusaur,525
5,78,84,78,109,85,100,FIRE,FLYING,Charizard,534
8,79,83,100,85,105,78,WATER,,Blastoise,530
30,90,92,87,75,85,76,POISON,GROUND,Nidoqueen,505
33,81,102,77,85,75,85,POISON,GROUND,Nidoking,505


In [67]:
# New-column demo: "power" is the floor of the mean of attack and
# special-attack (sum of the two, integer-divided by 2).
gen1_df = gen1_df.assign(
    power=gen1_df['attack'].add(gen1_df['special-attack']).floordiv(2)
)
gen1_df.head()

Unnamed: 0,hp,attack,defense,special-attack,special-defense,speed,type_1,type_2,Pokemon,totalstat,power
0,45,49,49,65,65,45,GRASS,POISON,Bulbasaur,318,57
1,60,62,63,80,80,60,GRASS,POISON,Ivysaur,405,71
2,80,82,83,100,100,80,GRASS,POISON,Venusaur,525,91
3,39,52,43,60,50,65,FIRE,,Charmander,309,56
4,58,64,58,80,65,80,FIRE,,Charmeleon,405,72


In [77]:
# Drop rows (not columns) based on defined criteria: remove every Pokemon
# that is below BOTH thresholds, i.e. totalstat < 500 and power < 85.
# A Pokemon that meets either threshold is kept.
below_both = gen1_df['totalstat'].lt(500) & gen1_df['power'].lt(85)
gen1_df = gen1_df.drop(index=gen1_df.loc[below_both].index)
print(f"Number of Pokemon remaining: {len(gen1_df)}")
gen1_df.head()

Number of Pokemon remaining: 48


Unnamed: 0,hp,attack,defense,special-attack,special-defense,speed,type_1,type_2,Pokemon,totalstat,power
2,80,82,83,100,100,80,GRASS,POISON,Venusaur,525,91
5,78,84,78,109,85,100,FIRE,FLYING,Charizard,534,96
8,79,83,100,85,105,78,WATER,,Blastoise,530,84
25,60,90,55,90,80,110,ELECTRIC,,Raichu,485,90
30,90,92,87,75,85,76,POISON,GROUND,Nidoqueen,505,83


In [78]:
# Persist the revised frame as a new CSV; the DataFrame index is not written.
gen1_df.to_csv(path_or_buf='../data/gen1_EDA_demo.csv', index=False)