In [1]:
import os

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

import numpy as np
import pandas as pd
import matplotlib as plt



CHROMEDRIVER_PATH = 'drivers/chromedriver.exe'  # location of the chromedriver binary
POKEDEX_URL = 'https://pokemondb.net/pokedex/all'  # page that is scraped

# One entry per dataset column: (column label, locator strategy, locator).
# The six stat columns share the class 'cell-num', so they are told apart by
# position with an XPath index.
# NOTE(review): presumably each table row holds exactly six 'cell-num' cells in
# the order hp..speed -- verify against the page markup if the site changes.
COLUMN_LOCATORS = [
    ('id', By.CLASS_NAME, 'infocard-cell-data'),
    ('name', By.CLASS_NAME, 'ent-name'),
    ('type', By.CLASS_NAME, 'cell-icon'),
    ('total', By.CLASS_NAME, 'cell-total'),
    ('hp', By.XPATH, "//*[@class='cell-num'][1]"),
    ('attack', By.XPATH, "//*[@class='cell-num'][2]"),
    ('defense', By.XPATH, "//*[@class='cell-num'][3]"),
    ('special-attack', By.XPATH, "//*[@class='cell-num'][4]"),
    ('special-defense', By.XPATH, "//*[@class='cell-num'][5]"),
    ('speed', By.XPATH, "//*[@class='cell-num'][6]"),
]


def scrape_column(driver, by, locator):
    """Return the text of every element matching (by, locator), in the order Selenium returns them."""
    return [element.text for element in driver.find_elements(by, locator)]


# Pass the driver path through a Service object; handing the path to Chrome()
# positionally is deprecated in Selenium 4.
driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH))
try:
    driver.get(POKEDEX_URL)  # opens the chrome browser and navigates to the URL
    driver.minimize_window()
    scraped = {
        label: scrape_column(driver, by, locator)
        for label, by, locator in COLUMN_LOCATORS
    }
finally:
    # quit() ends the whole session (close() only closes the current window),
    # and the finally clause makes sure the browser goes away even if scraping fails.
    driver.quit()

# The lists are combined position by position later on, so a column that
# matched a different number of elements would silently misalign the rows.
row_counts = {label: len(values) for label, values in scraped.items()}
if len(set(row_counts.values())) > 1:
    raise ValueError('Scraped columns differ in length: {}'.format(row_counts))

columns = [label for label, _, _ in COLUMN_LOCATORS]  # column names (labels) for dataset
attributes = [scraped[label] for label in columns]  # one list of values per column, same order as `columns`

# Keep the individual per-column lists available under their original names.
(
    pokemon_id,
    pokemon_name,
    pokemon_type,
    pokemon_total,
    pokemon_hp,
    pokemon_attack,
    pokemon_defense,
    pokemon_special_attack,
    pokemon_special_defense,
    pokemon_speed,
) = attributes

  driver = webdriver.Chrome('drivers/chromedriver.exe') # assign the driver path to variable


In [2]:
# Print how many entries each attribute list holds, one count per line.
for attribute_values in attributes:
    entry_count = len(attribute_values)
    print(entry_count)

1075
1075
1075
1075
1075
1075
1075
1075
1075
1075


In [3]:
# `attributes` holds one list per attribute, so the frame is first built with
# the attributes as rows and then transposed to turn them into columns.
# .T is used instead of DataFrame.swapaxes, which recent pandas versions deprecate.
df = pd.DataFrame(attributes).T

In [4]:
# Attach the attribute names in `columns` as the column labels of the frame.
df = df.set_axis(columns, axis=1)

Run the code block below to reset the dataset: it rewrites `datasets/pokedex.csv` from the current DataFrame and reloads it.

In [5]:
# Save the dataset as a CSV file and load it back from disk.
DATASET_DIR = 'datasets'  # relative to the notebook's working directory
DATASET_PATH = os.path.join(DATASET_DIR, 'pokedex.csv')

# Create the very folder the CSV is written to, instead of a hard-coded
# absolute path that only exists on one machine.
os.makedirs(DATASET_DIR, exist_ok=True)
df.to_csv(DATASET_PATH)  # the row index is written as well and comes back as an 'Unnamed: 0' column
df = pd.read_csv(DATASET_PATH)  # reload the file and check the first 5 entries
df.head()

Unnamed: 0.1,Unnamed: 0,id,name,type,total,hp,attack,defense,special-attack,special-defense,speed
0,0,1,Bulbasaur,GRASS\nPOISON,318,45,49,49,65,65,45
1,1,2,Ivysaur,GRASS\nPOISON,405,60,62,63,80,80,60
2,2,3,Venusaur,GRASS\nPOISON,525,80,82,83,100,100,80
3,3,3,Venusaur,GRASS\nPOISON,625,80,100,123,122,120,80
4,4,4,Charmander,FIRE,309,39,52,43,60,50,65


In [6]:
# Drop the 'Unnamed: 0' column left over from the CSV round trip.
# errors='ignore' keeps the cell re-runnable once the column is already gone.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
# Multi-type pokemon have their types separated by a newline; use '/' instead.
# regex=False makes it explicit that '\n' is a literal string, not a pattern.
df['type'] = df['type'].str.replace('\n', '/', regex=False)
df.head()

Unnamed: 0,id,name,type,total,hp,attack,defense,special-attack,special-defense,speed
0,1,Bulbasaur,GRASS/POISON,318,45,49,49,65,65,45
1,2,Ivysaur,GRASS/POISON,405,60,62,63,80,80,60
2,3,Venusaur,GRASS/POISON,525,80,82,83,100,100,80
3,3,Venusaur,GRASS/POISON,625,80,100,123,122,120,80
4,4,Charmander,FIRE,309,39,52,43,60,50,65


In [7]:
# A row whose name repeats an earlier row is treated as a Mega form of that pokemon.
is_mega_evolved_list = df.duplicated(subset='name')
# Only prefix names that do not carry 'Mega ' yet, so re-running this cell
# cannot produce 'Mega Mega ...'.
needs_prefix = is_mega_evolved_list & ~df['name'].str.startswith('Mega ', na=False)
df.loc[needs_prefix, 'name'] = 'Mega ' + df.loc[needs_prefix, 'name']
# NOTE: a pokemon with two repeated rows (e.g. Charizard) ends up with two
# identical 'Mega ...' names; those cases are not told apart here.
df.head(10)  # check the first 10 values for accuracy

Unnamed: 0,id,name,type,total,hp,attack,defense,special-attack,special-defense,speed
0,1,Bulbasaur,GRASS/POISON,318,45,49,49,65,65,45
1,2,Ivysaur,GRASS/POISON,405,60,62,63,80,80,60
2,3,Venusaur,GRASS/POISON,525,80,82,83,100,100,80
3,3,Mega Venusaur,GRASS/POISON,625,80,100,123,122,120,80
4,4,Charmander,FIRE,309,39,52,43,60,50,65
5,5,Charmeleon,FIRE,405,58,64,58,80,65,80
6,6,Charizard,FIRE/FLYING,534,78,84,78,109,85,100
7,6,Mega Charizard,FIRE/DRAGON,634,78,130,111,130,85,100
8,6,Mega Charizard,FIRE/FLYING,634,78,104,78,159,115,100
9,7,Squirtle,WATER,314,44,48,65,50,64,43


In [19]:
# Flag the rows whose name is still repeated now that the 'Mega ' prefix has
# been applied, and show the size of the resulting boolean mask.
# TODO: use this mask to give such rows distinct names.
is_double_mega_list = df.duplicated(subset=['name'])
is_double_mega_list.shape

(1075,)

In [20]:
# Show the first ten rows again; the duplicate check above does not modify df.
df.head(10)

Unnamed: 0,id,name,type,total,hp,attack,defense,special-attack,special-defense,speed
0,1,Bulbasaur,GRASS/POISON,318,45,49,49,65,65,45
1,2,Ivysaur,GRASS/POISON,405,60,62,63,80,80,60
2,3,Venusaur,GRASS/POISON,525,80,82,83,100,100,80
3,3,Mega Venusaur,GRASS/POISON,625,80,100,123,122,120,80
4,4,Charmander,FIRE,309,39,52,43,60,50,65
5,5,Charmeleon,FIRE,405,58,64,58,80,65,80
6,6,Charizard,FIRE/FLYING,534,78,84,78,109,85,100
7,6,Mega Charizard,FIRE/DRAGON,634,78,130,111,130,85,100
8,6,Mega Charizard,FIRE/FLYING,634,78,104,78,159,115,100
9,7,Squirtle,WATER,314,44,48,65,50,64,43


In [21]:
# Make the 'id' column the index. The guard keeps the cell re-runnable: once
# 'id' is the index it is no longer a column, and set_index would raise KeyError.
# Note that ids repeat for Mega forms, so this index is not unique.
if 'id' in df.columns:
    df = df.set_index('id')
df.head()

Unnamed: 0_level_0,name,type,total,hp,attack,defense,special-attack,special-defense,speed
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Bulbasaur,GRASS/POISON,318,45,49,49,65,65,45
2,Ivysaur,GRASS/POISON,405,60,62,63,80,80,60
3,Venusaur,GRASS/POISON,525,80,82,83,100,100,80
3,Mega Venusaur,GRASS/POISON,625,80,100,123,122,120,80
4,Charmander,FIRE,309,39,52,43,60,50,65


In [None]:
# TODO: account for the Mega Charizard and Mega Mewtwo cases (e.g. both Mega Charizard rows currently share one name)
# TODO: organize the run files
# TODO: speed up scraping

In [24]:
def _find_pokemon(frame, query):
    """Return the rows of `frame` whose 'name' equals `query`, ignoring case and surrounding whitespace."""
    wanted = query.strip().casefold()
    return frame.loc[frame['name'].str.casefold() == wanted]


# Ask the user which pokemon to look up. Matching is done against the names in
# df (not the raw scraped list) and ignores case, so multi-word names such as
# 'Mega Venusaur' can be found as well.
pokemon_lookup = input("Type the name of the pokemon you want to search up: ")
poke_stats = _find_pokemon(df, pokemon_lookup)

# Validation loop: keep asking until the name matches at least one row.
while poke_stats.empty:
    pokemon_lookup = input("That pokemon was not found, please try again: ")
    poke_stats = _find_pokemon(df, pokemon_lookup)

pokemon_lookup = poke_stats['name'].iloc[0]  # use the spelling stored in the dataset
print("Searching for {} ... ".format(pokemon_lookup))

poke_stats  # stats of the matching pokemon (more than one row if the name is shared)

Searching for Charmander ... 


Unnamed: 0_level_0,name,type,total,hp,attack,defense,special-attack,special-defense,speed
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
4,Charmander,FIRE,309,39,52,43,60,50,65


In [25]:
# Show the first ten rows of df.
df.head(10)

Unnamed: 0_level_0,name,type,total,hp,attack,defense,special-attack,special-defense,speed
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Bulbasaur,GRASS/POISON,318,45,49,49,65,65,45
2,Ivysaur,GRASS/POISON,405,60,62,63,80,80,60
3,Venusaur,GRASS/POISON,525,80,82,83,100,100,80
3,Mega Venusaur,GRASS/POISON,625,80,100,123,122,120,80
4,Charmander,FIRE,309,39,52,43,60,50,65
5,Charmeleon,FIRE,405,58,64,58,80,65,80
6,Charizard,FIRE/FLYING,534,78,84,78,109,85,100
6,Mega Charizard,FIRE/DRAGON,634,78,130,111,130,85,100
6,Mega Charizard,FIRE/FLYING,634,78,104,78,159,115,100
7,Squirtle,WATER,314,44,48,65,50,64,43


# Exploratory analysis of the pokedex.csv DataFrame

Questions:
- Which pokemon has the highest stat total?
- Which pokemon type has the highest average stat total?
- What is the highest average stat for each type?
- Which pokemon has the highest value of each stat?
- How many pokemon have a stat total of 500?
- How many pokemon have more than one stat over 100?

In [26]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,total,hp,attack,defense,special-attack,special-defense,speed
count,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0
mean,440.697674,70.275349,80.847442,74.76093,73.296744,72.382326,69.134884
std,121.84133,26.642689,32.408889,31.161763,32.86214,28.005817,30.220442
min,175.0,1.0,5.0,5.0,10.0,20.0,5.0
25%,330.0,50.0,56.0,50.5,50.0,50.0,45.0
50%,460.0,68.0,80.0,70.0,65.0,70.0,65.0
75%,516.0,83.5,100.0,90.0,95.0,90.0,90.0
max,1125.0,255.0,190.0,250.0,194.0,250.0,200.0


In [27]:
# Print index range, per-column non-null counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1075 entries, 1 to 905
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   name             1075 non-null   object
 1   type             1075 non-null   object
 2   total            1075 non-null   int64 
 3   hp               1075 non-null   int64 
 4   attack           1075 non-null   int64 
 5   defense          1075 non-null   int64 
 6   special-attack   1075 non-null   int64 
 7   special-defense  1075 non-null   int64 
 8   speed            1075 non-null   int64 
dtypes: int64(7), object(2)
memory usage: 84.0+ KB


In [28]:
# (number of rows, number of columns) of the frame.
df.shape

(1075, 9)