### Going to gather pokemon data from https://pokemondb.net/pokedex/all
## It will be saved in a Pandas dataframe
#### It will include each Pokémon's stats, types, abilities and hidden abilities, the name origin, a boolean column for whether it is a mega or not, and (if possible) the starter Pokémon of each generation


In [11]:
#Importing all our necessary libraries
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

## Getting the site as main_site

In [12]:
# Download the full Pokédex listing. A timeout keeps the cell from hanging
# forever, and raise_for_status() stops us from parsing an HTTP error page.
response = requests.get("https://pokemondb.net/pokedex/all", timeout=30)
response.raise_for_status()
# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed on this machine.
main_site = bs(response.content, "html.parser")

# Going to get all the tablerows

In [13]:
table_body = main_site.select("tbody")[0]

In [14]:
table_rows = table_body.select("tr")

## Going to get the name of the pokemon in the table row

In [15]:
first_row = table_rows[0]

In [16]:
first_row.a.get_text()

'Bulbasaur'

In [17]:
first_ten_poke_names = [row.a.get_text() for row in table_rows[0:10]]

In [18]:
#first_ten_poke_names

### As you can see, Charizard repeats three times; that's because the last two are mega forms
### I'll need to record that they are megas in there!

In [19]:
charzard = table_rows[8]

In [20]:
#On the website, there is small text under some of the pokemon
#This is the path to the small text
charzard.small.get_text()

'Mega Charizard Y'

### This is what I want to save the Charizard name as. Let's do it

In [21]:
# Trial run on the first ten rows.
# The main pokemon name is used the first time it appears. The website has
# separate entries for forms such as Charizard's megas under the same main
# name, so a repeated name is replaced by the small text shown under it.
first_ten = []
for entry in table_rows[:10]:
    base_name = entry.a.get_text()
    already_listed = base_name in first_ten
    first_ten.append(entry.small.get_text() if already_listed else base_name)

In [22]:
#first_ten

### It's working, now to use it for all of the names!

In [23]:
# Display name for every row: the main name on its first appearance, and
# "Main(Small text)" for every later row that repeats the same main name.
names = []
for entry in table_rows:
    base_name = entry.a.get_text()
    if base_name in names:
        form_label = entry.small.get_text()
        names.append(base_name + "(" + form_label + ")")
    else:
        names.append(base_name)

In [24]:
names[8]

'Charizard(Mega Charizard Y)'

## Getting the Pokemon Number:

In [25]:
first_row.td.get_text()

'001'

In [26]:
#The pokemon numbers are given on the main data set
poke_nums = [row.td.get_text() for row in table_rows]

In [27]:
len(poke_nums)

1045

# Getting the Total of their stats:

In [28]:
int(first_row.find("td", attrs = {"class" : 'cell-total'}).get_text())

318

In [29]:
# The stat total sits in the cell with class "cell-total"; it is stored as an
# int so we can do calculations with it.
totals = []
for entry in table_rows:
    total_cell = entry.find("td", attrs={'class': 'cell-total'})
    totals.append(int(total_cell.get_text()))

### Getting the individual stats, same process as the total one

In [30]:
first_row.findAll("td")[4].get_text() #HP

'45'

In [31]:
# HP is the cell at index 4 of each row, stored as an int.
# find_all replaces the deprecated findAll alias.
HP = [int(row.find_all("td")[4].get_text()) for row in table_rows]

In [32]:
#HP.pop(33)

In [33]:
first_row.findAll("td")[5].get_text() #Attack

'49'

In [34]:
# Attack is the cell at index 5 of each row, stored as an int.
# find_all replaces the deprecated findAll alias.
Attack = [int(row.find_all("td")[5].get_text()) for row in table_rows]

In [35]:
#Attack.pop(33)

In [36]:
first_row.findAll("td")[6].get_text() #Defense

'49'

In [37]:
# Defense is the cell at index 6 of each row, stored as an int.
# find_all replaces the deprecated findAll alias.
Defense = [int(row.find_all("td")[6].get_text()) for row in table_rows]

In [38]:
#Defense.pop(33)

In [39]:
first_row.findAll("td")[7].get_text() #Sp.Atk

'65'

In [40]:
# Sp.Atk is the cell at index 7 of each row, stored as an int.
# find_all replaces the deprecated findAll alias.
Sp_Atk = [int(row.find_all("td")[7].get_text()) for row in table_rows]

In [41]:
#Sp_Atk.pop(33)

In [42]:
# Sp_Def is read from the cell at index 8 of each row, stored as an int.
# find_all replaces the deprecated findAll alias.
Sp_Def = [int(row.find_all("td")[8].get_text()) for row in table_rows]

In [43]:
#Sp_Def.pop(33)

In [44]:
# Speed is read from the cell at index 9 of each row, stored as an int.
# find_all replaces the deprecated findAll alias.
Speed = [int(row.find_all("td")[9].get_text()) for row in table_rows]

In [45]:
#Speed.pop(30)

## Adding a column of True/False based on if the pokemon is Mega

In [46]:
#Going to take advantage of numpy's speed and create an array of bools, of whether the pokemon
#is a mega or not
np_names = np.array(names)

In [47]:
Mega = np.char.count(np_names,'Mega ') == 1

In [48]:
len(Mega)

1045

# Getting the First type of the pokemon

In [49]:
first_row.findAll('a', attrs = {"class" : "type-icon"})[0].get_text()

'Grass'

In [50]:
# First type of every pokemon: the text of the first <a class="type-icon">
# link in each row. find_all replaces the deprecated findAll alias.
Type_One = [row.find_all('a', attrs={"class": "type-icon"})[0].get_text()
            for row in table_rows]

# Getting the Second type of the Pokemon

In [51]:
# Second type of every pokemon. Some pokemon have no second type, so their row
# holds only one <a class="type-icon"> link; those are recorded as the string
# "None". The number of links is checked directly instead of indexing and
# catching IndexError.
Type_Two = []
for row in table_rows:
    type_links = row.find_all('a', attrs={"class": 'type-icon'})
    Type_Two.append(type_links[1].get_text() if len(type_links) > 1 else "None")

## Going to get the pokemon species

In [52]:
#I will need this to go to each pokemon's personal information page and get their species
main_link = "https://pokemondb.net"

In [53]:
# Species of every pokemon.
# row.a["href"] is the link to the pokemon's personal information page; joined
# to main_link it gives the full URL. On that page the species is the third
# cell of the first table body.
species = []
# Rows that link to the same page reuse the first download instead of
# requesting the page again.
species_by_href = {}
for row in table_rows:
    href = row.a['href']
    if href not in species_by_href:
        page_response = requests.get(main_link + href, timeout=30)
        # Fail loudly on an HTTP error rather than scraping an error page.
        page_response.raise_for_status()
        page = bs(page_response.content, "html.parser")
        species_by_href[href] = page.tbody.find_all("td")[2].get_text()
    species.append(species_by_href[href])

In [55]:
#species

In [56]:
#Type_Two

## Going to get the Abilities

In [57]:
# Builds three parallel lists (first, second and hidden ability), intended to
# hold one entry per table row.
#
# `checked` holds the main names already processed. A row whose name is not
# yet in it is treated as the main pokemon and its page is downloaded; a row
# whose name is already in it is treated as a variation (Mega, Alolan,
# Galarian, other form) and reuses the ability lists of the last downloaded page.
checked = []
#This will hold the first ability
first_abilities = []
#This will hold the second ability
second_abilities = []
#This will hold the hidden ability
hidden_abilities = []

#Look at how we will solve the Darmanitan problem (table_rows[655:662])
# NOTE(review): the outer `i` is never read, and the inner ability loop below
# rebinds it.
for i, row in enumerate(table_rows):
    #Partner Eevee and Pikachu do not have any abilities at all
    try:
        #Need to use a try, because not all rows have a .small
        if 'Partner' in row.small.get_text():
            first_abilities.append("None")
            second_abilities.append("None")
            hidden_abilities.append("None")
            continue
    except:
        pass
    if row.a.get_text() not in checked:
        #Index into remaining_abilities for the forms that follow this main pokemon
        #(pokemon like Charizard and Mewtwo have two megas, others have several forms)
        mega_ver = 0 
        
        #This is the page with the pokemon's specific information; collect every
        #element with class "text-muted" on it
        new_site = requests.get(main_link + row.a['href'])
        pp = bs(new_site.content)
        o = pp.findAll(attrs = {"class" : "text-muted"})  
        
        #Text of every such element whose first link points at an ability page
        abilities = [x.get_text() for x in o if x.a != None and 'ability' in x.a['href']]
        # NOTE(review): number_of_abilities is not read anywhere in this cell.
        number_of_abilities = len(abilities)
        main_poke_abilities = []
        
        #Take entries until a later one is labelled "1." again: on the website every
        #form's first ability is labelled '1.', so that marks the start of the next form
        for i, ability in enumerate(abilities):
            if i != 0 and "1." in ability:
                break
            else:
                main_poke_abilities.append(ability)
                
        #The hidden abilities have 'hidden' in their text
        hidden_ability = [ability for ability in abilities if 'hidden' in ability]
        
        #Everything after the main pokemon's own abilities (used by the form rows)
        remaining_abilities = abilities[len(main_poke_abilities) :]
        
        checked.append(row.a.get_text())
        #Adding to the first_abilities list
        # NOTE(review): lstrip("1. ") strips any leading '1', '.' and ' ' characters,
        # not the literal prefix "1. ".
        first_abilities.append(main_poke_abilities[0].lstrip("1. "))
        
        try:
            #This will append the second ability after checking that it's not the hidden ability
            if hidden_ability[0] != main_poke_abilities[1]:
                second_abilities.append(main_poke_abilities[1].lstrip("2. "))
                # NOTE(review): main_poke_abilities is not read again after this pop.
                main_poke_abilities.pop(0)
            else:
                second_abilities.append("None")
        except IndexError:
            #In the case that there's no hidden ability, I cannot index it
            #Also, if there is no second ability, I cannot index it
            second_abilities.append("None")
            
        if len(hidden_ability) != 0:
            hidden_abilities.append(hidden_ability[0].replace("(hidden ability)", "").strip())
        else:
            hidden_abilities.append("None") 
    else:
        #If the main pokemon name is already present in checked, this part will run
        #This is for all other forms of the pokemon (Megas, Alolan, Galarian, other)
        try:
            #Look up this form's ability by its position among the forms; if that entry
            #is not labelled "1.", fall back to the first remaining ability
            if "1." in remaining_abilities[mega_ver]:
                first_abilities.append(remaining_abilities[mega_ver].lstrip("1.").strip())
            else:
                first_abilities.append(remaining_abilities[0].lstrip("1.").strip())
            #remaining_abilities.pop(0)
        except:
            # NOTE(review): when the lookup above fails, nothing is appended to
            # first_abilities for this row, while second_abilities and
            # hidden_abilities below always receive an entry. first_abilities then
            # ends up shorter and every later entry is shifted by one row.
            pass
        #Megas have no second/hidden abilities
        if 'Mega' in row.small.get_text():
            second_abilities.append("None")
            hidden_abilities.append("None")
        
        #elif len(remaining_abilities)
        
        else:
            #This is for every other form that's not a mega
            try: 
                if 'hidden' not in remaining_abilities[1] and "1." not in remaining_abilities[1]:
                    second_abilities.append(remaining_abilities[1].lstrip("2.").strip())
                elif "2." in remaining_abilities[2]:
                    second_abilities.append(remaining_abilities[2].lstrip("2.").strip())
                else:
                    second_abilities.append("None")
            except:
                second_abilities.append("None")
            try:
                #The second 'hidden' entry on the page is used as this form's hidden ability
                hidden_abilities.append(hidden_ability[1].replace("(hidden ability)", "").strip())
            except:
                hidden_abilities.append("None")
        #Move on to the next form slot of the current main pokemon
        mega_ver += 1

In [4]:
#first_abilities

In [59]:
#'Partner' in table_rows[172].small.get_text()

### What generation was this pokemon introduced in

In [60]:
# Generation in which each row's pokemon (or form) was introduced.
# Forms are recognised by the small text under the name:
#   all megas are generation 6, all Alolan forms generation 7,
#   all Galarian forms generation 8.
FORM_GENERATIONS = (
    ("Mega", "Generation 6"),
    ("Alolan", "Generation 7"),
    ("Galarian", "Generation 8"),
)

generation = []
# Rows that link to the same page reuse the first download.
generation_by_href = {}
for row in table_rows:
    # Not every row has small text; treat a missing one as "no form marker"
    # instead of relying on a blanket except around the whole lookup.
    small_text = row.small.get_text() if row.small is not None else ""
    form_generation = next(
        (label for marker, label in FORM_GENERATIONS if marker in small_text),
        None,
    )
    if form_generation is not None:
        generation.append(form_generation)
        continue

    # Everything else (main pokemon, and special cases such as Partner Pikachu
    # and Eevee, which have their own entry) takes the text of the first
    # <abbr> on the pokemon's own page.
    href = row.a['href']
    if href not in generation_by_href:
        new_site = requests.get(main_link + href, timeout=30)
        # A failed request now raises here instead of being silently retried.
        new_site.raise_for_status()
        pp = bs(new_site.content, "html.parser")
        generation_by_href[href] = pp.find("abbr").get_text()
    generation.append(generation_by_href[href])

In [61]:
names[33]

'Pikachu(Partner Pikachu)'

In [62]:
#second_abilities

In [63]:
len(first_abilities)

1044

# Going to classify pokemon as legendary, sub legendary, or Mythical

## There is a website that has these classifications, so we're going to scrape it
### Totally unnecessary, but it's a review

In [64]:
# Download the page that lists legendary, sub-legendary and mythical pokemon.
# A timeout keeps the cell from hanging, and raise_for_status() stops us from
# parsing an HTTP error page.
legend_req = requests.get("https://www.serebii.net/pokemon/legendary.shtml", timeout=30)
legend_req.raise_for_status()
# Name the parser explicitly so the result does not depend on installed parsers.
legend_site = bs(legend_req.content, "html.parser")

In [65]:
# Every <table class="trainer"> on the page; the cells below read the first three.
# find_all replaces the deprecated findAll alias.
tables = legend_site.find_all('table', attrs={"class": "trainer"})

In [66]:
legends = defaultdict(list)

In [67]:
# Collect the pokemon names linked from the first table, filed under the text
# of the table's first cell.
sub = tables[0]
for cell in sub.find_all("td", attrs={"align": "center"}):
    link = cell.find("a")
    # Cells without a link, or with a link that has no href, carry no name.
    # These are skipped explicitly instead of hiding every error behind a
    # bare except.
    href = link.get("href") if link is not None else None
    if href is None:
        continue
    # I have to put the type/ instead of type because there's actually a pokemon called type null!
    if "type/" not in href and "ability" not in href:
        legends[sub.td.get_text()].append(link.get_text())

In [68]:
# Collect the pokemon names linked from the second table, filed under the text
# of the table's first cell.
sub = tables[1]
for cell in sub.find_all("td", attrs={"align": "center"}):
    link = cell.find("a")
    # Cells without a link, or with a link that has no href, carry no name.
    # These are skipped explicitly instead of hiding every error behind a
    # bare except.
    href = link.get("href") if link is not None else None
    if href is None:
        continue
    # I have to put the type/ instead of type because there's actually a pokemon called type null!
    if "type/" not in href and "ability" not in href:
        legends[sub.td.get_text()].append(link.get_text())

In [69]:
# Collect the pokemon names linked from the third table, filed under the text
# of the table's first cell.
sub = tables[2]
for cell in sub.find_all("td", attrs={"align": "center"}):
    link = cell.find("a")
    # Cells without a link, or with a link that has no href, carry no name.
    # These are skipped explicitly instead of hiding every error behind a
    # bare except.
    href = link.get("href") if link is not None else None
    if href is None:
        continue
    # I have to put the type/ instead of type because there's actually a pokemon called type null!
    if "type/" not in href and "ability" not in href:
        legends[sub.td.get_text()].append(link.get_text())

In [70]:
legendary = legends['Legendary Pokémon']

In [71]:
mythicals = legends["Mythical Pokémon"]

In [72]:
sub_legendary = legends["Sub-Legendary Pokémon"]

In [73]:
is_legendary = []

In [74]:
is_mythical = []

In [75]:
is_sub = []

In [76]:
# One True/False per row for each rarity list, based on the row's main name.
for entry in table_rows:
    pokemon_name = entry.a.get_text()
    is_legendary.append(pokemon_name in legendary)
    is_mythical.append(pokemon_name in mythicals)
    is_sub.append(pokemon_name in sub_legendary)

In [77]:
# The ability loop can leave first_abilities one entry short of the other
# columns (a form row whose lookup failed appends nothing). Pad it with the
# last pokemon's ability so the DataFrame can be built.
# The length guard keeps this cell safe to re-run, and stops it from
# over-extending the list on a scrape where no row was skipped.
# NOTE(review): padding at the end assumes the skipped row is the last one;
# if it was an earlier row, every entry after it is shifted by one. Confirm.
if len(first_abilities) < len(table_rows):
    first_abilities.append("As One")

## Turning it into a pandas dataframe

In [78]:
# Column name -> scraped list, assembled group by group. The merge order below
# fixes the column order of the resulting table.
identity_columns = {
    'Names': names,
    'Pokemon Number': poke_nums,
}
stat_columns = {
    'Stat Total': totals,
    'HP': HP,
    'Attack': Attack,
    'Defense': Defense,
    'Sp.Atk': Sp_Atk,
    'Sp.Def': Sp_Def,
    'Speed': Speed,
}
trait_columns = {
    'Mega': Mega,
    'First Type': Type_One,
    'Second Type': Type_Two,
    'Species': species,
}
ability_columns = {
    'First Ability': first_abilities,
    'Second Ability': second_abilities,
    'Hidden Ability': hidden_abilities,
}
origin_columns = {
    'Generation': generation,
}
rarity_columns = {
    "Is Sub Legendary": is_sub,
    "Is Legendary": is_legendary,
    "Is Mythical": is_mythical,
}

info = {
    **identity_columns,
    **stat_columns,
    **trait_columns,
    **ability_columns,
    **origin_columns,
    **rarity_columns,
}

In [79]:
# Build the table from the column dictionary and index it by the display name.
poke_info = pd.DataFrame(info).set_index('Names')

In [80]:
test = poke_info

In [81]:
#first_100 = glance.iloc[:100, :]

NameError: name 'glance' is not defined

In [None]:
(336.25 + 395.5 + 573.75) / 3

In [None]:
10285 / 26

In [None]:
glance[glance["First Type"] == "Bug"]["Stat Total"].sum()

In [None]:
# Mean stat total per first type (rows) and generation (columns), with an
# "All" margin; combinations with no pokemon show the text "No Pokemon".
# The aggregation is named by string: recent pandas versions deprecate passing
# the np.mean callable here.
# NOTE(review): `glance` is not defined anywhere in the visible notebook;
# presumably it is a slice of poke_info. Confirm.
gen_1 = glance.pivot_table(values="Stat Total", index="First Type",
                           columns="Generation", fill_value="No Pokemon",
                           margins=True, aggfunc="mean")

In [None]:
"""
The values are all means, so the value in [Bug, Generation 1] is this mean for all the generation 1 pokemon
The [Bug, All] column is the mean for all the the Bug pokemon
"""
gen_1

#### Eventually we are going to simulate battles with this data. The pokemon with the faster speed would go first and do damage with Sp.Atk or Attack (whichever is highest). It would do damage based on its first type or second type (let the user decide), and the defending pokemon would guard (keeping type effectiveness in mind). The damage done will be Hp - (Attack - (Defense * 3)).
#### Might add some balance rules, so legendaries or megas go last, or if the stat total is way higher (twice or 1.5x) then that pokemon would go last.

In [None]:
data = first_100.groupby("First Type").count()

In [None]:
# The group labels (first types) are the index of the grouped counts.
types = list(data.index)

In [None]:
# Number of rows per first type, read from the 'HP' count column.
amounts = list(data['HP'])

In [None]:
len(types)

In [None]:
# Bubble chart: one marker per first type, placed at (type, count), with the
# marker area scaled by the count (count * 600).
plt.figure(figsize = (12,6))
# 15 hard-coded colours, passed as the per-marker colour list.
# NOTE(review): presumably this must contain exactly len(types) entries; confirm.
colors = ["olive","black","yellow", "fuchsia","sienna", "red", "limegreen", "peru", 
          "cyan", "lightgrey", "mediumorchid", "deeppink", "tan", "slategray", "dodgerblue" ]

plt.scatter(types,amounts, s = np.array(amounts) * 600, alpha = .9, c = colors)
plt.title("First 100 pokemon and their types")
plt.show()

In [None]:
data_300 = glance.groupby("First Type").count()

In [None]:
# Group labels (first types) and the number of rows in each group, read from
# the index and the 'HP' count column of the grouped table.
types_300 = list(data_300.index)
amounts_300 = list(data_300['HP'])

In [None]:
len(types_300)

In [None]:
plt.clf()

In [None]:
# Bubble chart: one marker per first type, placed at (type, count), with the
# marker area scaled by the count (count * 600).
plt.figure(figsize = (14,6))
# 17 hard-coded colours, passed as the per-marker colour list.
# NOTE(review): presumably this must contain exactly len(types_300) entries; confirm.
colors_300 = ["olive","black", "darkgoldenrod" , "yellow", "fuchsia","sienna", "red", "indigo", "limegreen", "peru", 
          "cyan", "lightgrey", "mediumorchid", "deeppink", "tan", "slategray", "dodgerblue" ]

plt.scatter(types_300, amounts_300, s = np.array(amounts_300) * 600, alpha = .9, c = colors_300)
plt.title("First 300 pokemon and their types")
plt.show()

In [None]:
glance[glance["Is Sub Legendary"] == True]

In [83]:
poke_info[poke_info["Mega"] == True]

Unnamed: 0_level_0,Pokemon Number,Stat Total,HP,Attack,Defense,Sp.Atk,Sp.Def,Speed,Mega,First Type,Second Type,Species,First Ability,Second Ability,Hidden Ability,Generation,Is Sub Legendary,Is Legendary,Is Mythical
Names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Venusaur(Mega Venusaur),3,625,80,100,123,122,120,80,True,Grass,Poison,Seed Pokémon,Thick Fat,,,Generation 6,False,False,False
Charizard(Mega Charizard X),6,634,78,130,111,130,85,100,True,Fire,Dragon,Flame Pokémon,Tough Claws,,,Generation 6,False,False,False
Charizard(Mega Charizard Y),6,634,78,104,78,159,115,100,True,Fire,Flying,Flame Pokémon,Drought,,,Generation 6,False,False,False
Blastoise(Mega Blastoise),9,630,79,103,120,135,115,78,True,Water,,Shellfish Pokémon,Mega Launcher,,,Generation 6,False,False,False
Beedrill(Mega Beedrill),15,495,65,150,40,15,80,145,True,Bug,Poison,Poison Bee Pokémon,Adaptability,,,Generation 6,False,False,False
Pidgeot(Mega Pidgeot),18,579,83,80,80,135,80,121,True,Normal,Flying,Bird Pokémon,No Guard,,,Generation 6,False,False,False
Alakazam(Mega Alakazam),65,600,55,50,65,175,105,150,True,Psychic,,Psi Pokémon,Trace,,,Generation 6,False,False,False
Slowbro(Mega Slowbro),80,590,95,75,180,130,80,30,True,Water,Psychic,Hermit Crab Pokémon,Shell Armor,,,Generation 6,False,False,False
Gengar(Mega Gengar),94,600,60,65,80,170,95,130,True,Ghost,Poison,Shadow Pokémon,Shadow Tag,,,Generation 6,False,False,False
Kangaskhan(Mega Kangaskhan),115,590,105,125,100,60,100,100,True,Normal,,Parent Pokémon,Parental Bond,,,Generation 6,False,False,False


In [85]:
poke_info.to_json("Pokemon Data.json")