In [2]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from bs4 import BeautifulSoup
import requests

In [117]:
# Pre-allocate an empty table: one row per index label 1..899 and one column per
# scraped attribute. Every cell starts out as NaN and is filled in row by row later.
pokemon = pd.DataFrame(
    index=range(1, 900),
    columns=[
        "number",
        "name",
        "type",
        "ability",
        "hidden_ability",
        "gender_ratio",
        "hatch_cycle",
        "Base_Experience_Yield",
        "height",
        "weight",
        "leveling_rate",
        "color",
        "base_friendship",
    ],
)
pokemon

Unnamed: 0,number,name,type,ability,hidden_ability,gender_ratio,hatch_cycle,Base_Experience_Yield,height,weight,leveling_rate,color,base_friendship
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
5,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,,,,,,,,,,,,,
896,,,,,,,,,,,,,
897,,,,,,,,,,,,,
898,,,,,,,,,,,,,


In [149]:
# Creating a function for scraping pokemon data

def scrap_pokemon_details(pokemon_name, timeout=10):
    """
    Scrape the details of one Pokémon from its Bulbapedia page.

    The page is fetched, every ``<td class="roundy">`` cell is collected, and the
    individual fields are read from fixed positions in that list.

    Args :
        pokemon_name(str) name of the pokemon as it appears in the page URL,
            eg : Greninja
        timeout(float) seconds to wait for the HTTP response before giving up.
            Defaults to 10.

    Returns:
        A dictionary with the keys "number", "name", "type", "ability",
        "hidden_ability", "gender_ratio", "hatch_cycle", "Base_Experience_Yield",
        "leveling_rate", "color", "base_friendship", "height" and "weight".
        "type" is a 2-tuple of strings; every other value is a string.

        None (after printing a message) when the request fails, when the page
        has fewer than 40 matching cells, or when a lookup fails while parsing.
    """

    try:
        # Give URL
        url = f"https://bulbapedia.bulbagarden.net/wiki/{pokemon_name}_(Pok%C3%A9mon)"

        # Fetching the webpage; the timeout keeps an unresponsive server from
        # blocking the notebook forever (a timeout is a RequestException).
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Creating a BeautifulSoup instance
        soup = BeautifulSoup(response.text, "lxml")

        # Overall data
        data = soup.find_all("td", class_ = "roundy")

        # The highest position read below is data[39], so at least 40 cells are needed.
        if len(data) < 40 :
            print(f"Don't have enought data for {pokemon_name}")
            return None

        # name, number
        name = data[0].find(["big", "b"]).text
        number = soup.find(["th","span", "big"], class_ = "roundy").text.replace("\n", "")

        # Type: positions 2 and 4 among the span/a tags of the type cell
        # NOTE(review): presumably tied to the current page layout - confirm for
        # single-type Pokémon.
        type_tags = data[1].find_all(["span", "a"])
        type_ = type_tags[2].text, type_tags[4].text

        # ability and hidden ability
        ability_tags = data[2].find_all(["a", "span"])
        ability = ability_tags[2].text
        hidden_ability = ability_tags[8].text

        # gender ratio
        gender_ratio = data[3].find_all(["a"])[1].text

        # hatch cycle: second "roundy" table inside the egg-group cell,
        # with non-breaking spaces removed
        hatch_cycle_tables = data[5].find_all("table", class_ = "roundy")
        hatch_cycle = hatch_cycle_tables[1].text.strip().replace("\xa0", "")

        # Height and Weight: second cell of the first row of each table
        height = data[6].find_all("tr")[0].find_all("td")[1].text.replace("\n", "")
        weight = data[7].find_all("tr")[0].find_all("td")[1].text.replace("\n", "")

        # Base Experience Yield
        bey_cells = data[9].find("tr").find_all("td")
        bey = bey_cells[2].text.replace("V+", "").replace("\n", "")

        # Other details
        leveling_rate = data[10].tr.text.strip()
        color = data[38].td.text.strip()
        base_friendship = data[39].tr.text.strip()

        return {"number" : number, "name" : name, "type" : type_, "ability" : ability, "hidden_ability" : hidden_ability,
                 "gender_ratio" : gender_ratio, "hatch_cycle" : hatch_cycle, "Base_Experience_Yield" : bey, "leveling_rate" : leveling_rate,
                 "color" : color, "base_friendship" : base_friendship, "height" : height, "weight" : weight}

    except requests.RequestException as e:
        print(f"Failed to fetch data for {pokemon_name}: {e}")
        return None
    except (AttributeError, IndexError) as e:
        # A missing tag (None.text) or a too-short tag list lands here.
        print(f"Failed to parse data for {pokemon_name}: {e}")
        return None

            

In [150]:
# Try the scraper on a single page ("Greninja") and print the returned dict.
# NOTE(review): the variable is called bulbasaur_data but holds the Greninja result.
bulbasaur_data = scrap_pokemon_details("Greninja")
print(bulbasaur_data)

{'number': '#0658', 'name': 'Greninja', 'type': ('Water', 'Dark'), 'ability': 'Torrent', 'hidden_ability': 'Protean', 'gender_ratio': '87.5% male, 12.5% female', 'hatch_cycle': '20cycles', 'Base_Experience_Yield': '239', 'leveling_rate': 'Medium Slow', 'color': 'Blue', 'base_friendship': '70', 'height': '1.5 m', 'weight': '40.0 kg'}


In [132]:
# Bulbasaur
# Fetch the Bulbapedia page, collect every <td class="roundy"> cell and read each
# field from a fixed position in that list, then store the result as row 1 of `pokemon`.
# NOTE(review): the response status is not checked before parsing.
text1 = requests.get("https://bulbapedia.bulbagarden.net/wiki/Bulbasaur_(Pok%C3%A9mon)").text
soup1 = BeautifulSoup(text1, "lxml")
data1 = soup1.find_all("td", class_ = "roundy")
# name, category, number
basic1 = data1[0]
name1 = basic1.find(["big", "b"]).text
category1 = basic1.find(["span"]).text  # extracted but not written to the row below
number1 = soup1.find(["th","span", "big"], class_ = "roundy").text.replace("\n", "")
# type: positions 2 and 4 among the span/a tags of the type cell
type_data1 = data1[1]
type_data1_2 =  type_data1.find_all(["span", "a"])
type1 = type_data1_2[2].text, type_data1_2[4].text
# ability and hidden ability
ability_data1 = data1[2]
ability_data1_2 = ability_data1.find_all(["a", "span"])
ability1 = ability_data1_2[2].text
hidden_ability1 = ability_data1_2[8].text
# gender ratio
gender_data1 = data1[3]
gender_data1_2 = gender_data1.find_all(["a"])
gender_ratio1 = gender_data1_2[1].text
# catch rate (extracted but not written to the row below)
catch_rate1 = data1[4].find("small").text
# egg group (not written to the row below) and hatch cycle
egg_group_data1 = data1[5]
egg_group1 = egg_group_data1.find("table", class_ = "roundy").text.strip()
hatch_cycle_data1 = data1[5].find_all("table", class_ = "roundy")
hatch_cycle1 = hatch_cycle_data1[1].text.strip()
# height and weight: second cell of the first row of each table
height_data1 = data1[6].find_all("tr")
height_data1_2 = height_data1[0].find_all("td")
height1 = height_data1_2[1].text.replace("\n", "")
weight_data1 = data1[7].find_all("tr")
weight_data1_2 = weight_data1[0].find_all("td")
weight1 = weight_data1_2[1].text.replace("\n", "")
# base experience yield
bey_data1 = data1[9].find("tr")
bey_data1_2 = bey_data1.find_all("td")
bey1 = bey_data1_2[2].text.replace("V+", "").replace("\n", "")
# other details, read from fixed positions in the cell list
leveling_rate1 = data1[10].tr.text.strip()
color1 = data1[38].td.text.strip()
base_friendship1 = data1[39].tr.text.strip()
# write the collected values into row 1; the dict keys match the DataFrame columns
pokemon.loc[1] = {"number" : number1, "name" : name1, "type" : type1, "ability" : ability1, "hidden_ability" : hidden_ability1, 
                 "gender_ratio" : gender_ratio1, "hatch_cycle" : hatch_cycle1, "Base_Experience_Yield" : bey1, "leveling_rate" : leveling_rate1,
                 "color" : color1, "base_friendship" : base_friendship1, "height" : height1, "weight" : weight1}

In [135]:
pokemon

Unnamed: 0,number,name,type,ability,hidden_ability,gender_ratio,hatch_cycle,Base_Experience_Yield,height,weight,leveling_rate,color,base_friendship
1,#0001,Bulbasaur,"(Grass, Poison)",Overgrow,Chlorophyll,"87.5% male, 12.5% female",20 cycles,64,0.7 m,6.9 kg,Medium Slow,Green,70
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
5,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,,,,,,,,,,,,,
896,,,,,,,,,,,,,
897,,,,,,,,,,,,,
898,,,,,,,,,,,,,


In [140]:
# Ivysaur
# Fetch the Bulbapedia page, collect every <td class="roundy"> cell and read each
# field from a fixed position in that list, then store the result as row 2 of `pokemon`.
# NOTE(review): the response status is not checked before parsing.
text2 = requests.get("https://bulbapedia.bulbagarden.net/wiki/Ivysaur_(Pok%C3%A9mon)").text
soup2 = BeautifulSoup(text2, "lxml")
data2 = soup2.find_all("td", class_ = "roundy")
# name, category, number
basic2 = data2[0]
name2 = basic2.find(["big", "b"]).text
category2 = basic2.find(["span"]).text  # extracted but not written to the row below
number2 = soup2.find(["th","span", "big"], class_ = "roundy").text.replace("\n", "")
# type: positions 2 and 4 among the span/a tags of the type cell
type_data2 = data2[1]
type_data2_2 =  type_data2.find_all(["span", "a"])
type2 = type_data2_2[2].text, type_data2_2[4].text
# ability and hidden ability
ability_data2 = data2[2]
ability_data2_2 = ability_data2.find_all(["a", "span"])
ability2 = ability_data2_2[2].text
hidden_ability2 = ability_data2_2[8].text
# gender ratio
gender_data2 = data2[3]
gender_data2_2 = gender_data2.find_all(["a"])
gender_ratio2 = gender_data2_2[1].text
# catch rate (extracted but not written to the row below)
catch_rate2 = data2[4].find("small").text
# egg group (not written to the row below) and hatch cycle
egg_group_data2 = data2[5]
egg_group2 = egg_group_data2.find("table", class_ = "roundy").text.strip()
hatch_cycle_data2 = data2[5].find_all("table", class_ = "roundy")
hatch_cycle2 = hatch_cycle_data2[1].text.strip()
# height and weight: second cell of the first row of each table
height_data2 = data2[6].find_all("tr")
height_data2_2 = height_data2[0].find_all("td")
height2 = height_data2_2[1].text.replace("\n", "")
weight_data2 = data2[7].find_all("tr")
weight_data2_2 = weight_data2[0].find_all("td")
weight2 = weight_data2_2[1].text.replace("\n", "")
# base experience yield
bey_data2 = data2[9].find("tr")
bey_data2_2 = bey_data2.find_all("td")
bey2 = bey_data2_2[2].text.replace("V+", "").replace("\n", "")
# other details, read from fixed positions in the cell list
leveling_rate2 = data2[10].tr.text.strip()
color2 = data2[38].td.text.strip()
base_friendship2 = data2[39].tr.text.strip()
# write the collected values into row 2; the dict keys match the DataFrame columns
pokemon.loc[2] = {"number" : number2, "name" : name2, "type" : type2, "ability" : ability2, "hidden_ability" : hidden_ability2, 
                 "gender_ratio" : gender_ratio2, "hatch_cycle" : hatch_cycle2, "Base_Experience_Yield" : bey2, "leveling_rate" : leveling_rate2,
                 "color" : color2, "base_friendship" : base_friendship2, "height" : height2, "weight" : weight2}

In [None]:
# Venusaur
# Fetch the Bulbapedia page, collect every <td class="roundy"> cell and read each
# field from a fixed position in that list, then store the result as row 3 of `pokemon`.
# Every lookup below reads this page's own cell list (data3) and the row is written
# to index 3, so running the cell neither reads nor overwrites another row's data.
text3 = requests.get("https://bulbapedia.bulbagarden.net/wiki/Venusaur_(Pok%C3%A9mon)").text
soup3 = BeautifulSoup(text3, "lxml")
data3 = soup3.find_all("td", class_ = "roundy")
# name, category, number
basic3 = data3[0]
name3 = basic3.find(["big", "b"]).text
category3 = basic3.find(["span"]).text  # extracted but not written to the row below
number3 = soup3.find(["th","span", "big"], class_ = "roundy").text.replace("\n", "")
# type: positions 2 and 4 among the span/a tags of the type cell
type_data3 = data3[1]
type_data3_2 =  type_data3.find_all(["span", "a"])
type3 = type_data3_2[2].text, type_data3_2[4].text
# ability and hidden ability
ability_data3 = data3[2]
ability_data3_2 = ability_data3.find_all(["a", "span"])
ability3 = ability_data3_2[2].text
hidden_ability3 = ability_data3_2[8].text
# gender ratio
gender_data3 = data3[3]
gender_data3_2 = gender_data3.find_all(["a"])
gender_ratio3 = gender_data3_2[1].text
# catch rate (extracted but not written to the row below)
catch_rate3 = data3[4].find("small").text
# egg group (not written to the row below) and hatch cycle
egg_group_data3 = data3[5]
egg_group3 = egg_group_data3.find("table", class_ = "roundy").text.strip()
hatch_cycle_data3 = data3[5].find_all("table", class_ = "roundy")
hatch_cycle3 = hatch_cycle_data3[1].text.strip()
# height and weight: second cell of the first row of each table
height_data3 = data3[6].find_all("tr")
height_data3_2 = height_data3[0].find_all("td")
height3 = height_data3_2[1].text.replace("\n", "")
weight_data3 = data3[7].find_all("tr")
weight_data3_2 = weight_data3[0].find_all("td")
weight3 = weight_data3_2[1].text.replace("\n", "")
# base experience yield
bey_data3 = data3[9].find("tr")
bey_data3_2 = bey_data3.find_all("td")
bey3 = bey_data3_2[2].text.replace("V+", "").replace("\n", "")
# other details, read from fixed positions in the cell list
leveling_rate3 = data3[10].tr.text.strip()
color3 = data3[38].td.text.strip()
base_friendship3 = data3[39].tr.text.strip()
# write the collected values into row 3; the dict keys match the DataFrame columns
pokemon.loc[3] = {"number" : number3, "name" : name3, "type" : type3, "ability" : ability3, "hidden_ability" : hidden_ability3,
                 "gender_ratio" : gender_ratio3, "hatch_cycle" : hatch_cycle3, "Base_Experience_Yield" : bey3, "leveling_rate" : leveling_rate3,
                 "color" : color3, "base_friendship" : base_friendship3, "height" : height3, "weight" : weight3}