# Getting data directly from a website
This notebook walks you through the steps of collecting data from [Bulbapedia's National Pokedex](https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number) using `requests` and `BeautifulSoup`.

### Import `requests` library
This package lets you download a web page's HTML so that you can extract data from it. Let's save the website's URL in the `URL` variable.

In [None]:
import requests
import json

# Address of the page to scrape (Bulbapedia's list of Pokemon by National Pokedex number).
URL = (
    "https://bulbapedia.bulbagarden.net"
    "/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number"
)

### Load the page

In [None]:
# Download the page. Without a timeout, requests waits indefinitely on a
# stalled connection, which would freeze the notebook on "Run All".
page = requests.get(URL, timeout=30)

# Stop here on an HTTP error (4xx/5xx) rather than parsing an error page
# in the cells below.
page.raise_for_status()

### Parse HTML data

In [None]:
from bs4 import BeautifulSoup

# Turn the downloaded response body into a searchable tree, using the
# 'html.parser' backend.
soup = BeautifulSoup(markup=page.content, features='html.parser')

### Find all tables that contain Pokemon details

In [None]:
# Get the main content container (the element with id 'mw-content-text').
poke_content = soup.find(id='mw-content-text')

# find() returns None when nothing matches; fail with a clear message instead
# of an AttributeError on the next line.
if poke_content is None:
    raise RuntimeError(
        "Could not find the element with id 'mw-content-text'; "
        "the page layout may have changed or the download did not return the expected page."
    )

# Get all <table> elements inside the main content.
poke_tables = poke_content.find_all('table')

### Get the list of Pokemon for each generation

In [None]:
# Tables 1-8 are used as the Generation 1-8 lists. Table 0 is skipped
# (presumably it is not a generation list -- TODO confirm against the live page).
EXPECTED_TABLES = 9

# Check up front so a layout change produces one clear error instead of an
# IndexError partway through the assignments.
if len(poke_tables) < EXPECTED_TABLES:
    raise RuntimeError(
        "Expected at least {} tables in the page content, found {}; "
        "the page layout may have changed.".format(EXPECTED_TABLES, len(poke_tables))
    )

(
    gen1_list,
    gen2_list,
    gen3_list,
    gen4_list,
    gen5_list,
    gen6_list,
    gen7_list,
    gen8_list,
) = poke_tables[1:EXPECTED_TABLES]

### Save each generation's dataset as a JSON file

Generation 1 Pokemon

In [None]:
# Collect one dict per row of the Generation 1 table and save them to gen1.json.
gen_json = []

current_gen = gen1_list.contents

# Site root; the hrefs read from the table are appended to it to form full URLs.
base = "https://bulbapedia.bulbagarden.net"

# Only the children at indices 3, 5, 7, ... are read as Pokemon rows
# (presumably the others are the header row and whitespace nodes --
# TODO confirm against the live page).
for i in range(3, len(current_gen), 2):
    poke_info = current_gen[i]
    cells = poke_info.contents

    kdex = cells[1].text.strip()
    ndex = cells[3].text.strip()
    name = cells[7].text.strip()
    # The link to the Pokemon's own page is taken from the same cell as the name.
    url = base + cells[7].find("a", href=True)["href"]
    type1 = cells[9].text.strip()

    entry = {
        "kdex": kdex,
        "ndex": ndex,
        "name": name,
        "type1": type1,
    }
    # Rows with more than 10 children have a second type cell at index 11.
    if len(cells) > 10:
        entry["type2"] = cells[11].text.strip()
    # Added last so the key order stays: ..., type1, [type2], url.
    entry["url"] = url

    gen_json.append(entry)

with open('gen1.json', 'w', encoding='utf-8') as outfile:
    json.dump(gen_json, outfile)

# Preview the first few records. Jupyter only displays a bare expression when
# it is the last statement of the cell.
gen_json[:5]

Generation 2 Pokemon

In [None]:
# Collect one dict per row of the Generation 2 table and save them to gen2.json.
gen_json = []

current_gen = gen2_list.contents

# Site root; the hrefs read from the table are appended to it to form full URLs.
base = "https://bulbapedia.bulbagarden.net"

# Only the children at indices 3, 5, 7, ... are read as Pokemon rows
# (presumably the others are the header row and whitespace nodes --
# TODO confirm against the live page).
for i in range(3, len(current_gen), 2):
    poke_info = current_gen[i]
    cells = poke_info.contents

    jdex = cells[1].text.strip()
    ndex = cells[3].text.strip()
    name = cells[7].text.strip()
    # The link to the Pokemon's own page is taken from the same cell as the name.
    url = base + cells[7].find("a", href=True)["href"]
    type1 = cells[9].text.strip()

    entry = {
        "jdex": jdex,
        "ndex": ndex,
        "name": name,
        "type1": type1,
    }
    # Rows with more than 10 children have a second type cell at index 11.
    if len(cells) > 10:
        entry["type2"] = cells[11].text.strip()
    # Added last so the key order stays: ..., type1, [type2], url.
    entry["url"] = url

    gen_json.append(entry)

with open('gen2.json', 'w', encoding='utf-8') as outfile:
    json.dump(gen_json, outfile)

# Preview the first few records. Jupyter only displays a bare expression when
# it is the last statement of the cell.
gen_json[:5]

Generation 3 Pokemon

In [None]:
# Collect one dict per row of the Generation 3 table and save them to gen3.json.
gen_json = []

current_gen = gen3_list.contents

# Site root; the hrefs read from the table are appended to it to form full URLs.
base = "https://bulbapedia.bulbagarden.net"

# Only the children at indices 3, 5, 7, ... are read as Pokemon rows
# (presumably the others are the header row and whitespace nodes --
# TODO confirm against the live page).
for i in range(3, len(current_gen), 2):
    poke_info = current_gen[i]
    cells = poke_info.contents

    hdex = cells[1].text.strip()
    ndex = cells[3].text.strip()
    name = cells[7].text.strip()
    # The link to the Pokemon's own page is taken from the same cell as the name.
    url = base + cells[7].find("a", href=True)["href"]
    type1 = cells[9].text.strip()

    entry = {
        "hdex": hdex,
        "ndex": ndex,
        "name": name,
        "type1": type1,
    }
    # Rows with more than 10 children have a second type cell at index 11.
    if len(cells) > 10:
        entry["type2"] = cells[11].text.strip()
    # Added last so the key order stays: ..., type1, [type2], url.
    entry["url"] = url

    gen_json.append(entry)

with open('gen3.json', 'w', encoding='utf-8') as outfile:
    json.dump(gen_json, outfile)

# Preview the first few records. Jupyter only displays a bare expression when
# it is the last statement of the cell.
gen_json[:5]

Generation 4 Pokemon

In [None]:
# Collect one dict per row of the Generation 4 table and save them to gen4.json.
gen_json = []

current_gen = gen4_list.contents

# Site root; the hrefs read from the table are appended to it to form full URLs.
base = "https://bulbapedia.bulbagarden.net"

# Only the children at indices 3, 5, 7, ... are read as Pokemon rows
# (presumably the others are the header row and whitespace nodes --
# TODO confirm against the live page).
for i in range(3, len(current_gen), 2):
    poke_info = current_gen[i]
    cells = poke_info.contents

    sdex = cells[1].text.strip()
    ndex = cells[3].text.strip()
    name = cells[7].text.strip()
    # The link to the Pokemon's own page is taken from the same cell as the name.
    url = base + cells[7].find("a", href=True)["href"]
    type1 = cells[9].text.strip()

    entry = {
        "sdex": sdex,
        "ndex": ndex,
        "name": name,
        "type1": type1,
    }
    # Rows with more than 10 children have a second type cell at index 11.
    if len(cells) > 10:
        entry["type2"] = cells[11].text.strip()
    # Added last so the key order stays: ..., type1, [type2], url.
    entry["url"] = url

    gen_json.append(entry)

with open('gen4.json', 'w', encoding='utf-8') as outfile:
    json.dump(gen_json, outfile)

# Preview the first few records. Jupyter only displays a bare expression when
# it is the last statement of the cell.
gen_json[:5]

Generation 5 Pokemon

In [None]:
# Collect one dict per row of the Generation 5 table and save them to gen5.json.
gen_json = []

current_gen = gen5_list.contents

# Site root; the hrefs read from the table are appended to it to form full URLs.
base = "https://bulbapedia.bulbagarden.net"

# Only the children at indices 3, 5, 7, ... are read as Pokemon rows
# (presumably the others are the header row and whitespace nodes --
# TODO confirm against the live page).
for i in range(3, len(current_gen), 2):
    poke_info = current_gen[i]
    cells = poke_info.contents

    udex = cells[1].text.strip()
    ndex = cells[3].text.strip()
    name = cells[7].text.strip()
    # The link to the Pokemon's own page is taken from the same cell as the name.
    url = base + cells[7].find("a", href=True)["href"]
    type1 = cells[9].text.strip()

    entry = {
        "udex": udex,
        "ndex": ndex,
        "name": name,
        "type1": type1,
    }
    # Rows with more than 10 children have a second type cell at index 11.
    if len(cells) > 10:
        entry["type2"] = cells[11].text.strip()
    # Added last so the key order stays: ..., type1, [type2], url.
    entry["url"] = url

    gen_json.append(entry)

with open('gen5.json', 'w', encoding='utf-8') as outfile:
    json.dump(gen_json, outfile)

# Preview the first few records. Jupyter only displays a bare expression when
# it is the last statement of the cell.
gen_json[:5]

Generation 6 Pokemon

In [None]:
# Collect one dict per row of the Generation 6 table and save them to gen6.json.
gen_json = []

current_gen = gen6_list.contents

# Site root; the hrefs read from the table are appended to it to form full URLs.
base = "https://bulbapedia.bulbagarden.net"

# Only the children at indices 3, 5, 7, ... are read as Pokemon rows
# (presumably the others are the header row and whitespace nodes --
# TODO confirm against the live page).
for i in range(3, len(current_gen), 2):
    poke_info = current_gen[i]
    cells = poke_info.contents

    kdex = cells[1].text.strip()
    ndex = cells[3].text.strip()
    name = cells[7].text.strip()
    # The link to the Pokemon's own page is taken from the same cell as the name.
    url = base + cells[7].find("a", href=True)["href"]
    type1 = cells[9].text.strip()

    entry = {
        "kdex": kdex,
        "ndex": ndex,
        "name": name,
        "type1": type1,
    }
    # Rows with more than 10 children have a second type cell at index 11.
    if len(cells) > 10:
        entry["type2"] = cells[11].text.strip()
    # Added last so the key order stays: ..., type1, [type2], url.
    entry["url"] = url

    gen_json.append(entry)

with open('gen6.json', 'w', encoding='utf-8') as outfile:
    json.dump(gen_json, outfile)

# Preview the first few records. Jupyter only displays a bare expression when
# it is the last statement of the cell.
gen_json[:5]

Generation 7 Pokemon

In [None]:
# Collect one dict per row of the Generation 7 table and save them to gen7.json.
gen_json = []

current_gen = gen7_list.contents

# Site root; the hrefs read from the table are appended to it to form full URLs.
base = "https://bulbapedia.bulbagarden.net"

# Only the children at indices 3, 5, 7, ... are read as Pokemon rows
# (presumably the others are the header row and whitespace nodes --
# TODO confirm against the live page).
for i in range(3, len(current_gen), 2):
    poke_info = current_gen[i]
    cells = poke_info.contents

    adex = cells[1].text.strip()
    ndex = cells[3].text.strip()
    name = cells[7].text.strip()
    # The link to the Pokemon's own page is taken from the same cell as the name.
    url = base + cells[7].find("a", href=True)["href"]
    type1 = cells[9].text.strip()

    entry = {
        "adex": adex,
        "ndex": ndex,
        "name": name,
        "type1": type1,
    }
    # Rows with more than 10 children have a second type cell at index 11.
    if len(cells) > 10:
        entry["type2"] = cells[11].text.strip()
    # Added last so the key order stays: ..., type1, [type2], url.
    entry["url"] = url

    gen_json.append(entry)

with open('gen7.json', 'w', encoding='utf-8') as outfile:
    json.dump(gen_json, outfile)

# Preview the first few records. Jupyter only displays a bare expression when
# it is the last statement of the cell.
gen_json[:5]

Generation 8 Pokemon

In [None]:
# Collect one dict per row of the Generation 8 table and save them to gen8.json.
gen_json = []

current_gen = gen8_list.contents

# Site root; the hrefs read from the table are appended to it to form full URLs.
base = "https://bulbapedia.bulbagarden.net"

# Only the children at indices 3, 5, 7, ... are read as Pokemon rows
# (presumably the others are the header row and whitespace nodes --
# TODO confirm against the live page).
for i in range(3, len(current_gen), 2):
    poke_info = current_gen[i]
    cells = poke_info.contents

    gdex = cells[1].text.strip()
    ndex = cells[3].text.strip()
    name = cells[7].text.strip()
    # The link to the Pokemon's own page is taken from the same cell as the name.
    url = base + cells[7].find("a", href=True)["href"]
    type1 = cells[9].text.strip()

    entry = {
        "gdex": gdex,
        "ndex": ndex,
        "name": name,
        "type1": type1,
    }
    # Rows with more than 10 children have a second type cell at index 11.
    if len(cells) > 10:
        entry["type2"] = cells[11].text.strip()
    # Added last so the key order stays: ..., type1, [type2], url.
    entry["url"] = url

    gen_json.append(entry)

with open('gen8.json', 'w', encoding='utf-8') as outfile:
    json.dump(gen_json, outfile)

# Preview the first few records. Jupyter only displays a bare expression when
# it is the last statement of the cell.
gen_json[:5]