In [23]:
from bs4 import BeautifulSoup
import requests

In [24]:
url = 'https://pokemondb.net/pokedex/all'

page = requests.get(url)

soup = BeautifulSoup(page.text, 'html')

In [25]:
# Checks website html
print(soup)

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<title>Pokémon Pokédex: list of Pokémon with stats | Pokémon Database</title>
<link href="https://img.pokemondb.net" rel="preconnect"/>
<link href="https://s.pokemondb.net" rel="preconnect"/>
<link as="font" crossorigin="" href="/static/fonts/fira-sans-v17-latin-400.woff2" rel="preload" type="font/woff2"/>
<link as="font" crossorigin="" href="/static/fonts/fira-sans-v17-latin-400i.woff2" rel="preload" type="font/woff2"/>
<link as="font" crossorigin="" href="/static/fonts/fira-sans-v17-latin-600.woff2" rel="preload" type="font/woff2"/>
<link href="/static/css/pokemondb-aa70195104.css" rel="stylesheet"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<meta content="The Pokédex contains detailed stats for every creature from the Pokémon games, up to and including the latest Scarlet/Violet games." name="description" property="og:description"/>
<link href="https://pokemondb.net/pokedex/all" rel="canonical

In [26]:
f'''
    Finds table with the class name "data-table sticky-header block-wide".
    To check all the tables in a page, use this: {soup.find_all('table')}
'''

table = soup.find('table', class_ = "data-table sticky-header block-wide")

In [27]:
'''
    Finds all th (headers) from the table.
'''

titles = table.find_all('th')

In [28]:
'''
    Create list for title headers with list comprehensions.
    .text: removes <th> </th>
    .strip(): removes \n
'''

table_title_list = [title.text.strip() for title in titles]
print(table_title_list)

['#', 'Name', 'Type', 'Total', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']


In [29]:
import pandas as pd

In [30]:
# store headers in Data Frame
df = pd.DataFrame(columns= table_title_list)

# Print out DataFrane content
df

Unnamed: 0,#,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed


In [31]:
'''
    Finds all row in the table.
'''

column_data = table.find_all('tr')
print(column_data)

[<tr>
<th class="sorting" data-sort-type="int"><div class="sortwrap">#</div></th> <th class="sorting" data-sort-type="string"><div class="sortwrap">Name</div></th> <th><div class="sortwrap">Type</div></th> <th class="sorting" data-sort-default="desc" data-sort-type="int"><div class="sortwrap">Total</div></th> <th class="sorting" data-sort-default="desc" data-sort-type="int"><div class="sortwrap">HP</div></th> <th class="sorting" data-sort-default="desc" data-sort-type="int"><div class="sortwrap">Attack</div></th> <th class="sorting" data-sort-default="desc" data-sort-type="int"><div class="sortwrap">Defense</div></th> <th class="sorting" data-sort-default="desc" data-sort-type="int"><div class="sortwrap">Sp. Atk</div></th> <th class="sorting" data-sort-default="desc" data-sort-type="int"><div class="sortwrap">Sp. Def</div></th> <th class="sorting" data-sort-default="desc" data-sort-type="int"><div class="sortwrap">Speed</div></th> </tr>, <tr>
<td class="cell-num cell-fixed" data-sort-v

In [32]:
'''
    Loops through a row and get data.
'''

for row in column_data[1:]: # starts at index 1 to exclude header row
    row_data = row.find_all('td')
    individual_row_data = [data.text.strip() for data in row_data]

    length = len(df)    # stores how many rows in the df
    df.loc[length] = individual_row_data    # appends individual row data to df every time df updates length

In [33]:
# show the update DataFrame

df

Unnamed: 0,#,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,0001,Bulbasaur,Grass Poison,318,45,49,49,65,65,45
1,0002,Ivysaur,Grass Poison,405,60,62,63,80,80,60
2,0003,Venusaur,Grass Poison,525,80,82,83,100,100,80
3,0003,Venusaur Mega Venusaur,Grass Poison,625,80,100,123,122,120,80
4,0004,Charmander,Fire,309,39,52,43,60,50,65
...,...,...,...,...,...,...,...,...,...,...
1210,1023,Iron Crown,Steel Psychic,590,90,72,100,122,108,98
1211,1024,Terapagos Normal Form,Normal,450,90,65,85,65,85,60
1212,1024,Terapagos Terastal Form,Normal,600,95,95,110,105,110,85
1213,1024,Terapagos Stellar Form,Normal,700,160,105,110,130,110,85


In [34]:
'''
    Had to have a separate file path because Windows is protecting my folder for my GitHub Repo,
    thus, executable files cannot create changes.
'''
filepath = r'C:\Users\SERVIN\Desktop\Important local files\Data Analyst\scraped_data'

df.to_csv(f'{filepath}\poke_index_data.csv', index = False)

In [35]:
import sqlite3
import os
import plotly.express as px

In [36]:
df

Unnamed: 0,#,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,0001,Bulbasaur,Grass Poison,318,45,49,49,65,65,45
1,0002,Ivysaur,Grass Poison,405,60,62,63,80,80,60
2,0003,Venusaur,Grass Poison,525,80,82,83,100,100,80
3,0003,Venusaur Mega Venusaur,Grass Poison,625,80,100,123,122,120,80
4,0004,Charmander,Fire,309,39,52,43,60,50,65
...,...,...,...,...,...,...,...,...,...,...
1210,1023,Iron Crown,Steel Psychic,590,90,72,100,122,108,98
1211,1024,Terapagos Normal Form,Normal,450,90,65,85,65,85,60
1212,1024,Terapagos Terastal Form,Normal,600,95,95,110,105,110,85
1213,1024,Terapagos Stellar Form,Normal,700,160,105,110,130,110,85


In [37]:
'''
    I already have instance of the df so I am commenting out this file line of code for now.
'''
# df = pd.read_csv('datasets\poke_index_data.csv')  # load csv file

'\n    I already have instance of the df so I am commenting out this file line of code for now.\n'

In [38]:
# Creating SQLite database
db_filepath = r'C:\Users\SERVIN\Desktop\Important local files\Data Analyst\scraped_data'

conn = sqlite3.connect(f'{db_filepath}\database.db')
cursor = conn.cursor()

In [39]:
# create_table = '''CREATE TABLE IF NOT EXISTS pokemonData(
# id INTEGER PRIMARY KEY AUTOINCREMENT,
# numberIdx INTEGER,
# Name VARCHAR(40) NOT NULL,
# Type VARCHAR(40) NOT NULL,
# Total INTEGER NOT NULL,
# HP INTEGER NOT NULL,
# Attack INTEGER NOT NULL,
# Defense INTEGER NOT NULL,
# Sp.Attack INTEGER NOT NULL,
# Sp.Def INTEGER NOT NULL,
# Speed INTEGER NOT NULL);
# '''

In [40]:
'''
    Loading df to SQLite table.
'''
df.to_sql('pokemonData', conn, if_exists='replace', index = False)



1215

In [41]:
'''
    Read sql with pd.
'''
pd.read_sql('''SELECT * FROM pokemonData''', conn)

Unnamed: 0,#,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,0001,Bulbasaur,Grass Poison,318,45,49,49,65,65,45
1,0002,Ivysaur,Grass Poison,405,60,62,63,80,80,60
2,0003,Venusaur,Grass Poison,525,80,82,83,100,100,80
3,0003,Venusaur Mega Venusaur,Grass Poison,625,80,100,123,122,120,80
4,0004,Charmander,Fire,309,39,52,43,60,50,65
...,...,...,...,...,...,...,...,...,...,...
1210,1023,Iron Crown,Steel Psychic,590,90,72,100,122,108,98
1211,1024,Terapagos Normal Form,Normal,450,90,65,85,65,85,60
1212,1024,Terapagos Terastal Form,Normal,600,95,95,110,105,110,85
1213,1024,Terapagos Stellar Form,Normal,700,160,105,110,130,110,85
