In [None]:
import requests
from bs4 import BeautifulSoup
import pickle
import pandas as pd

In [None]:
# List of Pokémon names (iterated by the scraping loop, which stores each entry as
# the document's English name).
# NOTE(security): pickle can execute arbitrary code while loading; only load a
# file that you created yourself.
# The `with` block guarantees the file handle is closed once the list is loaded.
with open('./pokedex_name_list.pkl', 'rb') as name_file:
    pokemon_list = pickle.load(name_file)

In [None]:
# Type list DataFrame, read from ./type.csv.
# type_func looks rows up by the `link_text` column and returns the `japanese` column.
df_type = pd.read_csv('./type.csv')

In [None]:
# Attackdex DataFrame, read from ./attackdex.csv.
# The scraping loop uses it to map a move's `English_name` to its `Japanese_name`.
df_attackdex = pd.read_csv('./attackdex.csv')

In [None]:
# Function definition: given an element that carries a type link, return the type name.
def type_func(elem, type_table=None):
    """Return the Japanese type name for an element whose ``href`` is a type link.

    Parameters
    ----------
    elem : mapping-like
        Any object supporting ``elem['href']`` (for example a BeautifulSoup
        ``<a>`` tag). Only the last ``/``-separated segment of the link is used.
    type_table : pandas.DataFrame, optional
        Table with ``link_text`` and ``japanese`` columns. When omitted, the
        notebook-level ``df_type`` is used, which matches the original behaviour.

    Returns
    -------
    The ``japanese`` value of the first row whose ``link_text`` equals the
    last path segment of the link.

    Raises
    ------
    IndexError
        If no row of the table matches the link segment.
    """
    if type_table is None:
        type_table = df_type

    type_link = elem['href'].split('/')[-1]

    # Compare the column directly instead of interpolating the link text into a
    # DataFrame.query() expression: a quote character inside the link would
    # otherwise produce a malformed (or differently parsed) query string.
    matches = type_table.loc[type_table['link_text'] == type_link, 'japanese']
    if matches.empty:
        # Same exception type as the original `.values[0]` on an empty result,
        # but with a message that names the offending link.
        raise IndexError(f'No row in the type table has link_text == {type_link!r}')
    return matches.values[0]

In [None]:
# Create the objects used to talk to MongoDB < DB name: pokemon / collection name: pokedex >
from pymongo import MongoClient
# No arguments are passed, so pymongo's own default connection settings apply.
client = MongoClient()
# Handle to the `pokemon` database; the scraping loop writes to its `pokedex` collection.
db = client['pokemon']

In [None]:
# Scrape every Pokémon page and store one document per Pokémon in MongoDB
# (supports Generation 4 and Sword/Shield pages).
# Author's notes: the loop can run up to index 493 -- generations that are not
# supported use a different URL format. 412 Wormadam and 491 Shaymin raise errors
# because their page layout differs; handle those two individually.

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def _japanese_attack_name(english_name):
    """Return the ``Japanese_name`` of the ``df_attackdex`` row matching ``english_name``.

    Raises ``IndexError`` (the same type the bare ``.values[0]`` lookup raised)
    with a message naming the move when the attackdex has no such row.
    """
    matches = df_attackdex.loc[df_attackdex['English_name'] == english_name, 'Japanese_name']
    if matches.empty:
        raise IndexError(f'No attackdex entry for move {english_name!r}')
    return matches.values[0]


def _read_level_up_row(cells):
    """Build the level-up attack fields from the ``td`` cells of one table row."""
    return {'Level': cells[0].text, 'English_name': cells[1].a.text}


def _read_tm_row(cells):
    """Build the technical-machine attack fields from the ``td`` cells of one table row."""
    # Only the last two characters of the first cell are kept as the machine number.
    return {'No': cells[0].text[-2:], 'English_name': cells[1].a.text}


def _read_egg_row(cells):
    """Build the egg-move fields from the ``td`` cells of one table row."""
    return {'English_name': cells[0].a.contents[0]}


def _parse_attack_rows(table, read_row, step=2):
    """Parse one attack table into a list of attack dicts.

    Rows are read starting at index 2 and advancing by ``step``; ``read_row``
    turns a row's ``td`` cells into a dict, to which the Japanese move name is
    then appended as the last key.
    """
    attacks = []
    for row in table.select('tr')[2::step]:
        attack = read_row(row.select('td'))
        attack['Japanese_name'] = _japanese_attack_name(attack['English_name'])
        attacks.append(attack)
    return attacks


# Section heading text -> (document key, row reader, row step).
# Both level-up headings write to the same key, so when a page contains both
# tables the later one wins (unchanged from the original if/elif chain).
# For some Pokémon the technical-machine table needs a row step of 3 instead of 2;
# change the step in that entry when processing those pages.
ATTACK_SECTIONS = {
    'Standard Level Up': ('Level_up_attacks', _read_level_up_row, 2),
    'Standard Level Up - Brilliant Diamond & Shining Pearl': ('Level_up_attacks', _read_level_up_row, 2),
    'BDSP Technical Machine Attacks': ('Technical_machine_attacks', _read_tm_row, 2),
    'Egg Moves': ('Egg_attacks', _read_egg_row, 2),
}

for pokemon in pokemon_list:
    # The URL slug is the name without spaces, with gender symbols spelled out, lower-cased.
    pokemon = pokemon.replace(' ', '')
    pokemon_name = pokemon.replace('♀', 'f').replace('♂', 'm').lower()

    url = f'https://www.serebii.net/pokedex-swsh/{pokemon_name}/'
    res = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail here with a clear HTTP error instead of parsing an error page and
    # failing later on a missing table.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')

    # Select the tables once; the second one holds the name and type cells.
    dextables = soup.select('table.dextable')
    info_table = dextables[1]

    item = {}
    # NOTE(review): this stores the name with spaces already removed -- confirm that is intended.
    item['English_name'] = pokemon
    item['Japanese_name'] = info_table.select('td.fooinfo')[1].select('td')[1].contents[-1]
    item['Type'] = [type_func(elem) for elem in info_table.select('td.cen')[0].select('a')]

    for dextable_elem in dextables:
        headings = dextable_elem.select('h3')
        if not headings:
            continue
        section = ATTACK_SECTIONS.get(headings[0].text)
        if section is None:
            continue
        document_key, read_row, row_step = section
        item[document_key] = _parse_attack_rows(dextable_elem, read_row, row_step)

    # Save to MongoDB < DB name: pokemon / collection name: pokedex >.
    # Upsert keyed on the English name so that re-running this cell replaces the
    # existing document instead of inserting a duplicate.
    db.pokedex.replace_one({'English_name': item['English_name']}, item, upsert=True)