## Libraries

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
class Monster():
    def __init__(self, url = 'https://monsterhunter.fandom.com/wiki/Monster_List'):
        '''
        Initializing the class to get monster information        
        Parameters
        ----------
        url : str
            URL of the target site
        '''
        self.url = url

    def scounting(self):
        html = requests.get(self.url).text
        soup = BeautifulSoup(html)

        monster_list = soup.find_all(name = 'a', attrs = {'href':True, 'title':True, 'class':False, 'data-tracking-label':False})
        
        main_url = 'https://monsterhunter.fandom.com'
        monster_url_list = set([main_url + url['href'] if url['href'].startswith('/wiki/') else next for url in monster_list])

        return list(monster_url_list)

    def researching(self):
        urls = self.scounting()

        set_elements = set(['Darkness','Thunder Pole','Sound','Dragon','Wind','Frozen Seraphim','Fire',
                            'Kanade','Light','Crimson Demon','Ice','Burning Zero','Thunder','Water',
                            "Emperor's Roar",'Black Flame','Tenshou','Blaze'])

        set_ailments = set(['Blastblight','Vocal Cord Paralysis','Stench','Hellfireblight','Corrupted Poison','Poison','Fireblight',
                            'Silked','Extreme Poison','Drunken','Fatigue','Waterblight','Snowman','Muddy','Crystallization','Tarred',
                            'Extreme Iceblight','Bleeding','Extreme Thunderblight','Frozen','Confusion','Noxious Poison','Blastscourge',
                            'Extreme Dragonblight','Extreme Waterblight','Bubbleblight','Dracophage Erosion','Movement Down','Extreme Sleep',
                            'Dark','Frostbite','Webbed','Extreme Paralysis','Rust','Iceblight','Stabbed','Ossified','Mucus','Positive Charge',
                            'Stun','Slimeblight','Effluvium','Dragonblight','Venom','Thunderblight','Paralysis','Negative Charge','Frenzy Virus',
                            'Sleep','Felvine-Scented','Extreme Fireblight','Zombification','Defense Down','Soiled','Bloodblight','Muck','Magnetism',
                            'Deadly Poison'])
        
        dict_lst = []
        error_lst = []
        
        for link in urls:
            try:
                html = requests.get(link).text
                soup = BeautifulSoup(html)

                monster_dict = {}

                # Name
                monster_dict['Name'] = link.replace('https://monsterhunter.fandom.com/wiki/','')

                # English Title 
                info = soup.find_all(name = 'div',
                                    attrs = {'class':'pi-data-value pi-font'}
                                    )[0].contents
                monster_dict['English_Title'] =  [str(i) for i in info if str(i) != '<br/>']

                # Mosnter Type
                monster_dict['Monster Type'] = soup.find_all(name = 'div',
                                                            attrs = {'data-source':'Monster Type'}
                                                            )[0].div.text

                # Generation
                monster_dict['Generation'] = soup.find_all(name = 'div',
                                                            attrs = {'data-source':'Generation'}
                                                            )[0].div.text
                
                # Elements
                info = set(soup.find_all(name = 'div',
                                        attrs = {'data-source':'Element'}
                                        )[0].div.text.split()).intersection(set_elements)
                monster_dict['Element'] = list(info)
                
                # Ailments
                info = set(soup.find_all(name = 'div',
                                        attrs = {'data-source':'Ailments'}
                                        )[0].div.text.split()).intersection(set_ailments)

                monster_dict['Ailments'] = list(info)

                # Weakest to
                monster_dict['Weakest_to'] = list(set(soup.find_all(name = 'div',
                                                                    attrs = {'data-source':'Weakest to'}
                                                                    )[0].div.text.split()).intersection(set_elements))

                # Habitats
                info = soup.find_all(name = 'div',
                                    attrs = {'data-source':'Habitats'}
                                    )[0].text.split(',')

                monster_dict['Habitats'] =  [i.replace('\nHabitat\n','').strip() for i in info[1:]]

                # Size pulado

                # Related Monsters 
                info = soup.find_all(name = 'div',
                                    attrs = {'data-source':'Monster Relations'}
                                    )[0].text.split(',')

                monster_dict['Related_monsters'] =  [i.replace('\nRelated Monsters\n','').strip() for i in info]

                dict_lst.append(monster_dict)
            except:
                # print(link.replace('https://monsterhunter.fandom.com/wiki/',''))
                error_lst.append(link.replace('https://monsterhunter.fandom.com/wiki/',''))
                pass
        
        return dict_lst,error_lst

    def hunting(self):
        data,errors = self.researching()

        dataframe = pd.DataFrame(data)

        return dataframe,errors

In [5]:
fellow_hunter = Monster()
# fellow_hunter.researching()
df,errors = fellow_hunter.hunting()

print(errors)

['Drilltusk_Tetsucabra', 'Gargwa', 'Magnamalo', 'Banbaro', 'Aknosom', 'Wulg', 'Bishaten', 'Shara_Ishvalda', 'Category:Lists', 'Garangolm', 'Malzeno', 'Aurora_Somnacanth', 'Grimalkyne', 'Somnacanth', 'Monster_List_(Spin-Off)', 'Gowngoat', 'Lunagaron', 'Boggi', 'Kelbi', 'Popo', 'Bombadgy', 'Blood_Orange_Bishaten', 'Gajau', 'Magma_Almudron', 'Boaboa', 'Category:Monsters', 'Remobra', 'Minor_Monsters']


In [143]:
set_elements = set(['Darkness','Thunder Pole','Sound','Dragon','Wind','Frozen Seraphim','Fire',
                    'Kanade','Light','Crimson Demon','Ice','Burning Zero','Thunder','Water',
                    "Emperor's Roar",'Black Flame','Tenshou','Blaze'])

set_ailments = set(['Blastblight','Vocal Cord Paralysis','Stench','Hellfireblight','Corrupted Poison','Poison','Fireblight',
                    'Silked','Extreme Poison','Drunken','Fatigue','Waterblight','Snowman','Muddy','Crystallization','Tarred',
                    'Extreme Iceblight','Bleeding','Extreme Thunderblight','Frozen','Confusion','Noxious Poison','Blastscourge',
                    'Extreme Dragonblight','Extreme Waterblight','Bubbleblight','Dracophage Erosion','Movement Down','Extreme Sleep',
                    'Dark','Frostbite','Webbed','Extreme Paralysis','Rust','Iceblight','Stabbed','Ossified','Mucus','Positive Charge',
                    'Stun','Slimeblight','Effluvium','Dragonblight','Venom','Thunderblight','Paralysis','Negative Charge','Frenzy Virus',
                    'Sleep','Felvine-Scented','Extreme Fireblight','Zombification','Defense Down','Soiled','Bloodblight','Muck','Magnetism',
                    'Deadly Poison'])

# html = requests.get('https://monsterhunter.fandom.com/wiki/Alatreon').text
html = requests.get('https://monsterhunter.fandom.com/wiki/Shara_Ishvalda').content

soup = BeautifulSoup(html)

monster_dict = {}

# English Title
try: 
    info = soup.find_all(name = 'div',
                        attrs = {'class':'pi-data-value pi-font'}
                        )[0].contents
    monster_dict['English_Title'] =  [str(i) for i in info if str(i) != '<br/>' and '<' not in str(i)]
except:
    monster_dict['English_Title'] = []

# Mosnter Type
try:
    monster_dict['Monster Type'] = soup.find_all(name = 'div',
                                                attrs = {'data-source':'Monster Type'}
                                                )[0].div.text
except:
    monster_dict['Monster Type'] = []

# Generation
try:
    monster_dict['Generation'] = soup.find_all(name = 'div',
                                                attrs = {'data-source':'Generation'}
                                                )[0].div.text
except:
    monster_dict['Generation'] = []

# Elements
try:
    monster_dict['Element'] = list(set(soup.find_all(name = 'div',
                                                    attrs = {'data-source':'Element'}
                                                    )[0].div.text.split()).intersection(set_elements))
except:
    monster_dict['Element'] = []

# Ailments
try:
    monster_dict['Ailments'] = list(set(soup.find_all(name = 'div',
                                                    attrs = {'data-source':'Ailments'}
                                                    )[0].div.text.split()).intersection(set_ailments))
except:
    monster_dict['Ailments'] = []

# Weakest to
try:
    monster_dict['Weakest_to'] = list(set(soup.find_all(name = 'div',
                                                        attrs = {'data-source':'Weakest to'}
                                                        )[0].div.text.split()).intersection(set_elements))
except:
    monster_dict['Weakest_to'] = []

# Habitats
try:
    info = soup.find_all(name = 'div',
                        attrs = {'data-source':'Habitats'}
                        )[0].text.split(',')

    monster_dict['Habitats'] =  [i.replace('\nHabitat\n','').strip() for i in info[1:]]
except:
    try:
        info = soup.find_all(name = 'div',
                        attrs = {'data-source':'Habitats'}
                        )[0].div.contents

        monster_dict['Habitats'] = [i.text for i in info if str(i) != '<br/>']

    except:

        monster_dict['Habitats'] = []

# Related Monsters 
try:
    info = soup.find_all(name = 'div',
                        attrs = {'data-source':'Monster Relations'}
                        )[0].text.split(',')

    monster_dict['Related_monsters'] =  [i.replace('\nRelated Monsters\n','').strip() for i in info]

except:

    monster_dict['Related_monsters'] = []

print(monster_dict)

{'English_Title': ['地啼龍'], 'Monster Type': 'Elder Dragon', 'Generation': 'Fifth', 'Element': [], 'Ailments': ['Stun'], 'Weakest_to': ['Ice'], 'Habitats': [], 'Related_monsters': ['None']}


Inserir tratativa especifica para
- Habitat
    - Magnamalo
    - Banbaro
    - Aknosom
    - Wulg
    - Bishaten
    



- Relate monsters:
    - Bishaten

Shara_Ishvalda',
'Category:Lists',
'Garangolm',
'Malzeno',
'Aurora_Somnacanth',
'Grimalkyne',
'Somnacanth',
'Monster_List_(Spin-Off)',
'Gowngoat',
'Lunagaron',
'Boggi',
'Kelbi',
'Popo',
'Bombadgy',
'Blood_Orange_Bishaten',
'Gajau',
'Magma_Almudron',
'Boaboa',
'Category:Monsters',
'Remobra',
'Minor_Monsters'