# Libraries

# In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Class

# In [4]:
class Monster():
    '''
    Scraper that collects monster information from a Monster Hunter Fandom
    wiki listing page.

    Typical use is ``Monster().hunting()``, which runs the three stages in
    order: ``scounting`` (collect article URLs), ``researching`` (download
    and parse every article) and ``hunting`` (tabulate the parsed records).
    '''

    # Host that the relative ``/wiki/...`` links are resolved against.
    BASE_URL = 'https://monsterhunter.fandom.com'

    # Seconds to wait for the server before giving up on a request; without
    # a timeout a stalled connection would block the crawl forever.
    REQUEST_TIMEOUT = 30

    # Parser passed explicitly to BeautifulSoup so the result does not depend
    # on which optional parsers happen to be installed.
    PARSER = 'html.parser'

    # Placeholder name given to pages without a monster infobox; ``hunting``
    # drops the rows that carry it.
    _MISSING_NAME = 'Remove'

    # Vocabulary accepted for the 'element' and 'weakest_to' fields.
    ELEMENTS = frozenset({
        'Darkness', 'Thunder Pole', 'Sound', 'Dragon', 'Wind', 'Frozen Seraphim', 'Fire',
        'Kanade', 'Light', 'Crimson Demon', 'Ice', 'Burning Zero', 'Thunder', 'Water',
        "Emperor's Roar", 'Black Flame', 'Tenshou', 'Blaze',
    })

    # Vocabulary accepted for the 'ailments' field.
    AILMENTS = frozenset({
        'Blastblight', 'Vocal Cord Paralysis', 'Stench', 'Hellfireblight', 'Corrupted Poison', 'Poison', 'Fireblight',
        'Silked', 'Extreme Poison', 'Drunken', 'Fatigue', 'Waterblight', 'Snowman', 'Muddy', 'Crystallization', 'Tarred',
        'Extreme Iceblight', 'Bleeding', 'Extreme Thunderblight', 'Frozen', 'Confusion', 'Noxious Poison', 'Blastscourge',
        'Extreme Dragonblight', 'Extreme Waterblight', 'Bubbleblight', 'Dracophage Erosion', 'Movement Down', 'Extreme Sleep',
        'Dark', 'Frostbite', 'Webbed', 'Extreme Paralysis', 'Rust', 'Iceblight', 'Stabbed', 'Ossified', 'Mucus', 'Positive Charge',
        'Stun', 'Slimeblight', 'Effluvium', 'Dragonblight', 'Venom', 'Thunderblight', 'Paralysis', 'Negative Charge', 'Frenzy Virus',
        'Sleep', 'Felvine-Scented', 'Extreme Fireblight', 'Zombification', 'Defense Down', 'Soiled', 'Bloodblight', 'Muck', 'Magnetism',
        'Deadly Poison',
    })

    def __init__(self, url = 'https://monsterhunter.fandom.com/wiki/Monster_List'):
        '''
        Initializing the class to get monster information

        Parameters
        ----------
        url : str
            URL of the target site (the page listing the monsters)
        '''
        self.url = url

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _fetch_soup(self, url):
        '''Download ``url`` and return its parsed BeautifulSoup document.'''
        html = requests.get(url, timeout=self.REQUEST_TIMEOUT).text
        return BeautifulSoup(html, self.PARSER)

    @classmethod
    def _wiki_urls(cls, hrefs):
        '''
        Turn raw ``href`` values into absolute article URLs.

        Only hrefs starting with ``/wiki/`` are kept; everything else
        (anchors, external links, ...) is discarded. Duplicates are removed
        and the result is sorted so that the crawl order is reproducible.

        Parameters
        ----------
        hrefs : iterable of str
            Raw ``href`` attribute values

        Returns
        -------
        list of str
        '''
        return sorted({cls.BASE_URL + href for href in hrefs if href.startswith('/wiki/')})

    @staticmethod
    def _match_terms(text, vocabulary):
        '''
        Return the sorted vocabulary entries that occur in ``text`` as
        whitespace-separated tokens.

        NOTE(review): because ``text`` is tokenised on whitespace, vocabulary
        entries that contain a space (e.g. 'Thunder Pole') can never match.
        This mirrors the original matching rule and is left unchanged here.
        '''
        return sorted(vocabulary.intersection(text.split()))

    @staticmethod
    def _infobox_row(soup, source):
        '''Return the first ``<div data-source=source>`` of ``soup``, or None.'''
        return soup.find(name = 'div', attrs = {'data-source': source})

    @classmethod
    def _infobox_value(cls, soup, source, default):
        '''
        Return the text of the first ``<div>`` nested inside the infobox row
        ``source``, or ``default`` when the row or its nested div is missing.
        '''
        row = cls._infobox_row(soup, source)
        value = None if row is None else row.div
        return default if value is None else value.text

    @staticmethod
    def _english_title(soup):
        '''
        Return the plain-text pieces of the first
        ``<div class="pi-data-value pi-font">`` (children whose markup
        contains a tag are skipped), or an empty list if there is none.
        '''
        cell = soup.find(name = 'div', attrs = {'class': 'pi-data-value pi-font'})
        if cell is None:
            return []
        return [str(node) for node in cell.contents if '<' not in str(node)]

    @classmethod
    def _habitats(cls, soup):
        '''
        Return the list of habitats found in the infobox.

        Two row spellings are tried: 'Habitats' (comma-separated text) and,
        only when that row is absent, 'habitats' (one child node per entry).
        '''
        row = cls._infobox_row(soup, 'Habitats')
        if row is not None:
            pieces = row.text.split(',')
            # NOTE(review): the first comma-separated fragment is skipped, as
            # in the original code; confirm against the live page markup that
            # this does not drop the first habitat.
            return [piece.replace('\nHabitat\n', '').strip() for piece in pieces[1:]]

        row = cls._infobox_row(soup, 'habitats')
        cell = None if row is None else row.div
        if cell is None:
            return []
        try:
            return [child.text for child in cell.contents if str(child) != '<br/>']
        except AttributeError:
            # A child node without a ``text`` attribute: keep the original
            # best-effort result of an empty list.
            return []

    @classmethod
    def _related_monsters(cls, soup):
        '''Return the comma-separated entries of the 'Monster Relations' row.'''
        row = cls._infobox_row(soup, 'Monster Relations')
        if row is None:
            return []
        return [piece.replace('\nRelated Monsters\n', '').strip() for piece in row.text.split(',')]

    def _parse_monster(self, soup):
        '''
        Extract one monster record from a parsed article page.

        Missing fields fall back to an empty list, except ``name`` which
        falls back to the placeholder that ``hunting`` filters out.

        Returns
        -------
        dict
            Keys: name, english_title, monster_type, generation, element,
            ailments, weakest_to, habitats, related_monsters
        '''
        heading = soup.find(name = 'h2', attrs = {'data-source': 'Name'})

        return {
            'name': self._MISSING_NAME if heading is None else heading.text,
            'english_title': self._english_title(soup),
            'monster_type': self._infobox_value(soup, 'Monster Type', []),
            'generation': self._infobox_value(soup, 'Generation', []),
            'element': self._match_terms(self._infobox_value(soup, 'Element', ''), self.ELEMENTS),
            'ailments': self._match_terms(self._infobox_value(soup, 'Ailments', ''), self.AILMENTS),
            'weakest_to': self._match_terms(self._infobox_value(soup, 'Weakest to', ''), self.ELEMENTS),
            'habitats': self._habitats(soup),
            'related_monsters': self._related_monsters(soup),
        }

    # ------------------------------------------------------------------
    # Public pipeline
    # ------------------------------------------------------------------

    def scounting(self):
        '''
        Collect the article URLs linked from the listing page.

        Returns
        -------
        list of str
            Unique absolute URLs, sorted, one per ``/wiki/...`` link
        '''
        soup = self._fetch_soup(self.url)

        # Keep anchors that have both ``href`` and ``title`` but neither a
        # ``class`` nor a ``data-tracking-label`` attribute.
        anchors = soup.find_all(name = 'a', attrs = {'href':True, 'title':True, 'class':False, 'data-tracking-label':False})

        # Non-wiki links are filtered out here. Previously the builtin
        # ``next`` was inserted in their place and later handed to
        # ``requests.get`` as if it were a URL.
        return self._wiki_urls(anchor['href'] for anchor in anchors)

    def researching(self):
        '''
        Download every article found by ``scounting`` and parse it.

        Returns
        -------
        list of dict
            One record per visited URL (see ``_parse_monster`` for the keys)
        '''
        return [self._parse_monster(self._fetch_soup(link)) for link in self.scounting()]

    def hunting(self):
        '''
        Run the whole pipeline and return the result as a table.

        Returns
        -------
        pandas.DataFrame
            One row per monster; pages without a monster name are removed
        '''
        dataframe = pd.DataFrame(self.researching())

        # An empty crawl yields a frame without columns; filtering on 'name'
        # would raise KeyError, so return the empty frame as is.
        if 'name' not in dataframe.columns:
            return dataframe

        return dataframe.loc[dataframe['name'] != self._MISSING_NAME]

# Executing

# In [7]:
# Build the scraper with its default listing URL, run the full crawl, and
# write the resulting table to 'monsters.csv' in the working directory.
fellow_hunter = Monster()
df = fellow_hunter.hunting()
df.to_csv(path_or_buf='monsters.csv')