In [64]:
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
def get_cata_url(catagory):
    ''' Takes in a string containing a player card catagory
        Scrapes arkhamdb
        Returns a list of urls for containing information on each card of that catagory'''

    # get url for page to scrape
    url = f'https://arkhamdb.com/find?q=t%3A{catagory}&decks=player'

    # create request and soup objects
    html = requests.get(url)

    soup = BeautifulSoup(html.content, 'html.parser')

    # locate urls on page
    results = soup.find(id='list')

    results = results.find_all('a', class_='card-tip')

    # convert urls to string and make a list
    results = [str(result['href']) for result in results]

    return results

In [3]:
def get_icons(results):
    '''Takes in request results for an arkhamdb page containing player card data
       Returns a string containing the skill icons printed on that cart'''
      
    icons = ''

    # list containing each icon type
    icon_types = ['wild', 'willpower', 'combat', 'agility', 'intellect']

    # itterate through icon types
    for stat in icon_types:

        # get number of that icon on card from request results
        num_icons = len(results.find_all('span', class_=f'icon icon-{stat} color-{stat}'))

        # add that icon name to a string for each time it appears in request results
        for icon in range(num_icons):

            icons += f'{stat} '
            
    return icons.upper()[:-1]

In [94]:
    
def get_ability_text(results):
    '''Gets ability text for card'''
    
    ability = get_ability_html(results)
    
    ability = get_ability_string(ability)
    
    return ability
    
    
    
def get_ability_html(results):
    '''Returns bs object in results containing ability text by trying differint faction names in class'''
    
    # itterate through factions to find class name for ability text and get bs object containing text
    factions = ['guardian', 'mystic', 'neutral', 'rogue', 'seeker', 'survivor']

    for faction in factions:

        ability = results.find('div', class_=f'card-text border-{faction}')
        
        # break loop if result is found
        if ability != None:

            break
            
    return ability


def get_ability_string(ability):
    '''Takes in bs object contining player card ability text
       converts object to a string
       replaces html indicating an icon with uppercase word equivalent
       Returns string '''
      
    # convert html to string
    ability = str(ability)
        
    # replace icon html with matching uppercase word
    icon_types = ['wild', 'willpower', 'combat', 'agility', 'intellect']
    
    for icon in icon_types:
    
        ability = ability.replace(f'<span class="icon-{icon}" title="{icon.capitalize()}"></span>', 
                                  f'{icon.upper()}')

    # delete extraneous html
    html = ['<div class="card-text border-rogue">\n<p>',
            '<div class="card-text border-survivor">\n<p>',
            '<div class="card-text border-seeker">\n<p>',
            '<div class="card-text border-guardian">\n<p>',
            '<div class="card-text border-mystic">\n<p>',
            '<div class="card-text border-neutral">\n<p>',
            '</p>\n</div>']
    
    for item in html:
        
        ability = ability.replace(item,'')

    return ability

In [65]:
invest = get_cata_url('investigator')
assets = get_cata_url('asset')
events = get_cata_url('event')
skills = get_cata_url('skill')

In [6]:
for cata in [invest, assets, events, skills]:
    
    print(cata[0:6])
    print()

['https://arkhamdb.com/card/02005', 'https://arkhamdb.com/card/01003', 'https://arkhamdb.com/card/01503', 'https://arkhamdb.com/card/01004', 'https://arkhamdb.com/card/01504', 'https://arkhamdb.com/card/03004']

['https://arkhamdb.com/card/60505', 'https://arkhamdb.com/card/60522', 'https://arkhamdb.com/card/07025', 'https://arkhamdb.com/card/07305', 'https://arkhamdb.com/card/03020', 'https://arkhamdb.com/card/52001']

['https://arkhamdb.com/card/03304', 'https://arkhamdb.com/card/52002', 'https://arkhamdb.com/card/06156', 'https://arkhamdb.com/card/08021', 'https://arkhamdb.com/card/60114', 'https://arkhamdb.com/card/60123']

['https://arkhamdb.com/card/09002', 'https://arkhamdb.com/card/03272', 'https://arkhamdb.com/card/03233', 'https://arkhamdb.com/card/05038', 'https://arkhamdb.com/card/10095', 'https://arkhamdb.com/card/04309']



In [95]:
def get_soup(url):
    '''Takes in a url for a card
       Returns html request result parsed using beautiful soup'''
    
    # create request and soup objects
    html = requests.get(url)

    soup = BeautifulSoup(html.content, 'html.parser')

    # locate urls on page and return
    return soup.find(id='list')

def get_card_traits(results):
    '''Takes in html request parsed by beautiful soup
       Returns card traits for that request'''
    
    title = results.find('a', class_='card-name card-tip').text.replace('\n', '').replace('\t', '')

    test_icons = get_icons(results)

    traits = results.find('p', class_='card-traits').text.replace('\n', '').replace('\t', '')

    faction = results.find('span', class_='card-faction').text.replace('\n', '').replace('\t', '')

    ability = get_ability_text(results)

    tipe = results.find('span', class_='card-type').text.replace('\n', '').replace('\t', '')

    flavor = results.find('div', class_='card-flavor small').text.replace('\n', '').replace('\t', '')

    artist = results.find('div', class_='card-illustrator').text.replace('\n', '').replace('\t', '')

    expansion = results.find('div', class_='card-pack').text.replace('\n', '').replace('\t', '').replace('.', '')

    return title, test_icons, traits, faction, ability, tipe, flavor, artist, expansion

# dictionary with empty traits
skill_traits = {'title':[],
                'test_icons':[],
                'traits':[],
                'faction':[],
                'ability':[],
                'type':[],
                'flavor':[],
                'artist':[],
                'expansion':[]}


for url in get_cata_url('skill'):

    results = get_soup(url)

    title, test_icons, traits, faction, ability, tipe, flavor, artist, expansion = get_card_traits(results)
    
    # list of trait values
    trait_list = [title, test_icons, traits, faction, ability, tipe, flavor, artist, expansion]

    for i, key in enumerate(skill_traits):

        skill_traits[key].append(trait_list[i])

df_skill = pd.DataFrame(skill_traits)
pd.DataFrame(skill_traits)

Unnamed: 0,title,test_icons,traits,faction,ability,type,flavor,artist,expansion
0,"""As you wish""",WILD WILD WILD,Practiced. Expert.,Neutral,Carson Sinclair deck only.</p><p>Commit only t...,Skill,,Tiziano Baracchi,The Scarlet Keys Investigator Expansion #2
1,"""Not without a fight!""",WILLPOWER COMBAT AGILITY,Innate.,Survivor,Commit to a skill test only if you are engaged...,Skill,,Aurore Folny,Black Stars Rise #272
2,"""Watch this!""",WILLPOWER COMBAT AGILITY,Gambit.,Rogue,Commit only to a skill test you are performing...,Skill,Show-off.,Andreia Ugrai,The Pallid Mask #233
3,Able Bodied,COMBAT AGILITY,Innate.,Survivor,While you control 2 or fewer <b><i>Item</i></b...,Skill,,Anna Steinbauer,The Circle Undone #38
4,Accursed,WILD,Innate. Cursed.,Mystic,"When you commit Accursed to a skill test, add ...",Skill,Not all who wander are lost.,David Hovey,The Feast of Hemlock Vale Investigator Expansi...
...,...,...,...,...,...,...,...,...,...
116,Vicious Blow,COMBAT,Practiced.,Guardian,If this skill test is successful during an att...,Skill,"With a sickening smack, he struck the abominat...",JB Casacop,Revised Core Set #25
117,Vicious Blow,COMBAT COMBAT,Practiced. Expert.,Guardian,If this skill test is successful during an att...,Skill,"With a sickening smack, he struck the abominat...",JB Casacop,Lost in Time and Space #299
118,Vicious Blow,COMBAT,Practiced.,Guardian,If this skill test is successful during an att...,Skill,"With a sickening smack, he struck the abominat...",JB Casacop,Nathaniel Cho #19
119,Well-Funded,WILD,Fortune.,Seeker,While you control a <b><i>Science</i></b> or <...,Skill,Every cent of that research grant counts.,Pixoloid Studios,The Feast of Hemlock Vale Investigator Expansi...


In [87]:
tipe

'Skill'

In [81]:


for key in skill_traits:
    
    skill_traits[col]
    



title
test_icons
traits
faction
ability
type
flavor
artist
expansion


Unnamed: 0,title,test_icons,traits,faction,ability,type,flavor,artist,expansion


In [10]:
ability = ability.string
print()
print(ability)
print()
print(type(ability))

AttributeError: 'str' object has no attribute 'string'

In [None]:


p = ability.find_all('p')
paragraphs = []
for x in p:
    paragraphs.append(str(x))

In [None]:
ability_full = ''

for paragraph in ability:
    for line in paragraph:
        print(str(line))
        print(type(line))

In [None]:
traits = results.find('p', class_='card-traits').text

In [None]:
print(title)

In [None]:
test_icons

In [None]:
ability