In [1]:
import io
import json
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import Select, WebDriverWait
# Base URL for champion images on the Data Dragon CDN; the path is pinned to
# version 7.19.1. A champion file name plus ".png" is appended to it when the
# counter lists are built in get_champion_data.
img_path = 'https://ddragon.leagueoflegends.com/cdn/7.19.1/img/champion/'
# Number of slots in a skill order list (presumably one per champion level -- confirm).
_SKILL_SLOTS = 18


def _onclick_targets(fieldset):
    """Return the non-empty ``onclick`` attribute of every <li> inside *fieldset*."""
    handlers = (item.get_attribute("onclick")
                for item in fieldset.find_elements_by_tag_name("li"))
    # get_attribute returns None when the attribute is absent; such entries
    # cannot be turned into a link, so they are dropped here.
    return [handler for handler in handlers if handler]


def _scrape_build(driver):
    """Return the item build on the current page as a list of ``{"src", "id"}`` dicts."""
    wrapper = driver.find_element_by_class_name("build-wrapper")
    build = []
    for img in wrapper.find_elements_by_tag_name("img"):
        src = img.get_attribute("src")
        # The id is whatever sits between "/item/" and ".png" in the image URL.
        build.append({"src": src, "id": find_between(src, "/item/", ".png")})
    return build


def _scrape_skill_order(driver):
    """Return ``(skill_images, skill_order)`` scraped from the current page.

    ``skill_images`` holds one image URL per skill row. ``skill_order`` is a
    list of ``_SKILL_SLOTS`` entries; each entry is 'q', 'w', 'e' or 'r' for a
    slot in which that skill's row has a non-blank cell, and 0 otherwise.
    """
    # Original author's note: "Skill-Order2" is the highest win % skill order.
    wrapper = driver.find_element_by_class_name("Skill-Order2")
    table = wrapper.find_element_by_class_name("skill-order")
    rows = table.find_elements_by_class_name("skill")

    skill_images = []
    skill_order = [0] * _SKILL_SLOTS
    # The first "skill" row is skipped; the following rows are mapped to
    # q, w, e, r in page order (presumably the first row is a header -- confirm).
    for key_index, row in enumerate(rows[1:]):
        skill_images.append(row.find_element_by_tag_name("img").get_attribute("src"))
        current_skill = 'qwer'[key_index]
        cells = row.find_element_by_class_name("skill-selections").find_elements_by_tag_name("div")
        for slot, cell in enumerate(cells):
            if cell.text.replace(" ", ""):
                skill_order[slot] = current_skill
    return skill_images, skill_order


def _scrape_stats(page_source):
    """Parse the first <table> of *page_source* into a ``{Type: Average}`` dict."""
    soup = BeautifulSoup(page_source, 'lxml')
    table = soup.find_all('table')[0]  # the stats table is the first table on the page
    # Wrap the markup in StringIO: passing a literal HTML string to read_html
    # is deprecated in recent pandas, while file-like input works everywhere.
    df = pd.read_html(io.StringIO(str(table)))[0]
    # Round-trip through JSON so the values end up as plain JSON-serialisable types.
    records = json.loads(df.to_json(orient='records'))
    return {record["Type"]: record["Average"] for record in records}


def _scrape_counters(driver):
    """Return ``{LANE: {'weak': [...], 'strong': [...]}}`` from the current counters page."""
    counters = {}
    for lane_elem in driver.find_elements_by_class_name("champbox1-subwarpper"):
        heading = lane_elem.find_element_by_tag_name("h1").text
        # The lane is the last word of the heading.
        lane = heading.lower().split(" ")[-1]
        champs = {'weak': [], 'strong': []}
        for wrapper_class in ("champ-blockwrapper1", "champ-blockwrapper2"):
            section = lane_elem.find_element_by_class_name(wrapper_class)
            blocks = section.find_elements_by_class_name("champ-block")
            label = section.find_element_by_tag_name("div").text
            key = 'weak' if 'weak' in label.lower() else 'strong'
            for block in blocks[:5]:  # keep at most five champions per list
                entry = {}
                entry['name'] = block.find_element_by_class_name("name").text
                entry['win_rate'] = block.find_element_by_class_name("per-bar").text
                style = block.find_element_by_class_name("left").get_attribute('style')
                entry['img'] = img_path + find_between(style, "champion/", ".png") + ".png"
                champs[key].append(entry)
        # The stats pages call this lane "MIDDLE" while the heading says "mid".
        if lane == "mid":
            lane = "MIDDLE"
        counters[lane.upper()] = champs
    return counters


def get_champion_data(url, driver_path=r'/Users/user/Downloads/chromedriver', output_dir='champstats'):
    """Scrape one champion's stats from lolcounter.gg and save them as JSON.

    For every league option and every lane option offered on the page at
    *url*, the item build, skill order and stats table are collected; then the
    per-lane counter lists ("weak" / "strong") of that league are attached.
    The result is written to ``<output_dir>/<champion name>.json`` as::

        {
          "<league>": {
            "<LANE>": {"skill_order": [...], "stats": {...},
                       "build": [...], "counters": {...}}
          },
          "skillImages": [...]
        }

    Lanes whose skill order comes back empty are skipped entirely.

    Parameters
    ----------
    url : str
        A champion's ``championstats.php`` page on lolcounter.gg.
    driver_path : str, optional
        Path to the chromedriver executable.
    output_dir : str, optional
        Directory the JSON file is written to; created if missing.

    Notes
    -----
    Relies on the module-level ``find_between`` helper and ``img_path``
    constant being defined before this function is called.
    """
    driver = webdriver.Chrome(driver_path)
    try:
        driver.get(url)
        champ_url = driver.current_url.split('championstats')[0]
        name = find_between(champ_url, "https://lolcounter.gg/", "/")
        print("starting ", name)

        # First fieldset lists the leagues (platplus, plat, ...), the second
        # one the lanes (mid, top, ...). The onclick strings are read up front
        # because the elements stop being usable once the driver navigates.
        fieldsets = driver.find_elements_by_tag_name("fieldset")
        league_hrefs = _onclick_targets(fieldsets[0])
        lane_hrefs = _onclick_targets(fieldsets[1])

        championJson = {}
        for optionHref in league_hrefs:
            # The default page (plain "championstats.php") is the platplus league;
            # other leagues carry their name in the file name.
            optionValue = 'platplus'
            if "championstats.php?" not in optionHref:
                optionValue = find_between(optionHref, "championstats", ".php?")
                link = champ_url + find_between(optionHref, "location.href='", "';")
                print(link)
                driver.get(link)

            for laneHref in lane_hrefs:
                lane = find_between(laneHref, "Lane=", "';")
                # League page name + the "?Lane=..." query taken from the lane handler
                # (everything after the first "hp", i.e. after ".php").
                link = (champ_url
                        + find_between(optionHref, "location.href='", "?")
                        + find_between(laneHref, "hp", "';"))
                print(link)
                driver.get(link)
                page_source = driver.page_source

                print("starting loop on option {} in lane {}".format(optionValue, lane))
                build = _scrape_build(driver)
                skill_images, skill_order = _scrape_skill_order(driver)
                if skill_order == [0] * _SKILL_SLOTS:
                    # No skill data for this league/lane: nothing is recorded.
                    continue

                lane_data = championJson.setdefault(optionValue, {}).setdefault(lane, {})
                # Skill images are stored once, from the first lane that has data.
                championJson.setdefault("skillImages", skill_images)
                lane_data["skill_order"] = skill_order
                lane_data["stats"] = _scrape_stats(page_source)
                lane_data["build"] = build

            # Counters for this league, one list pair per lane.
            link = champ_url
            if optionValue != 'platplus':
                link = champ_url + 'counterstats' + optionValue + '.php'
            print("getting counter info per league per lane")
            driver.get(link)
            # .get(): the league has no entry when every lane above was skipped;
            # indexing directly would raise KeyError in that case.
            league_data = championJson.get(optionValue, {})
            for lane_key, champs in _scrape_counters(driver).items():
                if lane_key in league_data:
                    league_data[lane_key]["counters"] = champs
    finally:
        # Always shut the browser down, even when scraping raised.
        driver.quit()

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    filename = os.path.join(output_dir, name + '.json')
    with open(filename, 'w') as f:
        json.dump(championJson, f)
    print("success! ", name)

In [2]:
def find_between( s, first, last ):
    """Return the part of ``s`` between ``first`` and the next ``last``.

    The search for ``last`` starts right after the first occurrence of
    ``first``. An empty string is returned when either marker is missing.
    """
    head = s.find(first)
    if head < 0:
        return ""
    begin = head + len(first)
    stop = s.find(last, begin)
    if stop < 0:
        return ""
    return s[begin:stop]

In [9]:
def get_champ_links(driver_path=r'/Users/user/Downloads/chromedriver'):
    """Return the TOP-lane championstats URL of every champion on lolcounter.gg.

    Opens the site's index page in Chrome, reads the ``data-champion-name``
    attribute of every "inlinechamps" element inside the "champion-list"
    element, and builds one link per champion.

    Parameters
    ----------
    driver_path : str, optional
        Path to the chromedriver executable.

    Returns
    -------
    list of str
        Links of the form
        ``https://lolcounter.gg/<name>/championstats.php?Lane=TOP`` in page
        order, with spaces in the name replaced by ``%20``.
    """
    url = 'https://lolcounter.gg/index.php'
    host = "https://lolcounter.gg/"
    champ_stats = '/championstats.php?Lane=TOP'

    driver = webdriver.Chrome(driver_path)
    try:
        driver.get(url)
        champion_table = driver.find_element_by_id("champion-list")
        elements_list = champion_table.find_elements_by_class_name("inlinechamps")
        links = []
        for champ in elements_list:
            raw_name = champ.get_attribute("data-champion-name")
            if not raw_name:
                # Element without a champion name: no link can be built for it.
                continue
            links.append(host + raw_name.replace(" ", "%20") + champ_stats)
        return links
    finally:
        # Always end the browser session, even if the page lookup raised.
        driver.quit()
# Gather the stats link of every champion, then scrape the selected slice.
champ_links = get_champ_links()
x = 0  # running counter printed in front of each processed link
for link in champ_links[8:9]:  # slice selects only the champion at index 8
    print("{} {}".format(x, link))
    x = x + 1
    get_champion_data(link)

0 https://lolcounter.gg/aurelion%20sol/championstats.php?Lane=TOP
starting  aurelion%20sol
https://lolcounter.gg/aurelion%20sol/championstats.php?Lane=MIDDLE
starting loop on option platplus in lane MIDDLE
https://lolcounter.gg/aurelion%20sol/championstats.php?Lane=JUNGLE
starting loop on option platplus in lane JUNGLE
getting counter info per league per lane
https://lolcounter.gg/aurelion%20sol/championstatsplat.php?Lane=MIDDLE
https://lolcounter.gg/aurelion%20sol/championstatsplat.php?Lane=MIDDLE
starting loop on option plat in lane MIDDLE
https://lolcounter.gg/aurelion%20sol/championstatsplat.php?Lane=JUNGLE
starting loop on option plat in lane JUNGLE
getting counter info per league per lane
https://lolcounter.gg/aurelion%20sol/championstatsgold.php?Lane=MIDDLE
https://lolcounter.gg/aurelion%20sol/championstatsgold.php?Lane=MIDDLE
starting loop on option gold in lane MIDDLE
https://lolcounter.gg/aurelion%20sol/championstatsgold.php?Lane=JUNGLE
starting loop on option gold in lane JU