In [1]:
# load libraries
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

In [2]:
# Exceptions that mean "element missing or text not parseable" while scraping.
# Deliberately narrower than a bare `except:` so Ctrl-C still interrupts a run.
_SCRAPE_ERRORS = (AttributeError, IndexError, TypeError, ValueError)

# Sentinel stored in "Result" when a game is neither "Victory" nor "Defeat".
_UNKNOWN_RESULT = 99


def _fetch_soup(url, timeout):
    """Download `url` with a browser-like User-Agent and return the parsed HTML."""
    r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=timeout)
    return BeautifulSoup(r.text, 'html.parser')


def _game_count(row, css_class, suffix):
    """Read a counter such as "12W" from a champion row; 0 if missing or unparseable."""
    try:
        return int(row.find(class_=css_class).text.replace(suffix, ""))
    except _SCRAPE_ERRORS:
        return 0


def get_info(summonername, timeout=30):
    """Scrape op.gg (EUW) for one summoner's champion stats and recent ranked games.

    Two pages are requested: the "champions" page (current season, the site's
    default option) and the profile page with the match history. Only matches
    whose type is "Ranked Solo" among the first 20 listed are kept.

    Parameters
    ----------
    summonername : str
        Summoner name as displayed on the site; it is percent-encoded for the URL.
    timeout : float or None, default 30
        Seconds passed to ``requests.get`` for each request. ``None`` waits
        indefinitely (the previous behaviour).

    Returns
    -------
    pandas.DataFrame
        One row per kept game: the games table right-merged with the champion
        table on ["Summoner Name", "Champion"]. Columns: "Summoner Name",
        "Champion", "Games Played", "Winrate", "KDA", "CS per minute",
        "Summoner Winrate", "Result" (1 win, 0 loss, 99 unknown) and
        "WinRate Last 5".

    Raises
    ------
    AttributeError
        When an expected page element is missing (``find`` returned ``None``).
        Callers use this to skip players whose page cannot be scraped.
    requests.exceptions.RequestException
        On connection problems or when `timeout` is exceeded.
    """
    # stdlib helper; imported locally so this cell does not depend on another edit
    from urllib.parse import quote

    # encode every reserved character, not just spaces
    user = quote(summonername, safe="")

    # ---- champions page -------------------------------------------------
    soup = _fetch_soup("https://euw.op.gg/summoners/euw/{}/champions".format(user), timeout)

    # Looked up once instead of once per row. `.text` is still read inside the
    # loops so a missing tag raises AttributeError only when a row is built.
    name_tag = soup.find(class_="name")

    # first <tr> is the table header
    champion_list = soup.find(class_="content").find_all("tr")[1:]

    champion_info = []
    for champion in champion_list:
        summoner_name = name_tag.text
        champion_name = champion.find("img")['alt']

        games_won = _game_count(champion, "winratio-graph__text left", "W")
        games_lost = _game_count(champion, "winratio-graph__text right", "L")
        games_played = games_won + games_lost
        # both counters may be missing -> avoid ZeroDivisionError
        champion_wr = games_won / games_played if games_played else 0

        try:
            kda = float(champion.find(class_="value").find_all("div")[1].text.replace(":1", ""))
        except _SCRAPE_ERRORS:
            kda = 0

        # cs per minute is the part between parentheses; kept as a string
        cspm = champion.find_all(class_="value")[2].text
        cspm = cspm[cspm.find('(') + 1:cspm.find(')')]

        champion_info.append([summoner_name, champion_name, games_played, champion_wr, kda, cspm])

    champion_info = pd.DataFrame(
        champion_info,
        columns=["Summoner Name", "Champion", "Games Played", "Winrate", "KDA", "CS per minute"],
    )

    # ---- profile page (match history) -----------------------------------
    profile_soup = _fetch_soup("https://euw.op.gg/summoners/euw/{}".format(user), timeout)
    games_info = profile_soup.find(class_="css-164r41r exlvoq30").find_all(class_="css-ja2wlz e19epo2o3")[0:20]

    # Overall win rate is the same for every game: parse it once, from the
    # already-parsed page, instead of re-parsing the HTML per game.
    # NOTE(review): the [-3:-1] slice reads exactly two digits before the last
    # character, so a "100%" value would presumably parse as 0 - confirm format.
    try:
        summoner_wr = int(profile_soup.find(class_="win-lose").text[-3:-1]) / 100
    except _SCRAPE_ERRORS:
        summoner_wr = 0

    game_info = []
    for game in games_info:
        if game.find(class_="info").find(class_="type").text != "Ranked Solo":
            continue

        summoner_name = name_tag.text
        # "Victory" -> 1, "Defeat" -> 0, anything else -> sentinel
        try:
            result = int(game.find(class_="game-result").text.replace("Victory", "1").replace("Defeat", "0"))
        except ValueError:
            result = _UNKNOWN_RESULT
        champion_name = game.find(class_="champion").find(class_="name").text

        game_info.append([summoner_name, summoner_wr, champion_name, result])

    games_df = pd.DataFrame(game_info, columns=["Summoner Name", "Summoner Winrate", "Champion", "Result"])

    # Win rate over the 5 most recent kept games. Games with an unknown result
    # are left out so the sentinel cannot inflate the rate above 1.
    recent = games_df["Result"].iloc[:5]
    decided = recent[recent.isin([0, 1])]
    games_df["WinRate Last 5"] = float(decided.mean()) if len(decided) else 0

    # Merge champion and player data (every kept game survives the right join)
    merged_data = pd.merge(champion_info, games_df, on=["Summoner Name", "Champion"], how="right")

    return merged_data

In [3]:
# Collect summoner names from the EUW tier leaderboard, one request per page.
name_list = []
page_count = 0  # pages fetched so far

for page in range(300, 1000):
    # change url to new page
    url = "https://euw.op.gg/leaderboards/tier?page={}&region=euw".format(page)

    # requests html for each page; the timeout keeps a stalled connection
    # from hanging the whole crawl
    r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=30)
    soup = BeautifulSoup(r.text, 'html.parser')

    for name in soup.find_all(class_="css-1xdhyk6 ei93w700"):
        strong = name.find("strong")
        # skip entries without a name tag instead of aborting the crawl
        if strong is not None:
            name_list.append(strong.text)

    page_count += 1
    # report what was actually collected, after the page has been processed
    print("Player-names collected:", len(name_list), end='\r')

Player-names collected: 400

KeyboardInterrupt: 

In [4]:
# Scrape every collected player and combine the per-player frames into `df`.
#
# Frames are gathered in a list and concatenated once: `DataFrame.append` was
# removed in pandas 2.0, and there the resulting AttributeError was silently
# swallowed by the handler below, leaving `df` empty.
player_frames = []
skipped_names = []  # players whose page could not be scraped
count = 0
try:
    for name in name_list:
        print("Progress:", round((count)/len(name_list)*100, 2), '%', end='\r')
        count += 1
        try:
            player_frames.append(get_info(name))
        except AttributeError:
            # an expected page element was missing: skip the player, keep a record
            skipped_names.append(name)
finally:
    # built in `finally` so an interrupted run still leaves the partial
    # result in `df`, as the incremental version did
    df = pd.concat(player_frames) if player_frames else pd.DataFrame()

Progress: 2.0 %%

KeyboardInterrupt: 

In [185]:
# persist the scraped data so the mining step does not have to be repeated
parquet_path = "df1.parquet"
df.to_parquet(parquet_path)