In [1]:
import bs4
from bs4 import BeautifulSoup
import requests
import pandas as pd
from tqdm import tqdm
import sys

In [2]:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/135.0.0.0 Safari/537.36"
}

### Enter Details
Replace the fields SUMMONER_ID, SUMMONER_REGION and SUMMONER_CHAMPION with what you need
#### Summoner ID
Usually in the form "{username}-{tag}"  
Eg: If your Riot ID is Natholiii#BEANS -> Natholiii-BEANS
#### Summoner Region
Replace SUMMONER_REGION with the code as given in the table
| Region    | Code |
| -------- | ------- |
| Singapore  | sg    |
| Japan  | jp    |
| North America  | na    |
#### Summoner champion
Enter the name of the champion in lowercase, ignore any spaces or commas

In [3]:
SUMMONER_ID = "Natholiii-BEANS"
SUMMONER_REGION = "sg"
SUMMONER_CHAMPION = "leona"

In [4]:
page = requests.get("https://www.leagueofgraphs.com/summoner/champions/{}/{}/{}".format(SUMMONER_CHAMPION,SUMMONER_REGION,SUMMONER_ID), headers=headers)

In [5]:
soup = BeautifulSoup(page.text,"html.parser")

## Explaining Steps|

### Navigating to the table with recent games

body -> {div; id : pageContainer , class : row}} -> {div; id: pageContent} -> {div; id: mainContentSuperContainer}
-> {div; id: mainContentContainer} -> {div; id: mainContent} -> {div; class: row summoner_performances}
-> {div; class: medium-9 small-24 columns} -> {div; class: box box-padding-10-10 recentGamesBox}
-> {table; class: data_table relative recentGamesTable performanceRecentGamesTable}

In [6]:
pageContainer = soup.body.find("div", attrs = {"id":"pageContainer","class":"row"})

In [7]:
pageContent = pageContainer.find_all("div", attrs = {"id":"pageContent"})
if len(pageContent) == 1:
    pageContent = pageContainer.find("div", attrs = {"id":"pageContent"})
else:
    print("Multiple pageContents found, check for more attributes")

In [8]:
mainContentSuperContainer = pageContent.find_all("div", attrs = {"id":"mainContentSuperContainer"})
if len(mainContentSuperContainer) == 1:
    mainContentSuperContainer = pageContainer.find("div", attrs = {"id":"mainContentSuperContainer"})
else:
    print("Multiple mainContentSuperContainers found, check for more attributes")

In [9]:
mainContentContainer = mainContentSuperContainer.find_all("div", attrs = {"id":"mainContentContainer"})
if len(mainContentContainer) == 1:
    mainContentContainer = mainContentSuperContainer.find("div", attrs = {"id":"mainContentContainer"})
else:
    print("Multiple mainContentContainer found, check for more attributes")

In [10]:
mainContent = mainContentContainer.find_all("div", attrs = {"id":"mainContent"})
if len(mainContent) == 1:
    mainContent = mainContentContainer.find("div", attrs = {"id":"mainContent"})
else:
    print("Multiple mainContents found, check for more attributes")

In [11]:
summoner_performances = mainContent.find_all("div", attrs = {"class":"row summoner_performances"})
if len(summoner_performances) == 1:
    summoner_performances = mainContent.find("div", attrs = {"class":"row summoner_performances"})
elif len(summoner_performances) == 0:
    print("None Found")
else:
    print("Multiple summoner_performances found, check for more attributes")

In [12]:
recentGamesBox = summoner_performances.find("div", attrs = {"class":"medium-9 small-24 columns"}).find("div", attrs = {"class":"box box-padding-10-10 recentGamesBox"})

In [13]:
recentGamesTable = recentGamesBox.find("table", attrs = {"class":"data_table relative recentGamesTable performanceRecentGamesTable"})

### Add all games to a list

#### Add the containers for the games

In [14]:
# First add 5 most recent games
recentGamesList = recentGamesTable.find_all("tr",attrs = {"class":""})

# Then append the hidden games
for game in recentGamesTable.find_all("tr",attrs = {"class":"see_more_hidden"}):
    recentGamesList.append(game)

#### Iterate and add links to each match

In [15]:
# Adding just the last 10
number_of_matches = 10
hrefs_matches = []

for i in range(number_of_matches):
    match_link = recentGamesList[i].find("td", attrs = {"class":"resultCellLight text-center"}).find("a")['href']
    hrefs_matches.append(match_link)

In [16]:
links_matches = []
for i in range(number_of_matches):
    links_matches.append("https://www.leagueofgraphs.com/" + hrefs_matches[i])

### Get the stats

#### Access a match

In [17]:
links_matches[0]

'https://www.leagueofgraphs.com//match/sg/105737004#participant5'

In [18]:
def return_game_table(match):
    game_page = requests.get(match, headers=headers)
    gameSoup = BeautifulSoup(game_page.text,"html.parser")
    gamepageContainer = gameSoup.body.find("div", attrs = {"id":"pageContainer","class":"row"})
    gamemainContentSuperContainer = gamepageContainer.find("div", attrs = {"id":"pageContent"}).find("div", attrs = {"id":"mainContentSuperContainer"})
    gamemainContentContainer = gamemainContentSuperContainer.find("div", attrs = {"id":"mainContent"})
    gameTable = gamemainContentContainer.find("table", attrs = {"class":"data_table matchTable"})
    return gameTable

In [19]:
gameTable = return_game_table(links_matches[0])

#### Find Leona and the opponents from the player list

In [20]:
def find_leona(gameTable):
    players = gameTable.find_all("tr",attrs = {"class":"playerRow"})
    # Support role will always be last in the list
    support_role = players[4]
    adc = players[3]
    # Is she red or blue side?
    left_side = "text-left summoner_column"
    right_side = "text-right summoner_column"
    stats = support_role.find_all("td", attrs = {"class":"kdaColumn hide-for-small-down requireTooltip noCursor"})
    champ_title = support_role.find("td",attrs = {"class":left_side}).find("img")['title']
    other_champ_title = support_role.find("td",attrs = {"class":right_side}).find("img")['title']
    #Is she on the left (of porofessor)?
    if champ_title == 'Leona':
        index = 0
        side = "left"
        enemy_sup = other_champ_title
        enemy_adc = adc.find("td",attrs = {"class":left_side}).find("img")['title']
    else:
        index = 1
        side = "right"
        enemy_sup = champ_title
        enemy_adc = adc.find("td",attrs = {"class":right_side}).find("img")['title']
    leona_stats = stats[index]
    return side, support_role, leona_stats, index, enemy_sup, enemy_adc

In [21]:
side, support_role, leona_stats, index, enemy_sup, enemy_adc = find_leona(gameTable)

#### Did you win or lose?

In [22]:
def match_result(gameTable,side):
    if side == "left":
        _ = gameTable.find("th",attrs = {"class":"text-left no-padding-right"})
    else:
        _ = gameTable.find("th",attrs = {"class":"text-right no-padding-left"})
    result = _.find_all("span")
    return result

In [23]:
if match_result(gameTable,side)[0].text.isalpha():
    result = match_result(gameTable,side)[0].text
else:
    result = match_result(gameTable,side)[1].text

#### Get Stats

In [24]:
def get_stats(leona_stats):
    # Kills, Deaths, Assists
    kda = [int(leona_stats.find("div",attrs = {"class":"kda"}).find('span',attrs = {"class":"kills"}).text), 
           int(leona_stats.find("div",attrs = {"class":"kda"}).find('span',attrs = {"class":"deaths"}).text),
           int(leona_stats.find("div",attrs = {"class":"kda"}).find('span',attrs = {"class":"assists"}).text)]

    _ = leona_stats.find_all("div",attrs = {"class":"cs"})

    csgold = _[0].text.split('-')
    vision = _[1].text.strip().split('-')[1].strip()

    cs_gold = [data.strip() for data in csgold]
    cs, gold = cs_gold[0],cs_gold[1]
    
    return kda, cs, gold, vision

In [25]:
kda, cs, gold, vision = get_stats(leona_stats)

#### Get Items

In [26]:
def get_items(support, index):
    items = support_role.find("td", attrs = {"class":"itemsColumn itemsColumn-" + str(index + 1) + "00"})
    itemlist = [item['alt'] for item in items.find_all("img", attrs = {"tooltip-class":"itemTooltip"})]
    return itemlist

In [27]:
items = get_items(support_role, index)

### Create an excel file to store data

### Add everything to a dataframe

In [28]:
column_names = ['kills','deaths','assists','cs','gold','vision','items','enemy champs','result']
dataframe = pd.DataFrame(columns = column_names)

In [29]:
if result == "Defeat":
    game_result = 0
else:
    game_result = 1

In [30]:
gold_earned = gold.split(' ')[0]
if 'k' in gold_earned:
    gold_int = int(float(gold_earned.split('k')[0]) * 1000)

In [31]:
 dataframe.loc[len(dataframe)] = [kda[0],kda[1],kda[2],int(cs.split(' ')[0]),gold_int,int(vision.split(' ')[1]),items,[enemy_adc,enemy_sup],game_result]

In [32]:
dataframe

Unnamed: 0,kills,deaths,assists,cs,gold,vision,result
0,2,4,4,24,5400,36,0


## Do it all at once

In [33]:
# First add 5 most recent games
recentGamesList = recentGamesTable.find_all("tr",attrs = {"class":""})

# Then append the hidden games
for game in recentGamesTable.find_all("tr",attrs = {"class":"see_more_hidden"}):
    recentGamesList.append(game)

In [34]:
def return_game_table(match):
    game_page = requests.get(match, headers=headers)
    gameSoup = BeautifulSoup(game_page.text,"html.parser")
    gamepageContainer = gameSoup.body.find("div", attrs = {"id":"pageContainer","class":"row"})
    gamemainContentSuperContainer = gamepageContainer.find("div", attrs = {"id":"pageContent"}).find("div", attrs = {"id":"mainContentSuperContainer"})
    gamemainContentContainer = gamemainContentSuperContainer.find("div", attrs = {"id":"mainContent"})
    gameTable = gamemainContentContainer.find("table", attrs = {"class":"data_table matchTable"})
    return gameTable

In [35]:
def find_leona(gameTable):
    players = gameTable.find_all("tr",attrs = {"class":"playerRow"})
    # Support role will always be last in the list
    support_role = players[4]
    adc = players[3]
    # Is she red or blue side?
    left_side = "text-left summoner_column"
    right_side = "text-right summoner_column"
    stats = support_role.find_all("td", attrs = {"class":"kdaColumn hide-for-small-down requireTooltip noCursor"})
    champ_title = support_role.find("td",attrs = {"class":left_side}).find("img")['title']
    other_champ_title = support_role.find("td",attrs = {"class":right_side}).find("img")['title']
    #Is she on the left (of porofessor)?
    if champ_title == 'Leona':
        index = 0
        side = "left"
        enemy_sup = other_champ_title
        enemy_adc = adc.find("td",attrs = {"class":left_side}).find("img")['title']
    else:
        index = 1
        side = "right"
        enemy_sup = champ_title
        enemy_adc = adc.find("td",attrs = {"class":right_side}).find("img")['title']
    leona_stats = stats[index]
    return side, support_role, leona_stats, index, enemy_sup, enemy_adc

In [36]:
def match_result(gameTable,side):
    if side == "left":
        _ = gameTable.find("th",attrs = {"class":"text-left no-padding-right"})
    else:
        _ = gameTable.find("th",attrs = {"class":"text-right no-padding-left"})
    result = _.find_all("span")
    return result

In [37]:
def get_stats(leona_stats):
    # Kills, Deaths, Assists
    kda = [int(leona_stats.find("div",attrs = {"class":"kda"}).find('span',attrs = {"class":"kills"}).text), 
           int(leona_stats.find("div",attrs = {"class":"kda"}).find('span',attrs = {"class":"deaths"}).text),
           int(leona_stats.find("div",attrs = {"class":"kda"}).find('span',attrs = {"class":"assists"}).text)]

    _ = leona_stats.find_all("div",attrs = {"class":"cs"})

    csgold = _[0].text.split('-')
    vision = _[1].text.strip().split('-')[1].strip()

    cs_gold = [data.strip() for data in csgold]
    cs, gold = cs_gold[0],cs_gold[1]
    
    return kda, cs, gold, vision

In [38]:
def get_items(support, index):
    items = support.find("td", attrs = {"class":"itemsColumn itemsColumn-" + str(index + 1) + "00"})
    itemlist = [item['alt'] for item in items.find_all("img", attrs = {"tooltip-class":"itemTooltip"})]
    return itemlist

In [39]:
def create_dataset(recentGames):
    links_matches = []
    for i in recentGames:
        match_link = i.find("td", attrs = {"class":"resultCellLight text-center"}).find("a")['href']
        links_matches.append("https://www.leagueofgraphs.com/" + match_link)
    print("Match links obtained")
    print("Number of matches found: ",len(links_matches))
    column_names = ['kills','deaths','assists','cs','gold','vision','items','enemy champs','result']
    dataframe = pd.DataFrame(columns = column_names)
    i = 1
    print("Beginning to access matches and add to dataframe")
    for match in tqdm(links_matches):
        
        gameTable = return_game_table(match)
        
        side, support_role, leona_stats, index, enemy_sup, enemy_adc = find_leona(gameTable)
        
        if match_result(gameTable,side)[0].text.isalpha():
            result = match_result(gameTable,side)[0].text
        else:
            result = match_result(gameTable,side)[1].text
        kda, cs, gold, vision = get_stats(leona_stats)
        
        
        items = get_items(support_role, index)
        
        if result == "Defeat":
            game_result = 0
        else:
            game_result = 1
        gold_earned = gold.split(' ')[0]
        if 'k' in gold_earned:
            gold_int = int(float(gold_earned.split('k')[0]) * 1000)
        dataframe.loc[len(dataframe)] = [kda[0],kda[1],kda[2],int(cs.split(' ')[0]),gold_int,int(vision.split(' ')[1]),items,[enemy_adc,enemy_sup],game_result]
        i+=1
        
    print("\nAdded all matches")
    print("Saving data to league_matches.csv")
    dataframe.to_csv("league_matches.csv", index=False)

In [40]:
create_dataset(recentGamesList)

Match links obtained
Number of matches found:  98
Beginning to access matches and add to dataframe


  3%|██▌                                                                                | 3/98 [00:10<05:20,  3.38s/it]


KeyboardInterrupt: 