# Tierlist Scraper

1. The functions below work on saved LoLalytics tier-list HTML files.
2. The LoLalytics HTML files must be located in the same folder as the notebook.
3. The functions write their output as CSV files in the same folder as the notebook.

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

def get_champ_names():
  """Collect the champion names listed in the saved Patch 15.13 tier-list page.

  Reads "LoL Tier List - LoLalytics LoL Tier List for Patch 15.13.html" from
  the current working directory, keeps the text of every tag found between
  the 'Elo' marker and the LoLalytics disclaimer, and picks the champion
  names out of that flat list by fixed position.

  Returns:
    set: the champion name strings, plus 'Yunara', which is added by hand.

  Raises:
    FileNotFoundError: if the saved HTML page is not in the working directory.
  """
  # Positions inside the flattened tier-list text.
  # NOTE(review): presumably each champion row contributes `row_stride` text
  # entries and a few extra entries appear after the first `shift_after_row`
  # rows, which moves the name from offset 6 to offset 10 -- confirm against
  # the saved page if the layout ever changes.
  row_stride = 28
  first_name_offset = 6
  shifted_name_offset = 10
  shift_after_row = 57  # rows 0..56 use the first offset, later rows the shifted one

  # Read HTML content from a file
  with open("LoL Tier List - LoLalytics LoL Tier List for Patch 15.13.html", 'r', encoding='utf-8') as file:
    html_cont = file.read()

  # Parse the HTML content
  soup = BeautifulSoup(html_cont, 'html.parser')

  # Keep only the text that sits between the 'Elo' marker and the disclaimer.
  endtierlist = "LoLalytics isn't endorsed by Riot Games and doesn't reflect the views"
  tierlistflag = False
  patch_tierlist_data = []
  for tag in soup.find_all():
    # get_text() walks the whole subtree, so evaluate it once per tag.
    text = tag.get_text()
    if text == 'Elo':
      tierlistflag = True
    elif endtierlist in text:
      tierlistflag = False
    elif text != '' and tierlistflag:
      patch_tierlist_data.append(text)

  # get a set of champions to make it easier to parse through data later.
  champ_names = set(patch_tierlist_data[first_name_offset::row_stride][:shift_after_row])
  champ_names.update(patch_tierlist_data[shifted_name_offset::row_stride][shift_after_row:])
  champ_names.add('Yunara') # add yunara as she is not in this patch
  return champ_names
# Build the champion-name set once; the parse_data calls further down reuse it.
champ_names = get_champ_names()
# Notebook display: number of distinct champion names collected.
len(champ_names)

171

In [2]:
def parse_data(patch, champ_names):
  """Extract per-champion rates from a saved tier-list page and write a CSV.

  Reads "LoL Tier List - LoLalytics LoL Tier List for Patch {patch}.html"
  from the current working directory, groups the numeric text that follows
  each champion name, and writes one row per champion to
  "lolpatch_{patch}.csv" with the columns patch, champion, win_rate,
  pick_rate, ban_rate and rank.

  Args:
    patch: patch label as it appears in the file name, e.g. '15.13'.
    champ_names: collection of champion name strings (anything supporting
      `in`); text entries equal to one of these start a new champion group.

  Returns:
    None. The result is the CSV file written next to the notebook.

  Raises:
    FileNotFoundError: if the saved HTML page for `patch` is missing.
    IndexError: if a champion has some numeric values but fewer than seven.
  """
  # Positions of the wanted values inside a champion's list of numbers.
  # NOTE(review): these indices assume the column layout of the saved pages;
  # confirm if LoLalytics changes the table.
  win_rate_idx = 2
  pick_rate_idx = 5
  ban_rate_idx = 6

  # Read HTML content from a file
  with open(f"LoL Tier List - LoLalytics LoL Tier List for Patch {patch}.html", 'r', encoding='utf-8') as file:
    html_cont = file.read()
  # Parse the HTML content
  soup = BeautifulSoup(html_cont, 'html.parser')

  # reduce the html tags to only the champion tier list
  endtierlist = "LoLalytics isn't endorsed by Riot Games and doesn't reflect the views"
  tierlistflag = False
  patch_tierlist_data = []
  for tag in soup.find_all():
    # get_text() walks the whole subtree, so evaluate it once per tag.
    text = tag.get_text()
    if text == 'Elo':
      tierlistflag = True
    elif endtierlist in text:
      tierlistflag = False
    elif text != '' and tierlistflag:
      patch_tierlist_data.append(text)

  # Map champion -> numeric values seen after its name. Champions are
  # inserted on first appearance, so dict order preserves the page order.
  champ_data_dict = {}
  current_champ = None  # no champion seen yet; leading numbers are ignored
  for line in patch_tierlist_data:
    if line in champ_names:
      current_champ = line
      champ_data_dict.setdefault(current_champ, [])
    if current_champ is None:
      continue
    # Only text that parses as a number is kept; float() on a str can only
    # fail with ValueError, so nothing else is swallowed here.
    try:
      value = float(line)
    except ValueError:
      continue
    champ_data_dict[current_champ].append(value)

  # select proper values, all decimals in percentages
  champ_rates = {
      'patch':[],
      'champion':[],
      'win_rate':[],
      'pick_rate':[],
      'ban_rate':[],
      'rank':[],
  }
  for n, (champion, values) in enumerate(champ_data_dict.items()):
    if not values: # in case yunara isn't in the patch
      continue
    champ_rates['patch'].append(patch)
    champ_rates['champion'].append(champion)
    champ_rates['win_rate'].append(values[win_rate_idx])
    champ_rates['pick_rate'].append(values[pick_rate_idx])
    champ_rates['ban_rate'].append(values[ban_rate_idx])
    # Rank is the 1-based position in first-appearance order.
    champ_rates['rank'].append(n+1)

  pd.DataFrame(champ_rates).to_csv(f'lolpatch_{patch}.csv', index=False)

In [3]:
# Run the scraper over the saved pages for patches 15.13 through 15.20.
for minor in range(13, 21):
  patch = f'15.{minor}'
  parse_data(patch, champ_names)