# Data Acquisition - Scraping Lolalytics

### Import libraries

In [1]:
import json
import os
from itertools import product
from urllib.parse import urlencode

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

### Static Variables

In [2]:
# Constants

# Rank filters requested from the tier list page ('all' is the aggregate view).
# NOTE(review): 'grandmaster' is not in this list — confirm that is intentional.
TIERS = [
    'all',
    'challenger',
    'master',
    'diamond',
    'emerald',
    'platinum',
    'gold',
    'silver',
    'bronze',
    'iron',
]

# Lane filters requested from the tier list page.
ROLES = [
    'top',
    'jungle',
    'middle',
    'bottom',
    'support',
]

# Base URL; it already ends with '?' so query parameters can be appended to it.
ROOT_URL = 'https://lolalytics.com/lol/tierlist/?'

# Browser-like request headers.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}

### Browser Configuration

In [3]:
# Configure the browser options
chrome_options = Options()
#chrome_options.add_argument("--headless")  # Run the browser in headless mode (no graphical interface)
chrome_options.add_argument("--start-maximized")
# Launch the browser; `driver` is the module-level Chrome session that
# get_soup_object() uses to load every page.
driver = webdriver.Chrome(options=chrome_options)

### Scraping Functions

In [4]:
def get_complete_url(root_url, **params):
    """Build a request URL from ``root_url`` and keyword query parameters.

    Parameters
    ----------
    root_url : str
        Base address. ``http://`` is prepended when no HTTP(S) scheme is
        present. It may or may not already end with ``?`` / contain a query.
    **params
        Query parameters; keys and values are percent-encoded.

    Returns
    -------
    str
        ``root_url`` followed by the encoded query string. When no parameters
        are given, ``root_url`` (with scheme) is returned unchanged.
    """
    if not root_url.startswith(('http://', 'https://')):
        root_url = 'http://' + root_url  # Add the scheme when it is missing

    if not params:
        return root_url

    # urlencode escapes characters such as spaces, '&' and '=' that would
    # otherwise corrupt the query string.
    query = urlencode(params)

    # Pick the joiner from what the base URL already contains, so the function
    # also works for bases that do not end with '?'.
    if root_url.endswith(('?', '&')):
        separator = ''
    elif '?' in root_url:
        separator = '&'
    else:
        separator = '?'

    return f'{root_url}{separator}{query}'

In [5]:
def get_soup_object(url, timeout=10, wait_xpath="//div[@class='TierList_list__j33gd']"):
    """Load ``url`` in the shared Selenium ``driver`` and return the parsed page.

    Parameters
    ----------
    url : str
        Page to open.
    timeout : int or float, optional
        Maximum number of seconds to wait for ``wait_xpath`` to appear
        (default 10).
    wait_xpath : str, optional
        XPath of the element whose presence signals that the page content has
        been rendered. Defaults to the tier list container.

    Returns
    -------
    bs4.BeautifulSoup or None
        The parsed page source, or ``None`` when loading or waiting fails.
        The error is printed rather than raised, so callers must handle
        ``None``.
    """
    try:
        # Perform the request through Selenium
        driver.get(url)

        # Block until the target element exists (or the timeout expires)
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, wait_xpath))
        )

        # Parse the HTML as it stands after the wait completed
        soup = BeautifulSoup(driver.page_source, "html.parser")

    except Exception as e:
        # Deliberate best-effort: report the failure and signal it with None
        print("Errore durante l'esecuzione di Selenium:", e)
        return None

    else:
        print("Richiesta SUCCESSO:", url)
        return soup

In [6]:
def get_rows_elements(soup_row):
    """Extract the champion statistics from one tier list row.

    The row's direct ``div`` children are read by position:
    0 rank, 2 champion name (link text), 3 tier, 4 lane (``alt`` of the icon),
    5 win rate (nested ``div``), 6 pick rate, 7 ban rate, 8 pbi,
    9 games played. Child 1 is not used.

    Parameters
    ----------
    soup_row : bs4.element.Tag
        The row element.

    Returns
    -------
    dict
        Keys ``rank``, ``champion_name``, ``tier``, ``lane``, ``win_rate``,
        ``pick_rate``, ``ban_rate``, ``pbi`` and ``games_played``, in that
        order. Values are taken as-is from the page, without numeric
        conversion.

    Raises
    ------
    IndexError
        If the row has fewer than ten direct ``div`` children.
    """
    # find_all is the supported spelling; findAll is a deprecated alias.
    cells = soup_row.find_all('div', recursive=False)

    return {
        'rank': cells[0].text,
        'champion_name': cells[2].a.text,
        'tier': cells[3].text,
        'lane': cells[4].img['alt'],
        'win_rate': cells[5].div.text,
        'pick_rate': cells[6].text,
        'ban_rate': cells[7].text,
        'pbi': cells[8].text,
        # Dots are stripped — presumably thousands separators; confirm
        # against the page's number formatting.
        'games_played': cells[9].text.replace('.', ''),
    }




### Scraping execution function

In [7]:
def scrape_all(tiers=None, roles=None):
    """Scrape the tier list for every (tier, role) combination.

    Parameters
    ----------
    tiers : iterable of str, optional
        Rank filters to request. Defaults to the module-level ``TIERS``.
    roles : iterable of str, optional
        Lane filters to request. Defaults to the module-level ``ROLES``.

    Returns
    -------
    list of dict
        One dict per scraped row, as produced by ``get_rows_elements`` plus a
        ``tier_rank`` key holding the requested tier. Combinations whose page
        could not be loaded, or that lack the tier list container, are
        skipped with a printed message instead of aborting the whole run.
    """
    tiers = TIERS if tiers is None else tiers
    roles = ROLES if roles is None else roles

    list_temp = []
    for tier_rank, role in product(tiers, roles):
        print(f'{tier_rank} - {role}')

        complete_url = get_complete_url(ROOT_URL, lane=role, tier=tier_rank)

        # get_soup_object returns None on failure; skip rather than crash and
        # lose the rows collected so far.
        soup = get_soup_object(complete_url)
        if soup is None:
            print(f'Skipping {tier_rank} - {role}: page could not be loaded')
            continue

        container = soup.find('div', class_='TierList_list__j33gd')
        if container is None:
            print(f'Skipping {tier_rank} - {role}: tier list container not found')
            continue

        # Only direct children are rows; nested divs belong to a row's cells.
        for soup_row in container.find_all('div', recursive=False):
            dict_elements = get_rows_elements(soup_row)

            dict_elements['tier_rank'] = tier_rank

            print('Appending dict: ', dict_elements)
            list_temp.append(dict_elements)

    return list_temp
        

In [8]:
# Run the full scrape: one page request per (tier, role) combination.
# NOTE(review): no driver.quit() is visible in this notebook — confirm the
# browser session is closed once scraping is finished.
list_temp = scrape_all()

all - top


Richiesta SUCCESSO: https://lolalytics.com/lol/tierlist/?lane=top&tier=all
Appending dict:  {'rank': '1', 'champion_name': 'Jax', 'tier': 'S+', 'lane': 'top', 'win_rate': '50.17', 'pick_rate': '10.14', 'ban_rate': '19.41', 'pbi': '2', 'games_played': '2078895', 'tier_rank': 'all'}
Appending dict:  {'rank': '2', 'champion_name': 'Fiora', 'tier': 'S+', 'lane': 'top', 'win_rate': '51.02', 'pick_rate': '4.86', 'ban_rate': '7.71', 'pbi': '5', 'games_played': '995378', 'tier_rank': 'all'}
Appending dict:  {'rank': '3', 'champion_name': 'Singed', 'tier': 'S', 'lane': 'top', 'win_rate': '52.73', 'pick_rate': '1.54', 'ban_rate': '0.49', 'pbi': '4', 'games_played': '316278', 'tier_rank': 'all'}
Appending dict:  {'rank': '4', 'champion_name': 'Olaf', 'tier': 'S', 'lane': 'top', 'win_rate': '50.68', 'pick_rate': '2.14', 'ban_rate': '1.88', 'pbi': '1', 'games_played': '439291', 'tier_rank': 'all'}
Appending dict:  {'rank': '5', 'champion_name': 'Camille', 'tier': 'S', 'lane': 'top', 'win_rate': '50

### Data Storage

In [9]:
# Build the table from the scraped records: one row per dict, one column per key.
# NOTE(review): values are taken from element text/attributes, so columns are
# presumably strings at this point — confirm before doing numeric analysis.
df = pd.DataFrame(list_temp)
df

Unnamed: 0,rank,champion_name,tier,lane,win_rate,pick_rate,ban_rate,pbi,games_played,tier_rank
0,1,Jax,S+,top,50.17,10.14,19.41,2,2078895,all
1,2,Fiora,S+,top,51.02,4.86,7.71,5,995378,all
2,3,Singed,S,top,52.73,1.54,0.49,4,316278,all
3,4,Olaf,S,top,50.68,2.14,1.88,1,439291,all
4,5,Camille,S,top,50.16,3.77,1.74,1,773584,all
...,...,...,...,...,...,...,...,...,...,...
3675,99,Anivia,D,support,37.42,0.43,0.79,-3,4148,iron
3676,100,Tristana,D-,support,29.67,0.24,5.60,-4,2272,iron
3677,101,Lulu,D-,support,40.47,2.46,0.98,-9,23658,iron
3678,102,Miss Fortune,D-,support,41.60,1.80,8.79,-5,17353,iron


In [10]:
# Sanity check: number of distinct champion names among the rows scraped with
# the 'all' tier filter (every lane included).
len(df[(df['tier_rank'] == 'all')]['champion_name'].unique())

166

In [None]:
# Persist the scraped table. The output folder is created first because
# DataFrame.to_csv does not create missing parent directories and would fail
# on a fresh checkout.
os.makedirs('datasets', exist_ok=True)
df.to_csv('datasets/champions_lolalytics.csv', index = False)