In [71]:
# Uncomment and run once if selenium is not installed in this kernel's environment:
# %pip install selenium

In [72]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import os
import re
import pandas as pd

In [73]:
# Configure Chrome to run without a visible window.
chrome_options = Options()
chrome_options.add_argument("--headless")

# The options object must be handed to the driver explicitly; constructing
# webdriver.Chrome() with no arguments ignores it and the --headless flag
# never takes effect.
driver = webdriver.Chrome(options=chrome_options)

In [74]:
# Open the matchup guide in the browser session, then pause for a fixed
# interval before the next cell reads the page source (presumably to give
# the page time to finish loading).
guide_url = "https://www.mobafire.com/league-of-legends/build/step1v9-guide-full-matchups-for-every-champion-635422"
page_load_wait_seconds = 5

driver.get(guide_url)
time.sleep(page_load_wait_seconds)

In [75]:
def _first_text(container, tag_name):
    """Return the stripped text of the first ``tag_name`` tag inside ``container``, or None if there is none."""
    node = container.find(tag_name)
    return node.text.strip() if node else None


# Parse the page currently loaded in the driver and collect every
# <div class="row"> container.
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
row_divs = soup.find_all('div', class_='row')

# One record per container: the text of its first <h4>, <label> and <p>
# (None for whichever of those the container does not have).
results = [
    {
        'h4': _first_text(div, 'h4'),
        'label': _first_text(div, 'label'),
        'p': _first_text(div, 'p'),
    }
    for div in row_divs
]

In [77]:
# Base URL for champion icons on the Data Dragon CDN.
# NOTE(review): the pinned version may predate the placeholder champions added
# below, and stripping punctuation from display names (see the regex) may not
# always yield the CDN's file name -- confirm the generated URLs resolve.
DDRAGON_ICON_BASE = "https://ddragon.leagueoflegends.com/cdn/13.24.1/img/champion/"

# Build the raw table. Passing `columns` keeps the frame well-formed even when
# nothing was scraped.
matchup_df = (
    pd.DataFrame(results, columns=['h4', 'label', 'p'])
    # Containers without an <h4> have no champion name; they cannot be keyed
    # or grouped, and previously made the name clean-up raise TypeError.
    .dropna(subset=['h4'])
    # Strip everything that is not a word character from the champion name.
    .assign(h4=lambda df: df['h4'].str.replace(r'[^\w]', '', regex=True))
    .drop_duplicates()
)

#missing champions: placeholder rows for champions the guide does not cover
missing_champs = [['Mel','Blank','Blank'],['Ambessa','Blank','Blank'],['Aurora','Blank','Blank']]
# Only add a placeholder when the champion was not scraped; otherwise the
# 'Blank' values would win the 'first' label and be prefixed to the real notes.
scraped_names = set(matchup_df['h4'])
placeholder_rows = [row for row in missing_champs if row[0] not in scraped_names]
if placeholder_rows:
    # Single concat instead of rebuilding the frame once per placeholder.
    matchup_df = pd.concat(
        [pd.DataFrame(placeholder_rows, columns=matchup_df.columns), matchup_df],
        ignore_index=True,
    )

# Group by h4 and aggregate: one row per champion.
matchup_df = matchup_df.groupby('h4', as_index=False).agg({
    'label': 'first',  # Keep the first label value
    'p': lambda x: ' '.join(str(item) for item in x if pd.notna(item))  # Concatenate all p values
})

# Final presentation: friendly column names, icon URL right after the
# champion name, rows ordered by champion. sort_values returns a new frame,
# so its result has to be kept (the bare call was a no-op).
matchup_df = (
    matchup_df
    .rename(columns={'h4': 'champion', 'label': 'threat_level', 'p': 'notes'})
    .assign(icon_url=lambda df: DDRAGON_ICON_BASE + df['champion'] + ".png")
    .loc[:, ['champion', 'icon_url', 'threat_level', 'notes']]
    .sort_values(by='champion')
    .reset_index(drop=True)
)

matchup_df.to_csv('matchup_df.csv',index=False)

In [67]:
# Display the distinct values of the 'champion' column as a quick visual check
# of which champions are present in the table.
matchup_df['champion'].unique()

array(['Aatrox', 'Ahri', 'Akali', 'Akshan', 'Alistar', 'Amumu', 'Anivia',
       'Annie', 'Aphelios', 'Ashe', 'AurelionSol', 'Azir', 'Bard',
       'BelVeth', 'Blitzcrank', 'Brand', 'Braum', 'Briar', 'Caitlyn',
       'Camille', 'Cassiopeia', 'ChoGath', 'Corki', 'Darius', 'Diana',
       'DrMundo', 'Draven', 'Ekko', 'Elise', 'Evelynn', 'Ezreal',
       'Fiddlesticks', 'Fiora', 'Fizz', 'Galio', 'Gangplank', 'Garen',
       'Gnar', 'Gragas', 'Graves', 'Gwen', 'Hecarim', 'Heimerdinger',
       'Hwei', 'Illaoi', 'Irelia', 'Ivern', 'Janna', 'JarvanIV', 'Jax',
       'Jayce', 'Jhin', 'Jinx', 'KSante', 'KaiSa', 'Kalista', 'Karma',
       'Karthus', 'Kassadin', 'Katarina', 'Kayle', 'Kayn', 'Kennen',
       'KhaZix', 'Kindred', 'Kled', 'KogMaw', 'LeBlanc', 'LeeSin',
       'Leona', 'Lillia', 'Lissandra', 'Lucian', 'Lulu', 'Lux',
       'Malphite', 'Malzahar', 'Maokai', 'MasterYi', 'Milio',
       'MissFortune', 'Mordekaiser', 'Morgana', 'Naafiri', 'Nami',
       'Nasus', 'Nautilus', 'Neeko', 'N