In [7]:
import pandas as pd
import numpy as np  
import json
import requests
from bs4 import BeautifulSoup

In [2]:
# Load the match table and count how often each value occurs across every
# column whose name contains 'champion'.
df = pd.read_csv("combined_league_matches.csv")
champion_ids = [column for column in df.columns if 'champion' in column]
champion_columns = [df[column] for column in champion_ids]
all_games = pd.concat(champion_columns)
games_per_champ = all_games.value_counts()  # maybe useful: total number of games per champ id


In [12]:
# Scrape every win rate listed on op.gg's champion table, in page order
# (top to bottom). A champion may appear in several rows; all of its win
# rates are collected into one list under its name.

url = "https://www.op.gg/champions"

headers = {"User-Agent": "Mozilla/5.0"}
# A timeout keeps the cell from hanging indefinitely if the site never answers.
response = requests.get(url, headers=headers, timeout=30)

# Display names from the page -> spelling used for the "id" lookups against
# champions.json further down.
name_overrides = {
    "Kog'Maw": "KogMaw",
    "Cho'Gath": "Chogath",
    "Dr. Mundo": "Dr Mundo",
    "Rek'Sai": "RekSai",
    "Kha'Zix": "Khazix",
    "LeBlanc": "Leblanc",
    "K'Sante": "KSante",
    "Vel'Koz": "Velkoz",
    "Kai'Sa": "Kaisa",
    "Bel'Veth": "Belveth",
}

if response.status_code == 200:
    soup = BeautifulSoup(response.content, "html.parser")

    table_rows = soup.select("table tbody tr")

    champions = {}
    for row in table_rows:
        cells = row.select("td")
        if len(cells) < 5:
            # The win rate is read from the fifth cell; a shorter row has
            # nothing to read, so skip it instead of raising IndexError.
            continue

        img_tag = row.select_one("td a img")
        name = img_tag["alt"].strip() if img_tag else "Unknown"
        name = name_overrides.get(name, name)

        win_rate = cells[4].text.strip()

        champions.setdefault(name, []).append(win_rate)

    #for champion, win_rates in champions.items():
    #    print(f"{champion}: {win_rates}")

    # Write the scraped data once, after the loop has finished. (Previously
    # this sat inside the loop, so the file was rewritten for every row and
    # never written at all when the table was empty.)
    output_file_path = "champion_opgg_winratedata.json"
    with open(output_file_path, "w") as file:
        json.dump(champions, file, indent=2)

    #if we wanted to just keep old file format and just update
    json_file_path = "champions.json"
    with open(json_file_path, "r") as file:
        json_data = json.load(file)

    # Attach the scraped list to every entry whose "id" matches a scraped name.
    for champ in json_data:
        champ_name = champ.get("id")
        if champ_name in champions:
            champ["win_rates"] = champions[champ_name]

    updated_json_file_path = "updated_champion_data.json"
    with open(updated_json_file_path, "w") as file:
        json.dump(json_data, file, indent=2)

else:
    # Note: `champions` is not defined on this path, so later cells that use
    # it will fail until the request succeeds.
    print(f"Failed to fetch data. Status code: {response.status_code}")


In [9]:
# Re-key the scraped win rates: replace each champion name string with the
# "key" value of the champions.json entry whose "id" equals that name.

json_file_path = "champions.json"
with open(json_file_path, "r") as file:
    json_data = json.load(file)


id_to_key_mapping = {entry["id"]: entry["key"] for entry in json_data}


champions_with_keys = {}
unmatched_champions = []
for champion, win_rates in champions.items():
    champion_key = id_to_key_mapping.get(champion)
    if champion_key is None:
        # Previously every unmatched name was stored under the single key
        # "Unknown", so each one silently overwrote the one before it.
        # Skip and report them so the name mapping can be corrected instead.
        unmatched_champions.append(champion)
        continue
    champions_with_keys[champion_key] = win_rates

if unmatched_champions:
    print(f"No champions.json entry for: {', '.join(unmatched_champions)}")


#for key, win_rates in champions_with_keys.items():
#    print(f"{key}: {win_rates}")

output_file_path = "champions_with_keys.json"
with open(output_file_path, "w") as file:
    json.dump(champions_with_keys, file, indent=2)

In [10]:
# Reduce each champion's list of win rates to a single value. The current method takes the mean of the win rates across all roles.

# TODO: consider dropping severely bad off-role win rates, e.g. by setting a minimum win rate of about 48 and a maximum of about 54.

# Average each champion's percentage strings and store the result as a
# two-decimal percentage string under the same key.
champions_with_mean_winrate = {}
for champion_key, rate_strings in champions_with_keys.items():
    rates = [float(text.strip('%')) for text in rate_strings]
    average = sum(rates) / len(rates)
    champions_with_mean_winrate[champion_key] = f"{average:.2f}%"

# Save the averaged values next to the other intermediate JSON files.
output_file_path = "champions_with_mean_winrate.json"
with open(output_file_path, "w") as out_file:
    json.dump(champions_with_mean_winrate, out_file, indent=2)


#for key, mean_win_rate in champions_with_mean_winrate.items():
#    print(f"{key}: {mean_win_rate}")

In [6]:
# List of useful structures built by the earlier cells. These are bare
# expressions, so only the last one (champions_with_mean_winrate) is shown as
# the cell output.
champions #list of win-rate strings per champion, keyed by the champion's name string
champions_with_keys #list of win-rate strings per champion, keyed by the "key" value from champions.json
champions_with_mean_winrate #mean win rate per champion as an "NN.NN%" string, keyed like champions_with_keys


{'799': '49.67%',
 '895': '53.33%',
 '711': '52.53%',
 '50': '50.44%',
 '223': '51.97%',
 '10': '51.06%',
 '96': '51.95%',
 '19': '51.61%',
 '31': '51.95%',
 '267': '51.68%',
 '57': '51.70%',
 '92': '51.65%',
 '54': '51.83%',
 '516': '51.41%',
 '98': '50.86%',
 '56': '51.32%',
 '17': '49.84%',
 '79': '50.29%',
 '117': '51.30%',
 '42': '49.59%',
 '60': '51.07%',
 '141': '51.05%',
 '103': '51.02%',
 '82': '51.00%',
 '164': '50.36%',
 '39': '51.00%',
 '36': '50.90%',
 '3': '50.16%',
 '106': '50.64%',
 '22': '50.65%',
 '122': '50.43%',
 '222': '50.40%',
 '104': '50.37%',
 '266': '50.26%',
 '234': '50.26%',
 '202': '50.21%',
 '58': '50.55%',
 '517': '49.75%',
 '51': '49.43%',
 '69': '53.19%',
 '30': '51.90%',
 '45': '51.75%',
 '78': '51.25%',
 '910': '50.64%',
 '147': '51.32%',
 '13': '51.72%',
 '163': '51.06%',
 '157': '49.81%',
 '34': '52.14%',
 '40': '51.94%',
 '166': '51.89%',
 '115': '50.17%',
 '85': '52.02%',
 '240': '51.58%',
 '20': '51.52%',
 '9': '51.42%',
 '21': '51.40%',
 '235': 

In [15]:

# Reload the per-champion win-rate lists from the JSON file saved earlier.
keys_path = "champions_with_keys.json"
with open(keys_path, "r") as keys_file:
    champion_data = json.load(keys_file)


def process_win_rates(win_rates, low=48.0, high=55.0, neutral=50.0):
    """Average a champion's win rates after neutralising out-of-range values.

    Args:
        win_rates: iterable of percentage strings such as "51.23%".
        low: rates strictly below this are treated as outliers.
        high: rates strictly above this are treated as outliers.
        neutral: value substituted for every outlier, and returned when
            ``win_rates`` is empty.

    Returns:
        The mean of the adjusted rates, rounded to two decimals.

    Raises:
        ValueError: if an entry cannot be parsed as a number.
    """
    rates = [float(rate.strip("%")) for rate in win_rates]

    if not rates:
        # No data at all: fall back to the neutral value rather than
        # dividing by zero.
        return round(neutral, 2)

    # Outliers are replaced, not dropped, so they still count in the mean.
    adjusted = [neutral if rate < low or rate > high else rate for rate in rates]

    return round(sum(adjusted) / len(adjusted), 2)


# Collapse every champion's list of rates to one number with process_win_rates.
processed_data = dict(
    (champion_id, process_win_rates(rates))
    for champion_id, rates in champion_data.items()
)

# Save the result, then echo it so it shows up in the cell output.
with open("outlier_removed_champion_winrates.json", "w") as out_file:
    json.dump(processed_data, out_file, indent=2)

print("Processed Champion Win Rates:", json.dumps(processed_data, indent=2), sep="\n")

Processed Champion Win Rates:
{
  "799": 50.91,
  "895": 53.33,
  "711": 52.53,
  "50": 50.44,
  "223": 51.97,
  "10": 51.06,
  "96": 51.95,
  "19": 51.61,
  "31": 51.95,
  "267": 51.68,
  "57": 51.7,
  "92": 51.65,
  "54": 51.83,
  "516": 51.41,
  "98": 50.86,
  "56": 51.32,
  "17": 49.84,
  "79": 50.29,
  "117": 51.3,
  "42": 49.59,
  "60": 51.07,
  "141": 51.05,
  "103": 51.02,
  "82": 51.0,
  "164": 50.36,
  "39": 51.0,
  "36": 50.9,
  "3": 50.16,
  "106": 50.64,
  "22": 50.65,
  "122": 50.43,
  "222": 50.4,
  "104": 50.37,
  "266": 50.26,
  "234": 50.26,
  "202": 50.21,
  "58": 50.55,
  "517": 49.75,
  "51": 49.43,
  "69": 53.19,
  "30": 51.9,
  "45": 51.75,
  "78": 51.25,
  "910": 50.64,
  "147": 51.32,
  "13": 51.72,
  "163": 51.06,
  "157": 50.5,
  "34": 52.14,
  "40": 51.94,
  "166": 51.89,
  "115": 50.17,
  "85": 52.02,
  "240": 51.58,
  "20": 51.52,
  "9": 51.42,
  "21": 51.4,
  "235": 51.38,
  "91": 51.14,
  "201": 51.34,
  "421": 51.32,
  "6": 51.18,
  "15": 51.13,
  "127"

In [16]:

# Read the saved per-champion win-rate lists back in for the max-rate pass.
source_path = "champions_with_keys.json"
with open(source_path, "r") as source_file:
    champion_data = json.load(source_file)


def process_win_rates(win_rates):
    """Return the highest win rate in ``win_rates``, rounded to two decimals.

    ``win_rates`` is an iterable of percentage strings such as "51.58%".
    Note that this definition replaces the earlier function of the same name.
    """
    parsed = [float(entry.strip("%")) for entry in win_rates]
    best = max(parsed)
    return round(best, 2)


# Reduce every champion's list of rates to its maximum with process_win_rates.
processed_data = dict(
    (champion_id, process_win_rates(rates))
    for champion_id, rates in champion_data.items()
)

# Save the result, then echo it so it shows up in the cell output.
with open("champion_max_winrates.json", "w") as out_file:
    json.dump(processed_data, out_file, indent=2)

print("Processed Champion Max Win Rates:", json.dumps(processed_data, indent=2), sep="\n")

Processed Champion Max Win Rates:
{
  "799": 51.58,
  "895": 53.33,
  "711": 52.53,
  "50": 52.33,
  "223": 52.06,
  "10": 51.98,
  "96": 51.95,
  "19": 51.87,
  "31": 52.07,
  "267": 51.68,
  "57": 51.72,
  "92": 51.65,
  "54": 52.1,
  "516": 51.41,
  "98": 51.35,
  "56": 51.32,
  "17": 51.31,
  "79": 51.3,
  "117": 51.3,
  "42": 51.11,
  "60": 51.07,
  "141": 51.05,
  "103": 51.02,
  "82": 51.0,
  "164": 50.97,
  "39": 51.08,
  "36": 50.93,
  "3": 50.93,
  "106": 50.78,
  "22": 50.65,
  "122": 50.43,
  "222": 50.4,
  "104": 50.37,
  "266": 50.26,
  "234": 50.26,
  "202": 50.21,
  "58": 51.04,
  "517": 49.97,
  "51": 49.43,
  "69": 53.99,
  "30": 53.35,
  "45": 52.97,
  "78": 52.46,
  "910": 52.43,
  "147": 52.42,
  "13": 52.28,
  "163": 52.27,
  "157": 52.18,
  "34": 52.14,
  "40": 51.94,
  "166": 51.89,
  "115": 51.72,
  "85": 52.45,
  "240": 51.58,
  "20": 51.52,
  "9": 51.42,
  "21": 51.4,
  "235": 51.38,
  "91": 51.35,
  "201": 51.34,
  "421": 51.32,
  "6": 51.18,
  "15": 51.13,
