## 0. Setup

In [14]:
%pip install tqdm


Note: you may need to restart the kernel to use updated packages.


In [15]:
import requests
import pandas
from re import sub
from tqdm import tqdm

## 1. Utility functions and constants

In [16]:
# Leaderboard endpoint that get_akasha_lb sends its GET requests to
API_URL = "https://akasha.cv/api/leaderboards"
# Sentinel meaning "no bound yet"; get_akasha_lb sends it as the string "Infinity"
INFINITY = float("inf")
# Page cap passed as `max` to every get_akasha call in section 4
MAX_PAGES = 1000  # 1000 pages * 20 entries per page = 20_000 entries


In [17]:
def snake_case(s):
    """
    Normalise a string to 'this_format'.

    Hyphens and whitespace act as word breaks, an extra break is inserted
    in front of capital letters, and the resulting words are lowercased
    and joined with underscores.
    """
    spaced = s.replace("-", " ")
    # Break in front of every run of capital letters ...
    spaced = sub("([A-Z]+)", r" \1", spaced)
    # ... and in front of every capitalised word (one capital + lowercase letters),
    # which separates an acronym from the word that follows it.
    spaced = sub("([A-Z][a-z]+)", r" \1", spaced)
    words = spaced.split()
    return "_".join(words).lower()


## 2. Fetch the data from Akasha.cv API

In [18]:
def get_akasha_lb(calculation_id, variant="", lower_bound=INFINITY):
    """
    Gets a single page from the Akasha.cv API for the given calculation id and variant,
    applying the given bound constraint to the results.

    Parameters:
        calculation_id: leaderboard calculation id, sent as `calculationId`.
        variant: calculation variant, sent as `variant` ("" sends an empty variant).
        lower_bound: value for the `p` filter, always sent as "lt|<value>".
            INFINITY is sent as the literal "Infinity".
            NOTE(review): "lt" presumably means "result lower than value" -
            confirm against the API.

    Returns:
        The list under the "data" key of the JSON response, with a "variant"
        key added to every entry.
    """
    # The parentheses matter: without them the conditional expression swallows
    # the "lt|" prefix and every bounded request would be sent without it.
    bound = "Infinity" if lower_bound == INFINITY else str(lower_bound)
    params = {
        "sort": "calculation.result",
        "order": -1,
        "p": "lt|" + bound,
        "calculationId": calculation_id,
        "variant": variant
    }
    data = requests.get(API_URL, params).json()

    # The response entries do not carry the variant they were requested with,
    # so record it on each entry for later use.
    for entry in data["data"]:
        entry["variant"] = variant

    return data["data"]


In [19]:
def get_akasha(calculation_id, variant="", max=100):
    """
    Downloads consecutive leaderboard pages from the Akasha.cv API for the
    given calculation id and variant and returns all their entries in one list.

    At most `max` pages are requested; the download stops early as soon as
    an empty page comes back.
    """
    variant_label = "None" if variant == "" else variant
    desc = f"Getting Akasha data for {calculation_id} (Variant {variant_label})"

    entries = []
    bound = INFINITY  # the first request is unbounded
    for _ in tqdm(range(max), desc=desc):
        page = get_akasha_lb(calculation_id, variant, bound)
        if len(page) == 0:
            break

        entries.extend(page)
        # The next request continues from the result of the last entry received
        bound = page[-1]["calculation"]["result"]

    return entries


## 3. Transform the JSON data into a DataFrame

In [20]:
def normalize_akasha_data_row(json):
    """
    Turn one entry of the Akasha.cv API response into a flat dictionary.

    Only the "stats" value stays nested: it is a dictionary mapping each
    snake_cased stat name to that stat's "value".
    """
    def get_mainstat(slot):
        """
        Snake_cased main stat key of the artifact in `slot`, or None when
        the slot is absent or its main stat key is empty.
        """
        artifacts = json["artifactObjects"]
        if slot not in artifacts:
            return None
        key = artifacts[slot]["mainStatKey"]
        if not key:
            return None
        return snake_case(key)

    def get_set(n_piece, index=0):
        """
        Snake_cased name of the index-th set whose piece count equals
        n_piece, or None when there is no such set.
        """
        sets = json["artifactSets"]
        matching = (name for name in sets if sets[name]["count"] == n_piece)
        for position, name in enumerate(matching):
            if position == index:
                return snake_case(name)
        return None

    def get_stats():
        """Map every snake_cased stat name to its "value"."""
        return {
            snake_case(stat): info["value"]
            for stat, info in json["stats"].items()
        }

    return {
        "uid": json["uid"],
        "name": snake_case(json["name"]),
        "player": json["owner"]["nickname"],
        "calculation_id": json["calculation"]["id"],
        "variant": json["variant"],
        "calculation_result": json["calculation"]["result"],
        "crit_value": json["critValue"],
        "sands": get_mainstat("EQUIP_SHOES"),
        "goblet": get_mainstat("EQUIP_RING"),
        "circlet": get_mainstat("EQUIP_DRESS"),
        "real_weapon": snake_case(json["weapon"]["name"]),
        "set_2_1": get_set(2),
        "set_2_2": get_set(2, 1),
        "set_4": get_set(4),
        "stats": get_stats(),
    }


In [25]:
def normalize_akasha_data(data):
    """
    Convert the Akasha.cv API response to a flat pandas DataFrame.

    Parameters:
        data: iterable of raw leaderboard entries, each accepted by
            normalize_akasha_data_row.

    Returns:
        A DataFrame with one row per entry. The nested "stats" dictionary of
        each row is expanded into "stats.<name>" columns. An empty input
        yields an empty DataFrame.
    """
    # Flatten all rows first and build the frame in a single call: creating
    # one single-row DataFrame per entry and concatenating them is far slower
    # and makes pandas.concat raise when `data` is empty.
    rows = [normalize_akasha_data_row(row) for row in tqdm(data)]
    return pandas.json_normalize(rows)


## 4. Get the data

In [22]:
# Every (calculation id, variant) pair below is downloaded in order and the
# entries of all leaderboards are collected into one flat list.
raw_data = [
    entry
    for calculation_id, variant in (
        ("1000003300", ""),  # Tartaglia - Childe International, Avg DMG (Polar)
        ("1000002300", ""),  # Xiangling - National Pyronado Vape Avg DMG (The Catch)
        ("1000002300", "200er"),  # Xiangling - National Pyronado Vape Avg DMG (The Catch)
        ("1000005200", ""),  # Shogun Raiden - Hyper Raiden Burst, Avg DMG (Engulfing Lightning)
        ("1000006000", "190er"),  # Yelan - Exquisite Throw Double Hydro, Avg. DMG (Elegy)
        ("1000006001", "190er"),  # Yelan - Exquisite Throw Double Hydro, Avg. DMG (Aqua)
        ("1000004601", "not-shimenawa"),  # Hu Tao - 11N1CD + Q, Xingqiu/Yelan/Zhongli, Avg. DMG (Homa)
    )
    for entry in get_akasha(calculation_id, variant, max=MAX_PAGES)
]


Getting Akasha data for 1000003300 (Variant None):   0%|          | 0/1000 [00:00<?, ?it/s]

Getting Akasha data for 1000003300 (Variant None): 100%|██████████| 1000/1000 [06:35<00:00,  2.53it/s]
Getting Akasha data for 1000002300 (Variant None): 100%|██████████| 1000/1000 [06:37<00:00,  2.52it/s]
Getting Akasha data for 1000002300 (Variant 200er): 100%|██████████| 1000/1000 [06:37<00:00,  2.52it/s]
Getting Akasha data for 1000005200 (Variant None): 100%|██████████| 1000/1000 [06:41<00:00,  2.49it/s]
Getting Akasha data for 1000006000 (Variant 190er): 100%|██████████| 1000/1000 [06:44<00:00,  2.47it/s]
Getting Akasha data for 1000006001 (Variant 190er): 100%|██████████| 1000/1000 [06:38<00:00,  2.51it/s]
Getting Akasha data for 1000004601 (Variant not-shimenawa): 100%|██████████| 1000/1000 [06:45<00:00,  2.46it/s]


In [26]:
# Flatten the raw entries into a DataFrame; the "uid" column is removed
# right away so that it never shows up in the output.
data = normalize_akasha_data(raw_data).drop(columns=["uid"])

100%|██████████| 140000/140000 [01:32<00:00, 1519.81it/s]


In [27]:
# Display the transformed DataFrame as the cell output
data


Unnamed: 0,name,player,calculation_id,variant,calculation_result,crit_value,sands,goblet,circlet,real_weapon,...,stats.atk,stats.def,stats.elemental_mastery,stats.energy_recharge,stats.healing_bonus,stats.crit_rate,stats.crit_damage,stats.hydro_damage_bonus,stats.pyro_damage_bonus,stats.electro_damage_bonus
0,tartaglia,Уильям,1000003300,,6.875992e+05,269.769998,atk%,hydro_dmg_bonus,crit_dmg,thundering_pulse,...,1747.983611,963.222037,41.959999,1.0518,0,0.9602,2.0389,0.904,,
1,tartaglia,,1000003300,,6.872438e+05,273.669997,atk%,hydro_dmg_bonus,crit_dmg,polar_star,...,2018.278597,916.283744,182.570000,1.0000,0,0.9486,2.1011,0.754,,
2,tartaglia,しゃけ,1000003300,,6.788938e+05,247.989998,atk%,hydro_dmg_bonus,crit_dmg,polar_star,...,1960.274943,893.367638,39.629999,1.0000,0,0.9252,1.8911,0.904,,
3,tartaglia,ケイ素,1000003300,,6.735325e+05,236.329998,atk%,hydro_dmg_bonus,crit_dmg,polar_star,...,1840.049053,835.497637,149.189997,1.0000,0,0.8707,1.8835,0.904,,
4,tartaglia,Devilmarika,1000003300,,6.730070e+05,240.979996,atk%,hydro_dmg_bonus,crit_rate,thundering_pulse,...,2005.279200,835.497638,72.270000,1.1231,0,1.0185,1.6344,0.904,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139995,hu_tao,aMu.,1000004601,not-shimenawa,1.080300e+06,250.339998,elemental_mastery,pyro_dmg_bonus,crit_rate,staff_of_homa,...,1638.880181,1064.665082,294.230000,1.0583,0,0.8860,2.3769,,0.616,
139996,hu_tao,泪子,1000004601,not-shimenawa,1.078146e+06,246.459997,hp%,pyro_dmg_bonus,crit_rate,staff_of_homa,...,1772.400115,1003.227236,163.179998,1.0000,0,0.8355,2.4391,,0.616,
139997,hu_tao,Фуфел,1000004601,not-shimenawa,1.077851e+06,240.999997,hp%,pyro_dmg_bonus,crit_rate,staff_of_homa,...,1826.092852,876.151993,167.839998,1.0518,0,0.8432,2.3691,,0.616,
139998,hu_tao,ohplease,1000004601,not-shimenawa,1.077142e+06,242.549996,elemental_mastery,pyro_dmg_bonus,crit_rate,staff_of_homa,...,1661.475695,937.035753,280.250000,1.0000,0,0.8665,2.3380,,0.616,


In [28]:
# Transformed data is saved to a CSV file so it can
# be recovered later if you don't want to re-download.
# pandas writes the file itself, so it is always closed properly, and the
# explicit UTF-8 encoding keeps non-ASCII player names from failing on
# platforms whose default text encoding cannot represent them.
data.to_csv("akasha_data.csv", encoding="utf-8")

In [None]:
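# To recover the data from the local CSV file,
# skip the download code above (section 4) and
# run only the following (index_col=0 restores the saved row index
# instead of loading it as an extra "Unnamed: 0" column):
# data = pandas.read_csv("akasha_data.csv", index_col=0)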