Imports and endpoints

In [67]:
import requests
import pandas as pd
pd.set_option('display.max_columns', 100)
from time import sleep

# Base URL of each Untapped.gg service queried below, keyed by a short alias.
urls = dict(
    api='https://api.mtga.untapped.gg/api/v1/',
    json='https://mtgajson.untapped.gg/v1/latest/',
)

# keyword -> (alias into `urls`, path appended to that base URL).
# The 'analytics' path ends in 'MetaPeriodId=' so the caller can append the id.
endpoints = dict(
    active=('api', 'meta-periods/active'),
    analytics=('api', 'analytics/query/card_stats_by_archetype_event_and_scope_free/ALL?MetaPeriodId='),
    cards=('json', 'cards.json'),
    text=('json', 'loc_en.json'),
)

# Browser-like request headers, written as ordered (name, value) pairs.
headers = dict([
    ('authority', 'api.mtga.untapped.gg'),
    ('accept', '*/*'),
    ('accept-language', 'en-US,en;q=0.9,pt;q=0.8'),
    ('if-none-match', '"047066ff947f01e9e609ca4cf0d6c0a6"'),
    ('origin', 'https://mtga.untapped.gg'),
    ('referer', 'https://mtga.untapped.gg/'),
    ('sec-ch-ua', '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"'),
    ('sec-ch-ua-mobile', '?0'),
    ('sec-ch-ua-platform', '"Windows"'),
    ('sec-fetch-dest', 'empty'),
    ('sec-fetch-mode', 'cors'),
    ('sec-fetch-site', 'same-site'),
    ('user-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'),
])

def request(keyword, send_headers=True, format='', timeout=30):
    """Return the decoded JSON body of the endpoint registered under `keyword`.

    Parameters
    ----------
    keyword : str
        Key into the module-level `endpoints` mapping.
    send_headers : bool, default True
        When true, the module-level `headers` are sent with the request.
    format : str, default ''
        Text appended verbatim to the endpoint URL (the 'analytics' endpoint
        ends in ``MetaPeriodId=`` and expects the id here).
    timeout : float, default 30
        Seconds to wait on the server before giving up.

    Raises
    ------
    KeyError
        If `keyword` is not a registered endpoint.
    requests.HTTPError
        If the server answers with an error status.
    """
    url_kw, endpoint = endpoints[keyword]
    url = urls[url_kw] + endpoint + format

    sleep(1)  # throttle: at most one request per second

    options = {'timeout': timeout}
    if send_headers:
        # The second positional argument of requests.get is `params` (the
        # query string), so the headers have to be passed by keyword.
        # 'if-none-match' is left out: a matching validator would yield a
        # 304 response without a body, and there would be nothing to decode.
        options['headers'] = {name: value
                              for name, value in headers.items()
                              if name.lower() != 'if-none-match'}

    response = requests.get(url, **options)
    response.raise_for_status()
    return response.json()

Active

In [68]:
def request_active():
    """Return the id (as a string) and legal sets of the active 'Ladder' period.

    The 'active' endpoint yields a sequence of entries; when several have
    event_name 'Ladder', the one appearing last in the response is used.
    A KeyError is raised if no entry matches.
    """
    # Keep only the entries of the standard BO1 queue
    ladder_periods = [period for period in request('active')
                      if period['event_name'] == 'Ladder']

    # Last match wins; an empty dict makes the lookups below fail with KeyError
    latest = ladder_periods[-1] if ladder_periods else dict()

    return str(latest['id']), latest['legal_sets']

# Resolve the active meta period once; later cells pass both values on
format_id, legal_sets = request_active()
print(format_id, legal_sets)

355 ['MID', 'VOW', 'NEO', 'SNC', 'DMU', 'BRO', 'ONE', 'MOM']


Cards

In [69]:
def request_cards(sets):
    """Download the card list and keep one printing per title within `sets`.

    Parameters
    ----------
    sets : list-like
        Set codes to keep; rows whose 'set' value is not among them are dropped.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, indexed by 'grpid'.
    """
    catalogue = pd.DataFrame(request('cards'))

    # Only bother with standard legal cards
    legal = catalogue[catalogue['set'].isin(sets)]

    # A title can be printed several times: the highest grpid per titleId is
    # treated as the latest reprint and is the only one retained
    newest_grpid = legal.groupby('titleId')['grpid'].max()
    newest = legal[legal['grpid'].isin(newest_grpid)]

    return newest.set_index('grpid')

# Standalone fetch; get_card_information() calls request_cards() itself
# instead of reading this variable
cards = request_cards(legal_sets)

Text

In [70]:
def request_text():
    """Download the English text table and return it indexed by 'id'.

    The 'raw' and 'text' columns are merged into a single 'text' column:
    where a 'raw' value is present it replaces the 'text' value, and the
    'raw' column is then removed.
    """
    strings = pd.DataFrame(request('text')).set_index('id')

    # Prefer raw where it exists, fall back to text everywhere else
    raw = strings['raw']
    strings['text'] = raw.where(raw.notna(), strings['text'])

    return strings.drop(columns='raw')

# Standalone fetch; get_card_information() calls request_text() itself
# instead of reading this variable
text = request_text()

Card information

In [71]:
def get_card_information(sets):
    """Combine cards and text into format context.

    Downloads the text table and the cards of `sets`, resolves the card
    columns that hold text ids into the strings they point to, and strips
    the columns that are not used downstream.

    Parameters
    ----------
    sets : list-like
        Set codes forwarded to `request_cards`.

    Returns
    -------
    pandas.DataFrame
        One row per card, indexed by 'titleId'. The resolved strings are
        stored in 'title', 'flavor', 'cardTypeText' and 'subtypeText'
        (whichever of them are not entirely empty).

    Raises
    ------
    KeyError
        If a non-missing text id of a card is absent from the text table.
    """
    text = request_text()
    cards = request_cards(sets)

    # Card columns mappable to text
    text_columns = ['titleId', 'flavorId', 'cardTypeTextId', 'subtypeTextId']

    def lookup(text_id):
        # First column of the text row stored under this id
        return text.loc[text_id].values[0]

    # Additional transformed columns. Series.map is applied column by column
    # (DataFrame.applymap is deprecated); missing ids are left untouched.
    transformed = (
        cards[text_columns]
        .apply(lambda column: column.map(lookup, na_action='ignore'))
        .rename(columns={column: column[:-2] for column in text_columns})  # Remove Id
    )
    cards = cards.join(transformed)

    # Remove undesirable columns and change index to titleId
    cards = cards.dropna(how='all', axis='columns')
    ignore = [  # These don't seem useful
        'collectorMax',
        'frameColors',
        'rawFrameDetails',
        'frameDetails',
        'grpid',
        'altDeckLimit',
        'IsRebalanced',
        'isToken',
        'IsDigitalOnly',
        'linkedFaceType',
        'cardTypeTextId',
        'watermark',
        'RebalancedCardLink',
        'flavorId',
        'subtypeTextId',
        'collectorNumber',
        'artistCredit',
        'altTitleId',
        'isSecondaryCard',
    ]
    problematic = [  # Still work to be done here
        'abilities',
        'abilityIdToLinkedTokenGrpId',
        'hiddenAbilities',
        'types',
        'subtypes',
        'supertypes',
        'colors',
        'colorIdentity',
        'linkedFaces',
    ]
    # errors='ignore': the dropna above may already have removed some of the
    # listed columns (any that are empty for every card of these sets), and
    # a strict drop would then raise KeyError.
    cards = (cards.reset_index()
                  .set_index('titleId')
                  .drop(columns=ignore + problematic, errors='ignore'))

    return cards

# Downloads the text and card files again (get_card_information fetches both itself)
card_information = get_card_information(legal_sets)

Analytics

In [72]:
def request_analytics(format):
    """Download the card statistics of one meta period and flatten them.

    Parameters
    ----------
    format : str
        Appended verbatim to the 'analytics' endpoint URL, which ends in
        ``MetaPeriodId=``.

    Returns
    -------
    (df, games) : tuple
        ``df`` is indexed by integer titleId, contains only the rows whose
        archetypeId is 'ALL', and has the columns dt_analytics, format,
        games, wins, copies_1 .. copies_4 and tier, in that order.
        ``games`` maps the first character of every key of
        ``metadata['games']`` to that entry's 'ALL' value.
    """
    # Requested without the module-level headers (send_headers=False)
    json = request('analytics', False, format)

    # Separate into data and metadata.
    # json_normalize joins nested dict keys with '.', and the transpose turns
    # those joined keys into the row index with the payload in column 0.
    metadata = json['metadata']
    df = pd.json_normalize(json['data']).T

    # Number of games per tier, keyed by the first character of the tier key
    games = {tier[0]: metadata['games'][tier]['ALL'] 
             for tier in metadata['games']}

    # Nested list with two levels
    level_1 = ['games', 'wins', 'check', 'copies']
    level_2 = ['copies_1', 'copies_2', 'copies_3', 'copies_4']

    # Unnest the list in stages
    # When we have a new set, sometimes Untapped chooses to mix BO1 and BO3 statistics
    # Thus we make sure to only get BO1 statistics for consistency.
    # NOTE(review): index=df.index only lines up if explode yields exactly one
    # row per key, i.e. each cell is presumably a one-element list wrapping
    # [games, wins, check, copies, ...] -- confirm against a live response.
    df[level_1] = pd.DataFrame(df.explode(0)[0].to_list(), index=df.index).iloc[:, :4]
    df[level_2] = pd.DataFrame(df.copies.to_list(), index=df.index)

    # Remove redundant columns and fill NaN with zeros
    df.drop([0, 'check', 'copies'], axis = 'columns', inplace = True)
    df.fillna(0, inplace = True)

    # Transform dtypes to int, then move the dotted keys from the index into
    # a regular column named 'raw'
    df = (df.astype('int64')
            .reset_index()
            .rename(columns={'index':'raw'}))

    # Split raw column into multiple; this assumes every key has exactly the
    # three dot-separated parts titleId.archetypeId.tier
    df[['titleId', 'archetypeId', 'tier']] = df.raw.str.split('.', expand=True)

    # Record only consolidated data by archetype and change index to titleId
    df = (df[df.archetypeId == 'ALL']
            .drop(['raw', 'archetypeId'], axis=1)
            .set_index('titleId'))
    # titleId came out of a string split, so cast it back to integers
    df.index = df.index.astype('int64')

    # Time stamp (today's date as YYYY-MM-DD) and format
    df.insert(0, 'dt_analytics', pd.Timestamp.today().strftime('%Y-%m-%d'))
    df.insert(1, 'format', format)

    return df, games

# no_games is the per-tier games mapping; leaving it as the last expression displays it
analytics, no_games = request_analytics(format_id)
no_games

{'b': 1687, 'g': 6825, 'p': 39378, 's': 3731}

ABT

In [73]:
def abt(format, sets, tier='ALL', full_report=False):
    """Full dataframe for a given query.

    Joins the analytics of one meta period with the card information of the
    given sets and derives three extra columns: 'included' (games of the card
    divided by the games of the selected tier or tiers), 'winrate' (wins divided by
    games) and 'quantity' (name of the copies_N column with the highest
    value).

    Parameters
    ----------
    format : str
        Meta period id forwarded to `request_analytics`.
    sets : list-like
        Set codes forwarded to `get_card_information`.
    tier : str, default 'ALL'
        'ALL' sums the statistics of every tier per card; any other value
        selects the rows whose 'tier' equals it and must be a key of the
        games mapping returned by `request_analytics`.
    full_report : bool, default False
        Return every merged column instead of the compact selection.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'titleId'.

    Raises
    ------
    KeyError
        If `tier` is neither 'ALL' nor a key of the games mapping.
    """
    card_information = get_card_information(sets)
    analytics, games = request_analytics(format)

    # Choose either aggregate statistics per tier or single one
    if tier == 'ALL':  # Aggregate games statistics
        descriptive = ('dt_analytics', 'format', 'tier')
        agg_dict = {column: 'max' if column in descriptive else 'sum'
                    for column in analytics.columns}
        analytics = analytics.groupby('titleId').agg(agg_dict)
        analytics['tier'] = 'ALL'
        total_games = sum(games.values())
    else:
        analytics = analytics[analytics['tier'] == tier]
        total_games = games[tier]

    # .assign builds a new frame, so the filtered slice above is never
    # written to (that is what triggered SettingWithCopyWarning)
    analytics = analytics.assign(included=analytics['games'] / total_games)

    # Merge analytics and card information
    merged = (analytics.reset_index()
                       .merge(card_information.reset_index(),
                              how='left',
                              on='titleId')
                       .set_index('titleId'))

    # Winrate
    merged['winrate'] = merged['wins'] / merged['games']

    # Quantity: name of the copies_N column holding the largest count
    copies = ['copies_1', 'copies_2', 'copies_3', 'copies_4']
    merged['quantity'] = merged[copies].idxmax(axis=1)

    if full_report:
        return merged

    return merged[['title',
                   'set',
                   'rarity',
                   'castingcost',
                   'winrate',
                   'games',
                   'included',
                   'quantity']]
    
# Defaults apply: tier='ALL' (all tiers aggregated) and full_report=False (compact columns)
df_abt = abt(format_id, legal_sets)