Imports and endpoints

In [1]:
from functools import lru_cache
import requests
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 100)
from time import sleep

# Base URLs of the two services queried by this notebook, keyed by a short alias
urls = {
    'api' : 'https://api.mtga.untapped.gg/api/v1/',
    'json' : 'https://mtgajson.untapped.gg/v1/latest/',
}

# Maps a request keyword to (alias in `urls`, path appended to that base URL).
# The 'analytics' path ends with 'MetaPeriodId=' because `request` appends its
# `format` argument directly after the path.
endpoints = {
    'active': ('api', 'meta-periods/active'),
    'analytics': ('api', 'analytics/query/card_stats_by_archetype_event_and_scope_free/ALL?MetaPeriodId='),
    'cards': ('json', 'cards.json'),
    'text': ('json', 'loc_en.json'),
}

# Browser-like request headers, handed to `requests.get` by `request` when
# `send_headers` is true.
# NOTE(review): 'if-none-match' is a conditional-request validator; if the server
# honours it the response may be a 304 without a JSON body -- confirm it is wanted.
headers = {
    'authority': 'api.mtga.untapped.gg',
    'accept': '*/*',
    'accept-language': 'en-US,en;q=0.9,pt;q=0.8',
    'if-none-match': '"047066ff947f01e9e609ca4cf0d6c0a6"',
    'origin': 'https://mtga.untapped.gg',
    'referer': 'https://mtga.untapped.gg/',
    'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
}


In [2]:
@lru_cache
def request(keyword, send_headers=True, format=''):
    """Fetch and decode the JSON document behind an endpoint keyword.

    Parameters
    ----------
    keyword : str
        Key of the module-level `endpoints` mapping.
    send_headers : bool, default True
        When true, the module-level `headers` are sent with the request.
    format : str, default ''
        Text appended verbatim to the endpoint path (e.g. a meta period id).

    Returns
    -------
    The decoded JSON payload. Results are memoized by `lru_cache`, so repeated
    calls with the same arguments return the very same object; callers should
    not mutate it.

    Raises
    ------
    KeyError
        If `keyword` is not a known endpoint.
    requests.RequestException
        On network failure, timeout or an HTTP error status.
    """
    url_kw, endpoint = endpoints[keyword]
    url = urls[url_kw] + endpoint + format

    sleep(1)  # Throttle: be polite to the server

    request_headers = None
    if send_headers:
        # No cached copy is kept locally, so a conditional 'If-None-Match'
        # validator could only yield a body-less 304 that cannot be decoded.
        request_headers = {name: value for name, value in headers.items()
                           if name.lower() != 'if-none-match'}

    # `headers` must be passed by keyword: the second positional parameter of
    # `requests.get` is `params`, which would put the values in the query string.
    response = requests.get(url, headers=request_headers, timeout=30)
    response.raise_for_status()
    return response.json()

Active

In [3]:
def request_active():
    """Return the id and legal sets of the latest active 'Ladder' meta period.

    The 'active' endpoint is scanned for entries whose 'event_name' is
    'Ladder'; when several match, the last one in response order is used.
    (The original comment describes this as the latest standard BO1 format.)

    Returns
    -------
    tuple
        `(str(id), legal_sets)` of the selected entry.

    Raises
    ------
    KeyError
        If the response contains no 'Ladder' entry, or the selected entry
        lacks 'id' / 'legal_sets'.
    """
    ladder_periods = [period for period in request('active')
                      if period['event_name'] == 'Ladder']

    if not ladder_periods:
        # Fail with an explicit message instead of a bare KeyError('id')
        raise KeyError("no active meta period with event_name 'Ladder'")

    latest = ladder_periods[-1]
    return str(latest['id']), latest['legal_sets']

# Resolve the active 'Ladder' meta period once; `legal_sets` filters the card
# table and `format_id` parameterizes the analytics request in later cells.
format_id, legal_sets = request_active()
print(format_id, legal_sets)

355 ['MID', 'VOW', 'NEO', 'SNC', 'DMU', 'BRO', 'ONE', 'MOM']


Cards

In [4]:
def request_cards(sets):
    """Return the raw card dataframe restricted to `sets`, indexed by 'grpid'.

    For every 'titleId' only the row with the highest 'grpid' is kept, which
    the notebook treats as the latest reprint.
    """
    all_cards = pd.DataFrame(request('cards'))

    # Only bother with cards printed in the requested sets
    in_sets = all_cards[all_cards['set'].isin(sets)]

    # Highest grpid per title identifies the printing to keep
    newest_grpid = in_sets.groupby('titleId')['grpid'].max()
    latest_printings = in_sets[in_sets['grpid'].isin(newest_grpid)]

    return latest_printings.set_index('grpid')

# Raw card table limited to the sets returned by request_active()
raw_card = request_cards(legal_sets)

Text

In [5]:
def request_text():
    """Return the raw card text dataframe, indexed by 'id'.

    Where a row has a non-null 'raw' value it overwrites 'text'; the 'raw'
    column is then removed.
    """
    text_table = pd.DataFrame(request('text')).set_index('id')

    # Collapse 'raw' into 'text', giving 'raw' priority where it is present
    has_raw = text_table['raw'].notna()
    text_table.loc[has_raw, 'text'] = text_table['raw']

    return text_table.drop(columns='raw')

# Text lookup table; the card helpers below read it as a global to resolve text ids
raw_text = request_text()

Card information

In [6]:
def filter_raw_card(raw_card):
    """Rename selected columns and drop the ones not used by the analysis.

    The index of `raw_card` is moved back into a column by `reset_index`
    (and dropped when it is 'grpid'); the result therefore carries a default
    integer index. Promoting 'titleId' to the index is left to the caller.

    Parameters
    ----------
    raw_card : pandas.DataFrame
        Raw card table, as returned by `request_cards`. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with renamed columns and without the ignored ones. Ignored
        columns that are absent from the input are silently skipped, so a
        change in the upstream schema does not abort the notebook.
    """
    # Columns to ignore and corresponding reasoning
    ignore = [
        'artistCredit',                  # Deemed not useful for analysis
        'collectorNumber',               # Deemed not useful for analysis
        'collectorMax',                  # Deemed not useful for analysis
        'types',                         # Aggregation from other columns
        'subtypes',                      # Aggregation from other columns
        'colors',                        # Aggregation from other columns
        'colorIdentity',                 # Aggregation from other columns
        'frameColors',                   # Deemed not useful for analysis
        'supertypes',                    # Aggregation from other columns
        'rawFrameDetails',               # Deemed not useful for analysis
        'altDeckLimit',                  # Not standard relevant
        'DigitalReleaseSet',             # Not standard relevant
        'frameDetails',                  # Deemed not useful for analysis
        'abilityIdToLinkedTokenGrpId',   # Not standard relevant
        'linkedFaceType',                # Deemed not useful for analysis
        'linkedFaces',                   # Deemed not useful for analysis
        'usesSideboard',                 # Not standard relevant
        'IsDigitalOnly',                 # Not standard relevant
        'watermark',                     # Deemed not useful for analysis
        'RebalancedCardLink',            # Deemed not useful for analysis
        'altTitleId',                    # Deemed not useful for analysis
        'DefunctRebalancedCardLink',     # Deemed not useful for analysis
        'abilityIdToLinkedConjurations', # Not standard relevant
        'grpid',                         # Obsolete, as titleId is now unique
        'hiddenAbilities',               # Already contained in 'abilities'
    ]

    # Naming conventions
    renamed = raw_card.rename(columns={
        'set': 'set_id',
        'isSecondaryCard': 'is_secondary_card',
        'isToken': 'is_token',
        'IsRebalanced': 'is_rebalanced',
        'artId': 'art_id',
    })

    # Turn the index back into a column, then drop undesirable columns
    return renamed.reset_index().drop(columns=ignore, errors='ignore')

In [7]:
def get_card_dataframe(df):
    """Return the card dataframe with text ids resolved to text.

    Reads the module-level `raw_text` table to translate the id columns
    'titleId', 'flavorId', 'cardTypeTextId' and 'subtypeTextId' into the new
    columns 'title', 'flavor', 'cardTypeText' and 'subtypeText'.

    Parameters
    ----------
    df : pandas.DataFrame
        Filtered card table (see `filter_raw_card`). It is not modified.

    Returns
    -------
    pandas.DataFrame
        The columns listed in `order` below, in that order.

    Raises
    ------
    KeyError
        If a non-null id is missing from `raw_text`, or an expected column is
        missing from `df`.
    """
    # Card columns mappable to text
    id_to_text = ['titleId', 'flavorId', 'cardTypeTextId', 'subtypeTextId']

    def lookup_text(text_id):
        # First column of the `raw_text` row labelled by this id
        return raw_text.loc[text_id].values[0]

    # Per-column Series.map instead of DataFrame.applymap, which is deprecated
    # since pandas 2.1; missing ids are left as NaN. New names drop the 'Id' suffix.
    text_columns = pd.DataFrame({
        column[:-2]: df[column].map(lookup_text, na_action='ignore')
        for column in id_to_text
    })
    df = df.join(text_columns)

    # Only tracked supertype will be 'legendary'
    df['is_legendary'] = df['cardTypeText'].str.contains('Legendary')

    # Replace empty string flavor with NaN
    df['flavor'] = df['flavor'].replace({'': np.nan})

    # Define the columns and order for card dataframe
    order = [
        'titleId',
        'art_id',
        'set_id',
        'title',
        'rarity',
        'power',
        'toughness',
        'flavor',
        'is_legendary',
        'is_token',
        'is_secondary_card',
        'is_rebalanced',
    ]

    return df[order]

In [8]:
def get_card_type(df):
    """Return one row per (card, type word) as a Series named 'type'.

    'cardTypeTextId' is resolved through the module-level `raw_text`, the text
    is split on whitespace, and the words 'NONE', 'Legendary', 'Basic' and
    'Token' are discarded.
    """
    def lookup_text(text_id):
        return raw_text.loc[text_id].values[0]

    # Resolve the id to text and attach it as a 'type' column
    type_text = df['cardTypeTextId'].map(lookup_text, na_action='ignore').rename('type')
    with_type = df.join(type_text)

    # One row per word; rows without any text are removed
    type_words = with_type['type'].str.split().explode().dropna()

    # 'NONE' covers special cards without types, such as 'Day' and 'Night'
    not_a_type = ['NONE', 'Legendary', 'Basic', 'Token']
    return type_words[~type_words.isin(not_a_type)]

In [9]:
def get_card_subtype(df):
    """Return one row per (card, subtype word) as a Series named 'subtype'.

    'subtypeTextId' is resolved through the module-level `raw_text` and the
    resulting text is split on whitespace.
    """
    def lookup_text(text_id):
        return raw_text.loc[text_id].values[0]

    # Resolve the id to text and attach it as a 'subtype' column
    subtype_text = df['subtypeTextId'].map(lookup_text, na_action='ignore').rename('subtype')
    with_subtype = df.join(subtype_text)

    # One row per word; rows without any text are removed
    return with_subtype['subtype'].str.split().explode().dropna()

In [10]:
def get_card_cost(df):
    """Return the card cost dataframe in long form.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'castingcost' string column and an index named 'card_id'.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'card_id' with columns 'color' and 'cost'; one row per
        (card, color) whose cost is above zero.
    """
    # Regex matched against 'castingcost' for each cost variety
    casting_colors = {
        'Black': r'oB',
        'Blue': r'oU',
        'Green': r'oG',
        'Red': r'oR',
        'White': r'oW',
        'Multicolor': r'\(',
        'X': r'oX'
    }

    castingcost = df['castingcost']

    # Build the counts in a fresh frame so the caller's `df` gains no columns
    costs = pd.DataFrame({
        # Special case is colorless that can be any number: first run of digits
        'Colorless': castingcost.str.extract(r'(\d+)', expand=False),
        **{color: castingcost.str.count(code)
           for color, code in casting_colors.items()},
    })

    # Stack columns into rows and set index to card_id only
    long_costs = (costs.stack()
                       .reset_index()
                       .set_index('card_id')
                       .rename(columns={'level_1': 'color', 0: 'cost'}))

    # 'Colorless' values are digit strings; make the whole column numeric
    long_costs['cost'] = pd.to_numeric(long_costs['cost'])

    # Only record costs above zero
    return long_costs[long_costs['cost'] > 0]

In [11]:
def get_card_ability(df):
    """Return one row per (card, ability) holding the ability's text.

    Each entry of the 'abilities' column is expected to be a list of dicts;
    the 'TextId' of every dict is resolved through the module-level `raw_text`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'abilities' column. It is not modified.

    Returns
    -------
    pandas.Series
        Ability texts, sharing the index labels of the cards they belong to.

    Raises
    ------
    KeyError
        If a 'TextId' is not present in `raw_text`.
    """
    abilities = (df['abilities']
                   .dropna()
                   .explode()
                   .dropna())  # An empty list explodes to NaN, which has no .get

    text_ids = abilities.map(lambda ability: ability.get('TextId'))
    return text_ids.map(lambda text_id: raw_text.loc[text_id].values[0])

In [12]:
def get_card_information():
    """Return the normalized card tables built from the global `raw_card`.

    The result is the tuple
    `(card, card_type, card_subtype, card_cost, card_ability)`, every member
    indexed by 'card_id' (the former 'titleId').
    """
    filtered = filter_raw_card(raw_card)
    card = get_card_dataframe(filtered)

    # Promote 'titleId' to the index under the name 'card_id'
    filtered = filtered.set_index('titleId').rename_axis('card_id')
    card = card.set_index('titleId').rename_axis('card_id')

    return (
        card,
        get_card_type(filtered),
        get_card_subtype(filtered),
        get_card_cost(filtered),
        get_card_ability(filtered),
    )

# Build every card table in one pass from the global `raw_card` / `raw_text`
card, card_type, card_subtype, card_cost, card_ability = get_card_information()

Analytics

In [13]:
def request_analytics(format):
    """Return the raw analytics dataframe for the meta period id `format`.

    The 'data' member of the response is flattened with `pd.json_normalize`
    and transposed, so the flattened keys end up in the index.
    """
    # Headers are deliberately not sent for this endpoint
    payload = request('analytics', False, format)
    flattened = pd.json_normalize(payload['data'])
    return flattened.T

# Raw analytics for the meta period id returned by request_active()
raw_analytics = request_analytics(format_id)

In [21]:
def filter_raw_analytics():
    """Return per-card, per-tier statistics from the global `raw_analytics`.

    The index labels of `raw_analytics` are split on '.' into
    'card_id', 'archetype_id' and 'tier'; only rows whose archetype is 'ALL'
    are kept.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'card_id' with columns 'timestamp', 'tier', 'wins', 'copies'.
        NOTE(review): assumes each cell of column 0 is a one-element list whose
        element starts with [games, wins, check, copies] -- confirm against the API.
    """
    # Move the dotted keys out of the index into a 'raw' column
    df = (raw_analytics.reset_index()
                       .rename(columns={'index': 'raw'}))

    # Split raw column into multiple
    df[['card_id', 'archetype_id', 'tier']] = df['raw'].str.split('.', expand=True)

    # Record only consolidated data by archetype and change index to card_id
    df = (df[df['archetype_id'] == 'ALL']
            .drop(columns=['raw', 'archetype_id'])
            .set_index('card_id'))

    # Replace abbreviations with tier full names. Restricted to the 'tier'
    # column so the list-valued statistics column is never scanned or altered;
    # unknown abbreviations are left as they are.
    df['tier'] = df['tier'].replace({'b': 'Bronze',
                                     's': 'Silver',
                                     'g': 'Gold',
                                     'p': 'Platinum'})

    # Time stamp
    df['timestamp'] = pd.Timestamp.today()

    # Unnest statistics: explode unwraps the outer list, then the first four
    # inner values become their own columns
    unnest = ['games', 'wins', 'check', 'copies']
    df[unnest] = pd.DataFrame(df.explode(0)[0].to_list(), index=df.index).iloc[:, :4]

    return df[['timestamp', 'tier', 'wins', 'copies']]

In [24]:
def get_card_tiered_daily_games(df):
    """Return dataframe 'card_tiered_daily_games' in long form.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by 'card_id' with columns 'timestamp', 'tier' and
        'copies', where every 'copies' cell is a list of four values (games
        played with 1, 2, 3 and 4 copies, going by the output column names).
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'card_id' with columns 'timestamp', 'tier', 'copies'
        (1 to 4) and 'games'; rows with a missing value are dropped.
    """
    # Unnest copies on a private copy so the caller's frame keeps its columns
    unnest = [1, 2, 3, 4]
    wide = df.copy()
    wide[unnest] = pd.DataFrame(wide['copies'].to_list(), index=wide.index)

    # The list-valued 'copies' column is not an id/value var, so melt discards
    # it and the name can be reused for the number of copies
    return (wide.reset_index()
                .melt(id_vars=['card_id', 'timestamp', 'tier'],
                      value_vars=unnest,
                      var_name='copies',
                      value_name='games')
                .set_index('card_id')
                .dropna())

In [23]:
def get_analytics():
    """Return the pair `(card_tiered_daily_wins, card_tiered_daily_games)`.

    Both tables are derived from a single call to `filter_raw_analytics`.
    """
    stats = filter_raw_analytics()

    # Wins need no reshaping: just the relevant columns
    wins = stats[['timestamp', 'tier', 'wins']]
    games = get_card_tiered_daily_games(stats)

    return wins, games

# Build both analytics tables from the global `raw_analytics`
card_tiered_daily_wins, card_tiered_daily_games = get_analytics()