In [216]:
import os
import pickle
import numpy as np
import re
import requests
import pandas as pd
from unidecode import unidecode
from nltk.corpus import stopwords
from string import punctuation
from nltk.tokenize import word_tokenize
from sklearn.preprocessing import MinMaxScaler
from gensim.models import Word2Vec
from bs4 import BeautifulSoup
import torch
from sentence_transformers import SentenceTransformer
from tqdm.auto import tqdm
DATA_PATH = os.path.abspath(os.path.join('..','data'))
CARDS_PATH = os.path.join(DATA_PATH, "cards_unique.pkl")

In [158]:
def load_data(fp):
    """
    Reads in data.

    :param fp: filepath of data stored in pickle
    :returns: data stored in pickle
    """
    with open(fp, "rb") as f:
        data = pickle.load(f)
    return data

def tokenize(text):
    """
    Tokenizes text.

    :param text: text to tokenize
    :returns: tokenized text
    """
    to_remove = stopwords.words("english")+list(punctuation)
    return [x for x in word_tokenize(text.lower()) if x not in to_remove]


In [159]:
def clean_data(cards):
    """
    Performs multiple transformations on data, such as filtering, tokenizing text, and extracting keywords.
    
    :param cards: DataFrame containing information of each non-commander card, such as name, text, and color
    :param commanders: DataFrame containing information of each commander card, such as name, text, and color
    :returns: tuple containing all cleaned data, cleaned non-commander card data, and cleaned commander data
    """
    # filtering out non-legal cards in commander
    legal = pd.read_csv('../data/cardLegalities.csv').loc[:,['commander', 'uuid']]
    cards = cards.merge(legal,on='uuid')
    cards = cards[cards['commander'] == 'Legal']

    cards_clean = cards.loc[cards["text"].apply(lambda x: not (isinstance(x, float) and np.isnan(x))), ["name", "text", "colorIdentity", "keywords", "type"]]
    cards_clean["color"] = cards_clean["colorIdentity"].str.split(", ")
    # tokenize text
    cards_clean["tokenized"] = cards_clean["text"].apply(tokenize)
    # normalized text length
    cards_clean["textLength"] = cards_clean["text"].str.len()
    cards_clean["textLength"] = MinMaxScaler().fit_transform(cards_clean[["textLength"]])
    # keyword list
    cards_clean["keyword_list"] = cards_clean["keywords"].str.split(", ")


    return cards_clean

def train_model(cards_clean):
    """
    Trains Word2Vec model on card text.
    
    :param cards_clean: cleaned DataFrame containing information on all cards
    :returns: trained Word2Vec model
    """
    return Word2Vec(sentences=cards_clean["tokenized"])

In [172]:

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
embeddings = model.encode(sentences)
def cosine_similarity(vector1, vector2):
    """
    Computes the cosine similarity between two vectors.
    
    Args:
    vector1 (torch.Tensor): A tensor representing the first vector.
    vector2 (torch.Tensor): A tensor representing the second vector.
    
    Returns:
    float: The cosine similarity between vector1 and vector2.
    """
    # Ensure the vectors are 1-dimensional
    
    # Compute the dot product between the two vectors
    dot_product = np.dot(vector1, vector2)
    
    # Compute the magnitudes (norms) of the vectors
    norm1 = np.linalg.norm(vector1)
    norm2 = np.linalg.norm(vector2)
    
    # Compute the cosine similarity
    cos_similarity = dot_product / (norm1 * norm2)
    
    return cos_similarity.item()



modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [180]:
def cosine_similarity(vec1, vec2):
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

def find_sim(colors, text):
    # Create a boolean mask based on the condition
    mask = np.where(
        ~cards_clean['color'].apply(lambda x: isinstance(x, list)) | 
        (cards_clean['color'].apply(lambda x: isinstance(x, list) and any([color in colors for color in x]))),
        True, False
    )

    # Apply the mask to filter the DataFrame
    filtered_cards = cards_clean[mask]
    encoded = model.encode(text)

    # Define the function to compute similarity
    def compute_similarity(row):
        similarity = cosine_similarity(encoded, model.encode(row.text))
        return similarity, row.name, row.type

    # Convert DataFrame to list of tuples for use with map
    rows = list(filtered_cards.itertuples(index=False))

    # Enable tqdm progress bar for the map function
    tqdm.pandas()
    scores = list(map(compute_similarity, tqdm(rows, desc="Computing similarities")))

    sorted_scores = sorted(scores, key=lambda x: x[0], reverse=True)
    
    return sorted_scores

In [184]:
sim_cards = find_sim(['B', 'G'], 'Other creatures are Food artifacts in addition to their other types and have “2, T, Sacrifice this permanent: You gain 3 life.”Whenever a Food is put into a graveyard from the battlefield, put two +1/+1 counters on Ygra, Eater of All.')

Computing similarities:   0%|          | 0/13473 [00:00<?, ?it/s]

In [208]:
def format_card_name(card_name:str):
    """
    Formats a card name to be used in a URL for querying from EDHREC.
    """
    first_card = card_name.split("//")[0].strip() # If the card is a split card, only use the first card
    non_alphas_regex = "[^\w\s-]" # Remove everything that's not alphanumeric or space or hyphen
    formatted_name = unidecode(first_card) # remove diacritics
    formatted_name = re.sub(non_alphas_regex, "", formatted_name)
    formatted_name = formatted_name.lower() # Make lowercase
    formatted_name = formatted_name.replace(" ", "-")  # Replace spaces with hyphens
    formatted_name = re.sub(r"-+", "-", formatted_name) # do not have multiple hyphens
    # print(f"In format_commander_name and formatted name is {formatted_name}")
    return formatted_name

def request_json(name:str, redirect=''):
    """
    Request JSON data from EDHREC for a card.

    Parameters:
    - name: card name
    - is_commander: boolean indicating whether the card is a commander
    - redirect: string indicating a redirect URL (optional)
    """
    formatted_name = format_card_name(name)
    if redirect:
        print(f"Redirected to {redirect}")
        json_url = f"https://json.edhrec.com/pages{redirect}.json"
    else:
        json_url = f"https://json.edhrec.com/pages/commanders/{formatted_name}.json"
    response = requests.get(json_url)
    if response.status_code == 200:
        json_data = response.json()
        if 'redirect' in json_data:
            return request_json(name, redirect=json_data['redirect'])
        # print(f"JSON request successful!")
        return json_data
    else:
        json_url = f"https://json.edhrec.com/pages/cards/{formatted_name}.json"
        response = requests.get(json_url)
        if response.status_code == 200:
            json_data = response.json()
            if 'redirect' in json_data:
                return request_json(name, redirect=json_data['redirect'])
            return json_data
        else:
            print(f"JSON request for \"{name}\" ({formatted_name}) failed! Try different card name")

In [219]:
scores = {}
for card in sim_cards:
    #print(card)
    json_data = request_json(card[1])
    #print(json_data)
    if json_data:
        for cmdr in json_data['container']['json_dict']['cardlists'][0]['cardviews']:
            scores[cmdr['name']]
            
        #print(sorted(json_data['cardlist'], key=lambda card: card['num_decks'], reverse=True))

(0.6674562, 'Taste of Death', 'Sorcery')
{'creature': 30, 'instant': 8, 'sorcery': 11, 'artifact': 10, 'enchantment': 6, 'battle': 0, 'planeswalker': 1, 'land': 33, 'basic': 19, 'nonbasic': 14, 'similar': [{'aetherhub_uri': 'https://aetherhub.com/Meta/Format/Commander/?com=Barter%20in%20Blood&updated=all-time', 'archidekt_uri': 'https://archidekt.com/search/decks?commanders="Barter%20in%20Blood"&formats=3', 'color_identity': ['B'], 'cmc': 4.0, 'deckstats_uri': 'https://deckstats.net/decks/search/?search_cards[]=Barter%20in%20Blood&search_format=10', 'image_uris': [{'normal': 'https://cards.scryfall.io/normal/front/2/3/23986add-b33d-4bad-86f3-e2d0f99cf949.jpg?1600699425', 'art_crop': 'https://cards.scryfall.io/art_crop/front/2/3/23986add-b33d-4bad-86f3-e2d0f99cf949.jpg?1600699425'}], 'layout': 'normal', 'moxfield_uri': 'https://moxfield.com/decks/public?format=commander&filter=Barter%20in%20Blood', 'mtggoldfish_uri': 'https://www.mtggoldfish.com/deck/custom?mformat=commander&commander=B

KeyError: 'cardlist'

In [227]:
json_data['container']['json_dict']['cardlists'][0]['cardviews']

{'cardviews': [{'name': 'Asmoranomardicadaistinaculdacar',
   'sanitized': 'asmoranomardicadaistinaculdacar',
   'sanitized_wo': 'asmoranomardicadaistinaculdacar',
   'url': '/commanders/asmoranomardicadaistinaculdacar',
   'inclusion': 1284,
   'label': '43.13% of 2977 decks\n1284 decks',
   'num_decks': 1284,
   'potential_decks': 2977},
  {'name': 'Gyome, Master Chef',
   'sanitized': 'gyome-master-chef',
   'sanitized_wo': 'gyome-master-chef',
   'url': '/commanders/gyome-master-chef',
   'inclusion': 1522,
   'label': '38.90% of 3913 decks\n1522 decks',
   'num_decks': 1522,
   'potential_decks': 3913},
  {'name': 'Greta, Sweettooth Scourge',
   'sanitized': 'greta-sweettooth-scourge',
   'sanitized_wo': 'greta-sweettooth-scourge',
   'url': '/commanders/greta-sweettooth-scourge',
   'inclusion': 437,
   'label': '27.99% of 1561 decks\n437 decks',
   'num_decks': 437,
   'potential_decks': 1561},
  {'name': 'Willowdusk, Essence Seer',
   'sanitized': 'willowdusk-essence-seer',
   

In [231]:
[cmdr['name'] for cmdr in json_data['container']['json_dict']['cardlists'][0]['cardviews']]

['Asmoranomardicadaistinaculdacar',
 'Gyome, Master Chef',
 'Greta, Sweettooth Scourge',
 'Willowdusk, Essence Seer',
 'Chatterfang, Squirrel General',
 'Merry, Warden of Isengard // Pippin, Warden of Isengard',
 'Korvold, Fae-Cursed King',
 'Dina, Soul Steeper',
 'Frodo, Adventurous Hobbit // Sam, Loyal Attendant',
 'Tergrid, God of Fright']