In [1]:
import os
import pickle
import numpy as np
import re
import requests
import pandas as pd
from unidecode import unidecode
from nltk.corpus import stopwords
from string import punctuation
from nltk.tokenize import word_tokenize
from sklearn.preprocessing import MinMaxScaler
from gensim.models import Word2Vec
from bs4 import BeautifulSoup
import torch
from sentence_transformers import SentenceTransformer
from tqdm.auto import tqdm
DATA_PATH = os.path.abspath(os.path.join('..','data'))
CARDS_PATH = os.path.join(DATA_PATH, "cards_unique.pkl")

  from tqdm.autonotebook import tqdm, trange





In [2]:
def load_data(fp):
    """
    Reads in data.

    :param fp: filepath of data stored in pickle
    :returns: data stored in pickle
    """
    with open(fp, "rb") as f:
        data = pickle.load(f)
    return data

def tokenize(text):
    """
    Tokenizes text.

    :param text: text to tokenize
    :returns: tokenized text
    """
    to_remove = stopwords.words("english")+list(punctuation)
    return [x for x in word_tokenize(text.lower()) if x not in to_remove]


In [3]:
def clean_data(cards):
    """
    Performs multiple transformations on data, such as filtering, tokenizing text, and extracting keywords.
    
    :param cards: DataFrame containing information of each non-commander card, such as name, text, and color
    :param commanders: DataFrame containing information of each commander card, such as name, text, and color
    :returns: tuple containing all cleaned data, cleaned non-commander card data, and cleaned commander data
    """
    # filtering out non-legal cards in commander
    legal = pd.read_csv('../data/cardLegalities.csv').loc[:,['commander', 'uuid']]
    cards = cards.merge(legal,on='uuid')
    cards = cards[cards['commander'] == 'Legal']

    cards_clean = cards.loc[cards["text"].apply(lambda x: not (isinstance(x, float) and np.isnan(x))), ["name", "text", "colorIdentity", "keywords", "type"]]
    cards_clean["color"] = cards_clean["colorIdentity"].str.split(", ")
    # tokenize text
    cards_clean["tokenized"] = cards_clean["text"].apply(tokenize)
    # normalized text length
    cards_clean["textLength"] = cards_clean["text"].str.len()
    cards_clean["textLength"] = MinMaxScaler().fit_transform(cards_clean[["textLength"]])
    # keyword list
    cards_clean["keyword_list"] = cards_clean["keywords"].str.split(", ")


    return cards_clean

def train_model(cards_clean):
    """
    Trains Word2Vec model on card text.
    
    :param cards_clean: cleaned DataFrame containing information on all cards
    :returns: trained Word2Vec model
    """
    return Word2Vec(sentences=cards_clean["tokenized"])

In [4]:

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
#embeddings = model.encode(sentences)
def cosine_similarity(vector1, vector2):
    """
    Computes the cosine similarity between two vectors.
    
    Args:
    vector1 (torch.Tensor): A tensor representing the first vector.
    vector2 (torch.Tensor): A tensor representing the second vector.
    
    Returns:
    float: The cosine similarity between vector1 and vector2.
    """
    # Ensure the vectors are 1-dimensional
    
    # Compute the dot product between the two vectors
    dot_product = np.dot(vector1, vector2)
    
    # Compute the magnitudes (norms) of the vectors
    norm1 = np.linalg.norm(vector1)
    norm2 = np.linalg.norm(vector2)
    
    # Compute the cosine similarity
    cos_similarity = dot_product / (norm1 * norm2)
    
    return cos_similarity.item()



In [5]:
def cosine_similarity(vec1, vec2):
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

def find_sim(colors, text):
    # Create a boolean mask based on the condition
    mask = np.where(
        ~cards_clean['color'].apply(lambda x: isinstance(x, list)) | 
        (cards_clean['color'].apply(lambda x: isinstance(x, list) and any([color in colors for color in x]))),
        True, False
    )

    # Apply the mask to filter the DataFrame
    filtered_cards = cards_clean[mask]
    encoded = model.encode(text)

    # Define the function to compute similarity
    def compute_similarity(row):
        similarity = cosine_similarity(encoded, model.encode(row.text))
        return similarity, row.name, row.type

    # Convert DataFrame to list of tuples for use with map
    rows = list(filtered_cards.itertuples(index=False))

    # Enable tqdm progress bar for the map function
    tqdm.pandas()
    scores = list(map(compute_similarity, tqdm(rows, desc="Computing similarities")))

    sorted_scores = sorted(scores, key=lambda x: x[0], reverse=True)
    
    return sorted_scores

In [6]:
cards_clean = clean_data(load_data(CARDS_PATH))

sim_cards = find_sim(['B', 'G'], 'Other creatures are Food artifacts in addition to their other types and have “2, T, Sacrifice this permanent: You gain 3 life.”Whenever a Food is put into a graveyard from the battlefield, put two +1/+1 counters on Ygra, Eater of All.')

Computing similarities:   0%|          | 0/13473 [00:00<?, ?it/s]

In [7]:
def format_card_name(card_name:str):
    """
    Formats a card name to be used in a URL for querying from EDHREC.
    """
    first_card = card_name.split("//")[0].strip() # If the card is a split card, only use the first card
    non_alphas_regex = "[^\w\s-]" # Remove everything that's not alphanumeric or space or hyphen
    formatted_name = unidecode(first_card) # remove diacritics
    formatted_name = re.sub(non_alphas_regex, "", formatted_name)
    formatted_name = formatted_name.lower() # Make lowercase
    formatted_name = formatted_name.replace(" ", "-")  # Replace spaces with hyphens
    formatted_name = re.sub(r"-+", "-", formatted_name) # do not have multiple hyphens
    # print(f"In format_commander_name and formatted name is {formatted_name}")
    return formatted_name

def request_json(name:str, redirect=''):
    """
    Request JSON data from EDHREC for a card.

    Parameters:
    - name: card name
    - is_commander: boolean indicating whether the card is a commander
    - redirect: string indicating a redirect URL (optional)
    """
    formatted_name = format_card_name(name)
    if redirect:
        print(f"Redirected to {redirect}")
        json_url = f"https://json.edhrec.com/pages{redirect}.json"
    else:
        json_url = f"https://json.edhrec.com/pages/commanders/{formatted_name}.json"
    response = requests.get(json_url)
    if response.status_code == 200:
        json_data = response.json()
        if 'redirect' in json_data:
            return request_json(name, redirect=json_data['redirect'])
        # print(f"JSON request successful!")
        return json_data
    else:
        json_url = f"https://json.edhrec.com/pages/cards/{formatted_name}.json"
        response = requests.get(json_url)
        if response.status_code == 200:
            json_data = response.json()
            if 'redirect' in json_data:
                return request_json(name, redirect=json_data['redirect'])
            return json_data
        else:
            print(f"JSON request for \"{name}\" ({formatted_name}) failed! Try different card name")

In [27]:
scores = {}
for card in tqdm(sim_cards[:100]):
    #print(card)
    json_data = request_json(card[1])
    #print(json_data)
    if json_data:
        for cmdr in json_data['container']['json_dict']['cardlists'][0]['cardviews']:
            syn_colors = cards_clean[cards_clean['name'] == cmdr['name']]['color'].tolist()#[0]
            if ~(isinstance(syn_colors, list)) | (isinstance(syn_colors, list) and all([color in ['B', 'G'] for color in syn_colors])):
                scores[cmdr['name']] = 1

        for syn_list in json_data['container']['json_dict']['cardlists'][1:]:
            for synergy in syn_list['cardviews']:
                try:
                    syn_colors = cards_clean[cards_clean['name'] == synergy['name']]['color'].tolist()[0]
                    #print(syn_colors)
                    if ~(isinstance(syn_colors, list)) | (isinstance(syn_colors, list) and all([color in {'B', 'G'} for color in syn_colors])):
                        if synergy['name'] in scores:
                            scores[synergy['name']] += synergy['synergy']
                        else:
                            scores[synergy['name']] = synergy['synergy']
                except:
                    continue

    
    
            
        #print(sorted(json_data['cardlist'], key=lambda card: card['num_decks'], reverse=True))

  0%|          | 0/100 [00:00<?, ?it/s]

['G', 'W']
['G', 'W']
['G']
['G']
['G', 'W']
['G', 'W']
['W']
nan
['W']
['G']
['G']
['B']
['G']
['G', 'R', 'W']
['B']
nan
['B', 'G']
['B']
['B']
['B', 'G']
['B']
['G']
['G', 'U']
['G', 'U', 'W']
['B', 'G']
['W']
['B', 'R']
['B', 'G']
['G']
['B', 'G']
['R']
['G']
['G']
['G', 'W']
['B', 'R']
['G', 'W']
['R']
['B', 'R']
['B']
nan
['W']
['G']
['G']
['B', 'G', 'R']
['B']
['B', 'G']
['R']
['B']
['B']
['B']
['R']
['B', 'G', 'R']
['B']
['R']
['B']
['R']
['B', 'R']
['W']
['R']
['R']
['B']
['R']
['B']
['B', 'G']
['G']
['W']
['B', 'W']
['R']
['B', 'G']
['G']
['R']
['W']
['R']
['B', 'W']
['G']
['B']
['B', 'R']
['B', 'R']
['G']
['B', 'G']
['U']
['B']
['W']
['U']
['B']
['B', 'G']
['B']
['B', 'G']
['G']
['G']
['U']
['B', 'R']
['G', 'R']
['G']
['G']
['B']
['B']
['U']
['R']
['W']
['G']
['G']
['R']
['G']
['G']
['B', 'R']
['B']
['G']
['R']
['R']
['G']
['G']
['W']
['W']
['B']
['B']
['B']
['R']
['R']
['G']
['G']
['W']
['W']
['G']
['R']
['G']
['R']
['G']
['B']
['G']
['B']
['B', 'G']
['B']
['G']
['R']
['R']


In [15]:
sorted(scores, key=scores.get, reverse=True)

['Sandsteppe Citadel',
 'Canopy Vista',
 'Mirkwood Bats',
 'Tireless Provisioner',
 "Night of the Sweets' Revenge",
 'Woodland Cemetery',
 'Savvy Hunter',
 'Savage Lands',
 'Gilded Goose',
 'Deceptive Landscape',
 'Restless Cottage',
 'Gingerbread Cabin',
 'Cultivate',
 'Cinder Glade',
 'Golgari Rot Farm',
 'The Shire',
 'Twisted Landscape',
 'Rosie Cotton of South Lane',
 'Banquet Guests',
 'Rapacious Guest',
 'Experimental Confectioner',
 "Ziatora's Proving Ground",
 'Isolated Chapel',
 'Academy Manufactor',
 'Of Herbs and Stewed Rabbit',
 'Jungle Hollow',
 'Llanowar Wastes',
 'Many Partings',
 'Temple of Malady',
 'Jungle Shrine',
 'Pippin, Warden of Isengard',
 'Revive the Shire',
 'Seaside Citadel',
 'Smoldering Marsh',
 'Opulent Palace',
 'Indatha Triome',
 'Zagoth Triome',
 'Trail of Crumbs',
 'Spider Food',
 'Sunpetal Grove',
 "Witch's Oven",
 'Overgrown Tomb',
 'Elanor Gardner',
 'Golgari Signet',
 'Scoured Barrens',
 'Bake into a Pie',
 'Stomping Ground',
 'Moldervine Reclama

In [48]:
cards_clean[cards_clean['name'] == 'Sandsteppe Citadel']['color']

6678    [B, G, W]
Name: color, dtype: object

In [None]:
[cmdr['name'] for cmdr in json_data['container']['json_dict']['cardlists'][0]['cardviews']]

['Asmoranomardicadaistinaculdacar',
 'Gyome, Master Chef',
 'Greta, Sweettooth Scourge',
 'Willowdusk, Essence Seer',
 'Chatterfang, Squirrel General',
 'Merry, Warden of Isengard // Pippin, Warden of Isengard',
 'Korvold, Fae-Cursed King',
 'Dina, Soul Steeper',
 'Frodo, Adventurous Hobbit // Sam, Loyal Attendant',
 'Tergrid, God of Fright']

In [16]:
all([color in ['B', 'G'] for color in cards_clean[cards_clean['name'] == 'Sandsteppe Citadel']['color']])

False

In [23]:
(isinstance(cards_clean[cards_clean['name'] == 'Sandsteppe Citadel']['color'].tolist()[0], list))

True

In [21]:
cards_clean[cards_clean['name'] == 'Sandsteppe Citadel']['color'].tolist()[0]

['B', 'G', 'W']