In [3]:
import numpy as np
import json

# Load card data
# Read the JSON inside a context manager so the file handle is closed as soon
# as parsing finishes, instead of being left open until garbage collection.
with open('../data/cards.json', 'r', encoding='utf-8') as cards_file:
    np_cards = np.array(json.load(cards_file))

In [4]:
from preprocess.scryfall_preprocessor import ScryfallPreprocessor

# Initialize the preprocessor; later cells call its train_text() and
# stopword_preprocessing() methods to turn each card record into model input text.
# NOTE(review): None is passed as the only constructor argument; what that
# argument represents is not visible in this notebook — confirm against
# ScryfallPreprocessor.__init__.
sf_preprocessor = ScryfallPreprocessor(None)

In [5]:
# Label letter for each position of a card's color_identity vector:
# position 0 -> w, 1 -> u, 2 -> b, 3 -> r, 4 -> g, 5 -> c.
color_identity_letter = ['w', 'u', 'b', 'r', 'g', 'c']

# Build the dataset as a list of (input_text, target_letter) pairs.
# Per the original author's note, the input text is
# type_line + rules_text (if not None) + power / toughness (if not None);
# that composition happens inside sf_preprocessor.train_text — TODO confirm.
inputs = []
for card in np_cards:
    identity = card['color_identity']

    # Cards whose color_identity entries sum to more than 1 are left out.
    if sum(identity) > 1:
        continue

    # Raw training text first, then stopword preprocessing on top of it.
    text = sf_preprocessor.stopword_preprocessing(sf_preprocessor.train_text(card))

    # The label is the letter at the first position whose entry equals 1;
    # if no entry equals 1 the label stays the empty string.
    label = next(
        (color_identity_letter[pos] for pos, flag in enumerate(identity) if flag == 1),
        '',
    )
    inputs.append((text, label))

# Example
print(inputs[10])

('enchantment aura enchant creature cardname enters battlefield tap enchanted creature enchanted creature untap controller untap step', 'u')


In [6]:
# Train/test split: hold out 20% of the (text, label) pairs for evaluation.
# random_state is fixed so the same partition is produced on every run.
# No `stratify` argument is passed, so label proportions in the two parts
# are whatever the shuffle happens to give.
from sklearn.model_selection import train_test_split

train, test = train_test_split(inputs, test_size=0.2, random_state=42)

In [7]:
# TF-IDF Vectorizing
from sklearn.feature_extraction.text import TfidfVectorizer

# Separate the text component of each (text, label) pair for both partitions.
train_texts = [text for text, _label in train]
test_texts = [text for text, _label in test]

# The vectorizer is fitted on the training texts only; the test texts are
# transformed with that already-fitted vocabulary and weights.
tfidf = TfidfVectorizer()
x_train = tfidf.fit_transform(train_texts)
x_test = tfidf.transform(test_texts)

# Labels, in the same order as the rows of the matrices above.
y_train = [label for _text, label in train]
y_test = [label for _text, label in test]



In [8]:
from sklearn.metrics import f1_score, recall_score
from sklearn.metrics import precision_score
# Logistic Regression
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the TF-IDF features.
# max_iter is raised to 1000 to give the solver more iterations to converge.
clf = LogisticRegression(max_iter=1000)
clf.fit(x_train, y_train)

# Predict the test set once and reuse the result for every metric, rather
# than re-running clf.predict(x_test) for each score.
y_pred = clf.predict(x_test)

# Accuracy comes from the estimator's own score(); the remaining metrics are
# averaged across classes weighted by each class's support.
print("Logistic Regression Accuracy: ", clf.score(x_test, y_test))
print("Logistic Regression F1 Score: ", f1_score(y_test, y_pred, average='weighted'))
print("Logistic Regression Precision: ", precision_score(y_test, y_pred, average='weighted'))
print("Logistic Regression Recall: ", recall_score(y_test, y_pred, average='weighted'))

Logistic Regression Accuracy:  0.7239860950173812
Logistic Regression F1 Score:  0.7238926381936406
Logistic Regression Precision:  0.7248200179975769
Logistic Regression Recall:  0.7239860950173812


In [24]:
# Example Prediction

# get random card of test set
import random

# Draw one (text, label) pair from the held-out set. No seed is set in this
# cell, so a different card may be shown on each run.
random_card = random.choice(test)
card_text, actual_label = random_card

# get prediction
# The fitted vectorizer expects an iterable of documents, hence the
# one-element list; predict returns one label per row, so take the first.
card_features = tfidf.transform([card_text])
prediction = clf.predict(card_features)[0]

print("Text: ", card_text)
print("Actual: ", actual_label)
print("Prediction: ", prediction)

Text:  creature human cleric tap tap target non human creature
Actual:  w
Prediction:  w
