In [1]:
# Folder the reference card images are downloaded into and later read back from.
REFS_FOLDER = "refs"
# Folder whose images are fed to the identification step.
TESTS_FOLDER = "tests"
# Local copy of the Scryfall bulk-data JSON download.
SCRYFALL_FILE = "scryfall.json"
# File the nmslib index is saved to and loaded from.
INDEX_FILE = "index.bin"
# SQLite database holding the per-keypoint descriptors.
DATABASE_FILE = "data.db"


In [14]:
import requests

# Which Scryfall bulk export to fetch:
# oracle_cards | unique_artwork | default_cards | all_cards | rulings
DATATYPE = "unique_artwork"

# Fetch the catalogue of bulk exports. Fail loudly on an HTTP error instead
# of trying to parse an error page as JSON.
bulkResponse = requests.get("https://api.scryfall.com/bulk-data")
bulkResponse.raise_for_status()
bulkData = bulkResponse.json()

# Select the entry matching DATATYPE. A plain `for ... break` would silently
# fall through to the last entry when nothing matches and download the wrong
# export, so make the "not found" case explicit.
data = next(
    (entry for entry in bulkData["data"] if entry["type"] == DATATYPE), None)
if data is None:
    raise ValueError("No Scryfall bulk-data entry of type %r" % DATATYPE)

# Stream the export to disk in chunks so the whole file never has to sit in
# memory, and refuse to save an HTTP error body as if it were the data file.
with requests.get(data["download_uri"], stream=True) as downloadRequest:
    downloadRequest.raise_for_status()
    with open(SCRYFALL_FILE, "wb") as outfile:
        for chunk in downloadRequest.iter_content(chunk_size=1 << 20):
            outfile.write(chunk)


In [None]:
import json
import requests
from progress import ProgressBar
from os import mkdir
from os.path import exists, join

print("Parse Scryfall file")

# Read the bulk export. The context manager releases the handle even if
# parsing fails, and the explicit encoding avoids depending on the platform
# default when the JSON contains non-ASCII text.
with open(SCRYFALL_FILE, encoding="utf-8") as scryfallFile:
    scryfallData = json.load(scryfallFile)

if not exists(REFS_FOLDER):
    mkdir(REFS_FOLDER)

tasks = []   # {"id": card id, "uri": image url} entries still to download
errors = []  # identifiers of cards whose entry could not be interpreted

IMG_TYPE = "art_crop"  # png | border_crop | art_crop | large | normal | small

progress = ProgressBar(len(scryfallData))

for card in scryfallData:
    try:
        # Only cards with an oracle id that exist in paper are of interest.
        if "oracle_id" not in card:
            continue
        if "paper" not in card["games"]:
            continue
        if "image_uris" in card:
            tasks.append(
                {"id": card["id"], "uri": card["image_uris"][IMG_TYPE]})
        elif "card_faces" in card:
            # NOTE(review): every face is queued under the same card id, so
            # all faces are written to the same "<id>.jpg" and the last one
            # wins. Left unchanged because the describing step derives card
            # ids from these filenames.
            for face in card["card_faces"]:
                tasks.append(
                    {"id": card["id"], "uri": face["image_uris"][IMG_TYPE]})
    except Exception:
        # `except Exception` (rather than a bare except) lets Ctrl-C interrupt
        # the loop. Use .get() so reporting a malformed card cannot itself
        # raise inside the handler.
        if isinstance(card, dict):
            errors.append(card.get("scryfall_uri", card.get("id", repr(card))))
        else:
            errors.append(repr(card))
    finally:
        progress.tick()

progress.stop()

# Skip images that are already present on disk.
tasks = [task for task in tasks
         if not exists(join(REFS_FOLDER, task["id"] + ".jpg"))]

for error in errors:
    print("An error happened on:", error)

print("Found %i images to download" % (len(tasks)))

downloadErrors = []

progress = ProgressBar(len(tasks))
for task in tasks:
    try:
        downloadRequest = requests.get(task["uri"])
        # Without this check an HTTP error body (e.g. a rate-limit response)
        # would be saved as "<id>.jpg" and never re-downloaded, because the
        # file would then already exist.
        downloadRequest.raise_for_status()
        with open(join(REFS_FOLDER, task["id"] + ".jpg"), "wb") as outfile:
            outfile.write(downloadRequest.content)
    except (requests.RequestException, OSError) as error:
        # One failed image should not abort the remaining downloads.
        downloadErrors.append((task["uri"], error))
    finally:
        progress.tick()
progress.stop()

for uri, error in downloadErrors:
    print("Could not download %s: %s" % (uri, error))


In [2]:
import sqlite3
import cv2 as cv
import numpy as np
from os import listdir
from os.path import join
from progress import ProgressBar

# Extract SIFT descriptors for every reference image that is not yet in the
# database and store one row per keypoint (card id, x, y, descriptor text).
db = sqlite3.connect(DATABASE_FILE)
try:
    cursor = db.cursor()

    cursor.execute(
        "CREATE TABLE IF NOT EXISTS descriptors (id INTEGER PRIMARY KEY AUTOINCREMENT, cardId VARCHAR(255), x FLOAT, y FLOAT, value TEXT)")

    # Card ids that already have descriptors stored; those images are skipped.
    existingCardIds = {
        storedId for storedId, in cursor.execute(
            "SELECT DISTINCT cardId FROM descriptors")}

    tasks = []
    errors = []

    # Use REFS_FOLDER (not a hard-coded "refs") so this stays in sync with
    # the folder the images are read from below.
    for filename in listdir(REFS_FOLDER):
        # Ignore anything that is not one of the "<cardId>.jpg" images.
        if not filename.endswith(".jpg"):
            continue
        cardId = filename[:-len(".jpg")]
        if cardId in existingCardIds:
            continue
        tasks.append(cardId)

    print("Found %i images to describe" % (len(tasks)))

    # Detector limited to 200 features per reference image.
    sift = cv.SIFT_create(200)

    progress = ProgressBar(len(tasks))
    for task in tasks:
        try:
            image = cv.imread(join(REFS_FOLDER, task + ".jpg"),
                              cv.IMREAD_GRAYSCALE)
            # imread signals an unreadable/corrupt file by returning None.
            if image is None:
                raise ValueError("image could not be read")
            kp, des = sift.detectAndCompute(image, None)
            # No descriptors are returned when no keypoints were detected.
            if des is None:
                raise ValueError("no SIFT keypoints found")
            # Each descriptor is stored as space-separated integers.
            rows = [
                (task, point.pt[0], point.pt[1],
                 " ".join(str(int(component)) for component in descriptor))
                for point, descriptor in zip(kp, des)]
            cursor.executemany(
                "INSERT INTO descriptors (cardId, x, y, value) VALUES (?, ?, ?, ?)", rows)
        except Exception as error:
            # Keep going on a bad image, but remember why it failed.
            errors.append((task, error))
        finally:
            progress.tick()
    progress.stop()

    for task, error in errors:
        print("An error happened on:", task, "-", error)

    db.commit()
finally:
    # Release the database handle even if something above raised.
    db.close()


Found 34179 images to describe
An error happened on: 70afd57f-d4cb-4219-a2c7-28ed9b1abe0f
An error happened on: 60ecd9bf-757b-4eeb-bef3-0b9ad2e86fca


In [12]:
import sqlite3
import nmslib
import cv2 as cv
import numpy as np
from progress import ProgressBar

# Open the descriptor store; every stored descriptor becomes one index entry.
db = sqlite3.connect(DATABASE_FILE)
cursor = db.cursor()

# Index configuration: HNSW over dense uint8 vectors with integer distances.
indexOptions = dict(
    method='hnsw',
    space='l2sqr_sift',
    data_type=nmslib.DataType.DENSE_UINT8_VECTOR,
    dtype=nmslib.DistType.INT,
)
index = nmslib.init(**indexOptions)

# Row count is only needed to report and to size the progress bar.
(count,) = cursor.execute("SELECT COUNT(*) FROM descriptors").fetchone()

print("Found %i descriptors to index" % (count))

progress = ProgressBar(count)
descriptorRows = cursor.execute("SELECT id, value FROM descriptors")
for rowId, encoded in descriptorRows:
    # `value` holds the descriptor as space-separated integers; the database
    # row id doubles as the data point id in the index.
    index.addDataPoint(rowId, np.uint8(encoded.split(" ")))
    progress.tick()
progress.stop()

db.close()

# NOTE(review): the meaning of the trailing True flags is not visible here;
# confirm against the nmslib documentation.
buildParameters = {
    'M': 15,
    'indexThreadQty': 4,
    'efConstruction': 100,
    'post': 0,
}
index.createIndex(buildParameters, True)
index.saveIndex(INDEX_FILE, True)



Found 3415805 descriptors to index


---


In [3]:
import sqlite3
import nmslib
import cv2 as cv
import numpy as np

# SIFT detector used on query images; no feature cap is passed here.
sift = cv.SIFT_create()

# Index object created with the method, space and data types given below.
index = nmslib.init(
    method='hnsw',
    space='l2sqr_sift',
    data_type=nmslib.DataType.DENSE_UINT8_VECTOR,
    dtype=nmslib.DistType.INT
)

# Load the index from INDEX_FILE.
# NOTE(review): in the recorded run this call failed because 'index.bin.dat'
# could not be opened, so the rest of this cell (including `identity`) was
# never defined in that session.
index.loadIndex(INDEX_FILE, True)

# Number of nearest neighbours requested per query descriptor.
NeighbourCount = 10
# Neighbours whose reported distance exceeds this value are ignored.
DistanceThreshold = 2e4
# Minimum number of matched points a card needs before it is scored.
MatchCountThreshold = 20

def identity(imagePath):
    """Return the best candidate cards for the image at ``imagePath``.

    The image is read in grayscale, its SIFT descriptors are looked up in the
    nmslib index, and each sufficiently close neighbour is resolved to a
    (cardId, x, y) row in the descriptors table. Cards with at least
    ``MatchCountThreshold`` matched points are scored by the number of RANSAC
    inliers of a homography fitted between stored and query keypoints.

    Args:
        imagePath: Path of the image to identify.

    Returns:
        Up to three ``(cardId, votes)`` tuples sorted by descending votes.
        Empty when the image yields no descriptors or no card qualifies.

    Raises:
        ValueError: If the image cannot be read.
    """
    image = cv.imread(imagePath,
                      cv.IMREAD_GRAYSCALE)
    # imread reports an unreadable file by returning None; fail with a clear
    # message instead of an obscure error further down.
    if image is None:
        raise ValueError("Could not read image: %s" % imagePath)

    kp, des = sift.detectAndCompute(image, None)
    # No descriptors are returned when no keypoints were detected.
    if des is None:
        return []

    neighbours = index.knnQueryBatch(des.astype(
        np.uint8), k=NeighbourCount, num_threads=4)

    matches = {}
    db = sqlite3.connect(DATABASE_FILE)
    try:
        cursor = db.cursor()
        # neighbours[i] belongs to query keypoint kp[i].
        for keypoint, (indices, distances) in zip(kp, neighbours):
            for descriptorId, distance in zip(indices, distances):
                if distance > DistanceThreshold:
                    continue
                # Parameterized query; int() converts the numpy integer into
                # a type sqlite3 can bind.
                row = cursor.execute(
                    "SELECT x, y, cardId FROM descriptors WHERE id = ?",
                    (int(descriptorId),)).fetchone()
                # Index and database may be out of sync; skip unknown ids.
                if row is None:
                    continue
                x, y, cardId = row
                matches.setdefault(cardId, []).append(
                    [[x, y], keypoint.pt])
        cursor.close()
    finally:
        # Always release the connection, even if a lookup raised.
        db.close()

    votes = {}
    for label, points in matches.items():
        if len(points) < MatchCountThreshold:
            continue
        sourcePoints = np.float32([m[0] for m in points]).reshape(-1, 1, 2)
        destPoint = np.float32([m[1] for m in points]).reshape(-1, 1, 2)
        _, mask = cv.findHomography(
            sourcePoints, destPoint, cv.RANSAC, 5.0)
        # No mask means no homography could be estimated for this card.
        if mask is None:
            continue
        # Plain int keeps the result independent of numpy scalar types.
        votes[label] = int(np.sum(mask))

    return sorted(votes.items(), reverse=True, key=lambda x: x[1])[:3]


Check failed: input Cannot open file 'index.bin.dat' for reading


RuntimeError: Check failed: Cannot open file 'index.bin.dat' for reading

In [4]:
import time
import json
from os import listdir
from os.path import join

# Load the card metadata so matched ids can be printed with a name and link.
with open(SCRYFALL_FILE, encoding="utf-8") as scryfallFile:
    scryfallData = json.load(scryfallFile)

# Index the cards by id once instead of scanning the whole list for every
# match. setdefault keeps the first occurrence of an id, as the scan did.
cardsById = {}
for card in scryfallData:
    cardsById.setdefault(card["id"], card)

# Identify every image in the tests folder and report timing and candidates.
for filename in listdir(TESTS_FOLDER):
    print("===\n%s" % filename)
    imagePath = join(TESTS_FOLDER, filename)
    start = time.time()
    matches = identity(imagePath)
    print("Took %.2fs" % (time.time() - start))
    for cardId, votes in matches:
        card = cardsById.get(cardId)
        # An id missing from the metadata used to fall through to the last
        # card in the list and print the wrong name; report it explicitly.
        if card is None:
            print("%i votes for unknown card (%s)" % (votes, cardId))
            continue
        print("%i votes for %s (%s) %s" % (votes, card["name"], cardId, card["scryfall_uri"]))


===
Test-bouncer.jpg


NameError: name 'identity' is not defined

In [32]:
# Step-by-step identification of a single image, printing the five best
# candidates. Relies on sift, index, the thresholds and the cv/np/sqlite3
# imports having been set up by the query-setup cell.
imagePath = './tests/test-01.jpg'

image = cv.imread(imagePath,
                  cv.IMREAD_GRAYSCALE)
# imread reports an unreadable file by returning None; fail with a clear
# message instead of an obscure error further down.
if image is None:
    raise ValueError("Could not read image: %s" % imagePath)

kp, des = sift.detectAndCompute(image, None)
# No descriptors are returned when no keypoints were detected; in that case
# there is nothing to query and the result is simply empty.
if des is None:
    neighbours = []
else:
    neighbours = index.knnQueryBatch(des.astype(
        np.uint8), k=NeighbourCount, num_threads=4)

matches = {}
db = sqlite3.connect(DATABASE_FILE)
try:
    cursor = db.cursor()
    # neighbours[i] belongs to query keypoint kp[i].
    for keypoint, (indices, distances) in zip(kp, neighbours):
        for descriptorId, distance in zip(indices, distances):
            if distance > DistanceThreshold:
                continue
            # Parameterized query; int() converts the numpy integer into a
            # type sqlite3 can bind.
            row = cursor.execute(
                "SELECT x, y, cardId FROM descriptors WHERE id = ?",
                (int(descriptorId),)).fetchone()
            # Index and database may be out of sync; skip unknown ids.
            if row is None:
                continue
            x, y, cardId = row
            matches.setdefault(cardId, []).append(
                [(x, y), keypoint.pt])
    cursor.close()
finally:
    # Always release the connection, even if a lookup raised.
    db.close()

votes = {}
for label, points in matches.items():
    if len(points) < MatchCountThreshold:
        continue
    sourcePoints = np.float32([m[0] for m in points]).reshape(-1, 1, 2)
    destPoint = np.float32([m[1] for m in points]).reshape(-1, 1, 2)
    _, mask = cv.findHomography(
        sourcePoints, destPoint, cv.RANSAC, 5.0)
    # No mask means no homography could be estimated for this card.
    if mask is None:
        continue
    # Plain int keeps the printed result independent of numpy scalar types.
    votes[label] = int(np.sum(mask))

print(sorted(votes.items(), reverse=True, key=lambda x: x[1])[:5])

[('7f41285b-5961-4653-96a0-fb6d27111390', 35), ('8f8abfc8-42bc-45ed-b99b-2c96e3b109a5', 11), ('d183cc80-a9ce-4290-bf65-f8202f508ac0', 9), ('ae9307a6-7eb1-4e20-afd4-8a723db89048', 8), ('e42a0a3d-a987-4b24-b9d4-27380a12e093', 8)]
