# Magic: The Gathering card detection from an image

## Imports

In [19]:
# Import required packages
import cv2
import pytesseract
from langdetect import detect
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json
import re
import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\louis\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## Detect text

In [20]:
# Point pytesseract at the locally installed Tesseract-OCR executable
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

img_name = '4'

# Read the image to run OCR on, trying the .jpg file first and then the .png file.
# cv2.imread does not raise on a missing/unreadable file; it returns None instead.
img = None
for ext in ('.jpg', '.png'):
    img = cv2.imread('./cards/' + img_name + ext)
    if img is not None:
        break

# Fail fast with a clear message rather than passing None on to the OCR call
if img is None:
    raise FileNotFoundError(
        "Could not read './cards/" + img_name + ".jpg' or './cards/" + img_name + ".png'"
    )

# Apply OCR on the image
text = pytesseract.image_to_string(img)

print(text)

TFORMERS:com:
LE A TY

Cay

Legendary Artifact Creature — Robot

More Than Meets the Eye 5 6 ® (You may

Uus card converted for 5 6 ®

Flying

) Whenever Cyclonus deals combat damage
player, it connives. Then if Cyclonus’s px
IS D OF greater, convert it. (7d ha

a card, then discarc

a nonland card, put a +1/+




## Detect language

In [21]:
# Run the language detector on a fixed sample sentence and show the detected language code
sample_sentence = "This is an example of language detection in python library."
print(detect(sample_sentence))

en


## Find card

In [22]:
# Read the card records from "cards.json" (UTF-8) and flatten them into a pandas dataframe
with open('cards.json', encoding='utf-8') as cards_file:
    data = json.load(cards_file)
df = pd.json_normalize(data)

# Preprocess the text data
stop_words = set(stopwords.words('english'))
def preprocess_text(text):
    """Normalise a piece of text for TF-IDF matching.

    The text is lower-cased, every character that is neither a word
    character nor whitespace is removed, and tokens found in the
    module-level ``stop_words`` set are dropped.

    Parameters
    ----------
    text : object
        Value to clean. Anything that is not a ``str`` (for example a
        missing value) is treated as empty.

    Returns
    -------
    str
        The remaining tokens joined by single spaces, or ``''`` when the
        input is not a string or nothing survives the filtering.
    """
    # Non-string values carry no usable text
    if not isinstance(text, str):
        return ''

    without_punctuation = re.sub(r'[^\w\s]', '', text.lower())
    kept_tokens = (
        token for token in without_punctuation.split() if token not in stop_words
    )
    # Joining zero tokens already yields '', so no separate empty check is needed
    return ' '.join(kept_tokens)

# Normalise the two text columns used for matching (the columns are overwritten in place)
df['name'] = df['name'].map(preprocess_text)
df['oracle_text'] = df['oracle_text'].map(preprocess_text)

# One document per card: its cleaned name followed by its cleaned oracle text
corpus = df['name'] + ' ' + df['oracle_text']

# Create a TF-IDF vectorizer and fit it on the card documents
vectorizer = TfidfVectorizer().fit(corpus)

In [23]:

# Vectorize the card corpus once so repeated lookups do not redo this work.
# Re-run this cell after refitting `vectorizer` or changing `corpus`.
corpus_matrix = vectorizer.transform(corpus)

# Define a function to find the closest match to the input text
def find_closest_match(input_text):
    """Return the 'id' of the card whose text is most similar to ``input_text``.

    The input is cleaned with ``preprocess_text``, vectorized with the fitted
    ``vectorizer`` and compared by cosine similarity against every row of
    ``corpus_matrix``.

    Parameters
    ----------
    input_text : str
        Raw text to look up (for example OCR output).

    Returns
    -------
    The value of the 'id' column for the best-scoring card, or ``None`` when
    the best score is 0, i.e. the cleaned input has no term in common with
    any card and no meaningful match exists.
    """
    input_text = preprocess_text(input_text)
    input_vector = vectorizer.transform([input_text])
    # Shape (1, number of cards): one similarity score per card
    similarities = cosine_similarity(input_vector, corpus_matrix)
    closest_match_index = similarities.argmax()
    # With all-zero scores argmax() would silently pick the first card
    if similarities[0, closest_match_index] <= 0:
        return None
    return df.iloc[closest_match_index]['id']

# Use the function to find the closest match to the input text and return the corresponding 'id'
input_text = text
print(input_text)
closest_match_id = find_closest_match(input_text)
print(closest_match_id)
if closest_match_id is None:
    print('No matching card found')
else:
    # Look the matched row up once and print its (preprocessed) name and oracle text
    matched_card = df[df['id'] == closest_match_id].iloc[0]
    print(matched_card['name'])
    print(matched_card['oracle_text'])

TFORMERS:com:
LE A TY

Cay

Legendary Artifact Creature — Robot

More Than Meets the Eye 5 6 ® (You may

Uus card converted for 5 6 ®

Flying

) Whenever Cyclonus deals combat damage
player, it connives. Then if Cyclonus’s px
IS D OF greater, convert it. (7d ha

a card, then discarc

a nonland card, put a +1/+


0e84a9db-8130-489b-9f76-e3ecd35a0fd8
spy eye
flying whenever spy eye deals combat damage player may draw card players library
