In [1]:
import pandas as pd
import numpy as np
import tensorflow.keras as keras
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
def list_2D(r, c):
    """Return an ``r`` x ``c`` matrix of integer zeros.

    Parameters
    ----------
    r : int
        Number of rows.
    c : int
        Number of columns.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(r, c)`` using NumPy's default integer dtype.
        Negative sizes are treated as 0, so the result is always 2-D.
    """
    # np.zeros keeps the 2-D shape and the integer dtype even when a
    # dimension is 0; building the array from nested Python lists collapsed
    # that case to a 1-D float array.
    return np.zeros((max(r, 0), max(c, 0)), dtype=int)

In [3]:
#Convert labels from a list of names into one hot encodings
#Have one null character for the full one hot encoding of the label
def labelsToOneHotEncodings(arr):
    """One-hot encode labels, ending each one with a single 'null' ('@') row.

    Each label is turned into a ``MAX_NUM_CHARS`` x ``SPECIAL_CHARS`` matrix
    in which row ``i`` has a 1 in the column of the label's ``i``-th
    character. One '@' terminator is appended to every label before
    encoding; rows after the terminator stay all zero.

    Column layout:
        0-25  'a'-'z'      62  ' '      65  "'"      68  '@' (null)
        26-51 'A'-'Z'      63  ','      66  '/'
        52-61 '0'-'9'      64  '-'      67  any other character (garbage)

    Parameters
    ----------
    arr : iterable of str
        The labels to encode.

    Returns
    -------
    tuple
        ``(encodings, MAX_NUM_CHARS, SPECIAL_CHARS)`` where ``encodings`` is
        an integer array of shape ``(len(arr), MAX_NUM_CHARS, SPECIAL_CHARS)``.

    Notes
    -----
    A label that (with its terminator) is longer than ``MAX_NUM_CHARS`` is
    reported by printing the label and its 1-based position, and only its
    first ``MAX_NUM_CHARS`` characters are encoded.
    """
    MAX_NUM_CHARS = 32 * 2 + 4 #Maximum number of characters a label can be
    SPECIAL_CHARS = 69 #Number of columns in the one hot encoding
    GARBAGE_INDEX = 67 #Column for characters outside the known set
    NULL_INDEX = 68 #Column for the '@' null character

    #Lookup table from character to column; the position in the string is the column
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ,-'/"
    column_of = {character: column for column, character in enumerate(alphabet)}
    column_of["@"] = NULL_INDEX

    #Add 'null' special character to each label
    labels = [each_string + "@" for each_string in arr]

    #Preallocate so the result is 3-D with an integer dtype even for empty input
    encodings = np.zeros((len(labels), MAX_NUM_CHARS, SPECIAL_CHARS), dtype=int)

    for count, label in enumerate(labels, start=1):
        #Explicit bounds check instead of catching every exception from the write
        if len(label) > MAX_NUM_CHARS:
            #Diagnostic for finding which label is too long to encode fully
            print(label)
            print(count)

        #Mark 1 in the column of each character that fits into the matrix
        for i, character in enumerate(label[:MAX_NUM_CHARS]):
            j = column_of.get(character, GARBAGE_INDEX)
            encodings[count - 1, i, j] = 1

    #Return the encoded labels, the max number of characters, and the number of columns
    return encodings, MAX_NUM_CHARS, SPECIAL_CHARS

In [4]:
#Convert labels from a list of names into one hot encodings
#Have the rest of the characters past the length of the label set to a 'null' one hot encoding
def labelsToOneHotEncodings_2(arr):
    """One-hot encode labels, padding every unused row with the 'null' column.

    Each label is turned into a ``MAX_NUM_CHARS`` x ``SPECIAL_CHARS`` matrix
    in which row ``i`` has a 1 in the column of the label's ``i``-th
    character. Every row past the end of the label gets a 1 in the last
    (null) column, so each row of the matrix contains exactly one 1.

    Column layout:
        0-25  'a'-'z'      62  ' '      65  "'"      68  '@' (null)
        26-51 'A'-'Z'      63  ','      66  '/'
        52-61 '0'-'9'      64  '-'      67  any other character (garbage)

    Parameters
    ----------
    arr : iterable of str
        The labels to encode.

    Returns
    -------
    tuple
        ``(encodings, MAX_NUM_CHARS, SPECIAL_CHARS)`` where ``encodings`` is
        an integer array of shape ``(len(arr), MAX_NUM_CHARS, SPECIAL_CHARS)``.

    Notes
    -----
    A label longer than ``MAX_NUM_CHARS`` is reported by printing the label,
    its length and its 1-based position, and only its first
    ``MAX_NUM_CHARS`` characters are encoded.
    """
    MAX_NUM_CHARS = 32 * 2 + 4 #Maximum number of characters a label can be
    SPECIAL_CHARS = 69 #Number of columns in the one hot encoding
    GARBAGE_INDEX = 67 #Column for characters outside the known set
    NULL_INDEX = SPECIAL_CHARS - 1 #Column for the '@' null character

    #Lookup table from character to column; the position in the string is the column
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ,-'/"
    column_of = {character: column for column, character in enumerate(alphabet)}
    column_of["@"] = NULL_INDEX

    #Materialise once so the number of labels is known up front
    labels = list(arr)

    #Preallocate so the result is 3-D with an integer dtype even for empty input
    encodings = np.zeros((len(labels), MAX_NUM_CHARS, SPECIAL_CHARS), dtype=int)

    for count, label in enumerate(labels, start=1):
        #Explicit bounds check instead of catching every exception from the write
        if len(label) > MAX_NUM_CHARS:
            #Diagnostic for finding which label is too long to encode fully
            print(label)
            print(len(label))
            print(count)

        #Mark 1 in the column of each character that fits into the matrix
        for i, character in enumerate(label[:MAX_NUM_CHARS]):
            j = column_of.get(character, GARBAGE_INDEX)
            encodings[count - 1, i, j] = 1

        #Fill the rows past the end of the label with the 'null' character
        encodings[count - 1, len(label):, NULL_INDEX] = 1

    #Return the encoded labels, the max number of characters, and the number of columns
    return encodings, MAX_NUM_CHARS, SPECIAL_CHARS

In [1]:
def linearsearch(arr, x):
    """Return the index of the first element of ``arr`` equal to ``x``.

    Positions are scanned from 0 upwards using ``arr[position]``; ``-1`` is
    returned when no element compares equal to ``x``.
    """
    hits = (position for position in range(len(arr)) if arr[position] == x)
    return next(hits, -1)

In [6]:
def oneHotEncodingToWord(encoding):
    """Decode a one-hot encoded matrix back into the word it represents.

    For every row the column holding the largest value is taken as the
    character code:

        0-25  'a'-'z'      62  ' '      65  "'"
        26-51 'A'-'Z'      63  ','      66  '/'
        52-61 '0'-'9'      64  '-'

    Decoding stops at the first row whose column is 67 (garbage) or
    68 (null). A column above 68 contributes no character.
    """
    #The position of a character in this string is its one hot column
    symbols = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ,-'/"

    #Characters decoded so far, joined into the word at the end
    decoded = []

    for position in range(len(encoding)):
        #Column of the strongest activation in this row
        hot = np.argmax(encoding[position], -1)

        #Null or garbage marks the end of the word
        if hot == 68 or hot == 67:
            break

        #Columns outside the symbol table are skipped without adding anything
        if hot >= 0 and hot < 67:
            decoded.append(symbols[hot])

    return "".join(decoded)

In [7]:
# Sample labels used below to exercise both encoders and the decoder
x = ["Magical Hacker","Asmoranomardicadaistinaculdacar","Jar Jar Binks","Darth Vader","Struggle // Survive",
     "abcdefghijklmnopqrstuvwxyz","ABCDEF123456789"]

In [8]:
# Encode the sample labels with both encoders.
# test1: single '@' terminator per label; test2: every unused row padded with the null column.
# rows/cols receive the matrix dimensions returned by the encoder (overwritten by the second call).
test1, rows, cols = labelsToOneHotEncodings(x)
test2, rows, cols = labelsToOneHotEncodings_2(x)

In [9]:
# Round-trip check: decode the padded encoding of sample label 4
oneHotEncodingToWord(test2[4])

'Struggle // Survive'

In [10]:
# Original sample label 4, for comparison with the decoded word
x[4]

'Struggle // Survive'

In [11]:
# Inspect the null-padded encoding: rows past the end of the label have a 1 in the last column
test2[4]

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [12]:
# Inspect the single-terminator encoding: rows after the '@' row are left all zero
test1[4]

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [13]:
# Read the CSV file into a DataFrame
data = pd.read_csv('./MTG_data.csv')

# Print an overview of the DataFrame
print(data)

# Use the "name" column of the CSV as the labels
Labels = data.name

       Unnamed: 0                 name   set collector_number  \
0               0          Fury Sliver   tsp              157   
1               1        Kor Outfitter   zen               21   
2               2        Siren Lookout   xln               78   
3               3     Venerable Knight   eld               35   
4               4             Wildcall  ugin              146   
...           ...                  ...   ...              ...   
30266       30660    Tezzeret's Gambit   nph               47   
30267       30661      Morkrut Banshee   isd              110   
30268       30662       Celestine Reef  phop               42   
30269       30663         Horned Troll   8ed              257   
30270       30664  Exultant Skymarcher   rix                7   

                           File_Name  
0            tsp-157-fury-sliver.png  
1           zen-21-kor-outfitter.png  
2           xln-78-siren-lookout.png  
3        eld-35-venerable-knight.png  
4              ugin-146-

In [57]:
#Predictions from the Reading Neural Network
# NOTE(review): `model` and `PIC` are not defined in the cells visible here — confirm they are created by earlier cells
preds = model.predict(PIC)

#Decode the first 25 predictions into words
for i in range(25):
    print('Predicted', i, ' :', oneHotEncodingToWord(preds[i]))
    
#preds

Predicted 0  : Wollcniabi i
Predicted 1  : Wolloniabk ie
Predicted 2  : Wollcniabk ie
Predicted 3  : Wollcniabr it
Predicted 4  : Wollcnia
Predicted 5  : Wollo iabi i
Predicted 6  : Wollcniabi ie
Predicted 7  : Wolloniabi i
Predicted 8  : Wollc iabi o
Predicted 9  : Wollcniabi i
Predicted 10  : Wollcniabi i
Predicted 11  : Solloniabi i
Predicted 12  : Wollcniabi i
Predicted 13  : Wollcniabi i
Predicted 14  : Wollcniabk ie
Predicted 15  : Wollctiabk ie
Predicted 16  : Wollcniabk ie
Predicted 17  : Wolloniabi i
Predicted 18  : Wolloniabi i
Predicted 19  : Wollo iabi ie
Predicted 20  : Wollcniabi i
Predicted 21  : Wollonia
Predicted 22  : Sollcniabi i
Predicted 23  : Wollcniabk i
Predicted 24  : Solloniabi it
