In [2]:
import os
import pandas as pd

# Card database, read once when this cell runs. ArchetypeWorkshop.__init__
# keeps a reference to it as self.allcards, and the analysis methods index it
# by card name and then by field ('types', 'cmc', 'power', 'toughness').
allcards = pd.read_json('AllCards.json')

In [45]:
import numpy as np
import matplotlib.pyplot as plt

# Directory whose entries ArchetypeWorkshop.loadAllDeckFilenames lists as
# deck files.
decklists_path = 'data/decks/'

class ArchetypeWorkshop(object):
    """Load draft decklists and try to recover each deck's archetype.

    Deck files live in ``decklists_path``; each one is parsed with
    ``pd.read_json`` and is expected to provide the columns ``cards``,
    ``name``, ``player`` and ``draft``. The second space-separated word of a
    deck's ``name`` is used as its archetype label.

    Two classification experiments are implemented:

    * closest neighbour: ``examineAllDecks`` -> ``convertResultsToDataFrame``
      -> ``normalizeDataFrame`` -> ``convertNormalizedPreDfToCoordinates``
      -> ``findClosestDeckForEachDeck``
    * card vote: ``loadAllDecksWithAll`` -> ``findArchetypeByCardVoteAll``

    The constructor performs file I/O: it lists ``decklists_path``, reads
    every deck file and reads ``used_cards.json``.
    """

    # Archetype labels, in the order used for vote counting and tie-breaking.
    ARCHETYPES = ('Aggro', 'Ramp', 'Control', 'Mid-Range')

    # Plot colour for every archetype label.
    ARCHETYPE_COLORS = {
        'Aggro': 'red',
        'Control': 'blue',
        'Mid-Range': 'orange',
        'Ramp': 'green',
    }

    # Keys of an examineDeck() result that are labels, not numeric parameters.
    LABEL_KEYS = ('Name', 'Player and draft')

    def __init__(self):
        # Create every attribute before any loader runs, so no method can
        # ever see a partially initialised instance.
        self.deckfiles = []
        self.decks = []
        self.cards = []
        self.used_cards = []
        self.deck_and_name = []
        self.df = []
        self.decknames = []
        self.results_all_decks = []
        self.pre_df = {}
        self.normalized_pre_df = {}
        self.deck_coordinates = {}
        self.allcards = allcards

        self.loadAllDeckFilenames()
        self.loadAllDecks()
        self.loadAllUsedCards()
        self.loadAllUsedCardsData()

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator/denominator rounded to 4 places, 0.0 if undefined."""
        if not denominator:
            return 0.0
        return round((numerator + 0.0) / denominator, 4)

    @staticmethod
    def _toInt(value):
        """Return ``int(value)``, or 0 when the value is not a plain integer.

        Covers variable stats such as 'X', '*' or '1+*' and missing values;
        all of them contribute nothing to the totals.
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    @staticmethod
    def _deckKey(entry):
        """Return '<player>_<draft> <archetype>' for a 4-item deck entry."""
        return entry[2] + '_' + entry[3] + ' ' + entry[1].split(' ')[1]

    @staticmethod
    def _archetypeOf(deck_key):
        """Return the archetype part (last space-separated token) of a key."""
        return deck_key.rsplit(' ', 1)[-1]

    def _numericKeys(self):
        """Return the numeric parameter names of the examined decks."""
        return [key for key in self.results_all_decks[0].keys()
                if key not in self.LABEL_KEYS]

    def _buildPreDf(self):
        """Return {parameter: [value for every examined deck]}."""
        keys = list(self.results_all_decks[0].keys())
        return dict((key, [row[key] for row in self.results_all_decks])
                    for key in keys)

    # ------------------------------------------------------------------
    # loading
    # ------------------------------------------------------------------

    def loadAllDeckFilenames(self):
        """Fill ``self.deckfiles`` with the entries of ``decklists_path``.

        '~' characters are removed from the names and duplicates collapsed,
        so ``deck.json`` and ``deck.json~`` count as a single file. Does
        nothing when the list is already populated.
        """
        if self.deckfiles:
            return
        names = set(name.replace('~', '') for name in os.listdir(decklists_path))
        # Sorted so that the loading order is reproducible between runs.
        self.deckfiles = sorted(names)

    def loadDecks(self, filename):
        """Return the ``cards`` column of one deck file."""
        data = pd.read_json(os.path.join(decklists_path, filename))
        return data['cards']

    def loadAllDecks(self):
        """Fill ``self.decks`` with the card list of every deck.

        Does nothing when decks are already loaded; without this guard the
        constructor (which reaches this method twice) stored every deck twice.
        """
        if self.decks:
            return
        self.loadAllDeckFilenames()
        for filename in self.deckfiles:
            self.decks.extend(self.loadDecks(filename))

    def loadAllUsedCards(self):
        """Fill ``self.cards`` with the sorted, distinct card names in use.

        Names containing "(ORI)" are left out.
        """
        if self.cards:
            return
        self.loadAllDecks()
        names = set()
        for deck in self.decks:
            for card in deck:
                if "(ORI)" not in card:
                    names.add(card)
        self.cards = sorted(names)

    def loadAllCardsData(self):
        """Read ``AllCards.json`` into ``self.allcards`` if it is still empty.

        Raises:
            IOError: the data is needed but ``AllCards.json`` is missing.
                (The original called a download helper that is not defined.)
        """
        if len(self.allcards) != 0:
            return
        if not os.path.isfile('AllCards.json'):
            raise IOError("AllCards.json not found; download it first")
        self.allcards = pd.read_json('AllCards.json')

    def loadAllUsedCardsData(self):
        """Read ``used_cards.json`` into ``self.used_cards``."""
        self.used_cards = pd.read_json('used_cards.json')

    def prepareAllUsedCardsData(self):
        """Collect the ``allcards`` entry of every name in ``self.cards``."""
        self.used_cards = [self.allcards[name] for name in self.cards]

    def saveUsedCardsAsJson(self):
        """Write ``self.used_cards`` to ``used_cards.json``."""
        pd.DataFrame(self.used_cards).to_json('used_cards.json')

    # ------------------------------------------------------------------
    # actual archetype detection methods start here
    # ------------------------------------------------------------------

    def loadDecksWithNames(self, filename):
        """Return the ``cards`` and ``name`` columns of one deck file."""
        data = pd.read_json(os.path.join(decklists_path, filename))
        return data['cards'], data['name']

    def loadAllDecksWithNames(self):
        """Rebuild ``self.deck_and_name`` as a list of (cards, name) tuples."""
        self.loadAllDeckFilenames()
        # Start from scratch so that a repeated call does not duplicate decks.
        self.deck_and_name = []
        for filename in self.deckfiles:
            decks, names = self.loadDecksWithNames(filename)
            self.deck_and_name.extend(zip(decks, names))

    def loadDeckWithAllInfo(self, filename):
        """Return the cards, name, player and draft columns of one file."""
        data = pd.read_json(os.path.join(decklists_path, filename))
        return data['cards'], data['name'], data['player'], data['draft']

    def loadAllDecksWithAll(self):
        """Rebuild ``self.deck_and_name`` and ``self.decknames``.

        Every entry is the list ``[cards, name, player, draft]``.
        """
        self.loadAllDeckFilenames()
        # Start from scratch so that a repeated call does not duplicate decks.
        self.deck_and_name = []
        for filename in self.deckfiles:
            decks, names, players, drafts = self.loadDeckWithAllInfo(filename)
            for entry in zip(decks, names, players, drafts):
                self.deck_and_name.append(list(entry))
        self.prepareDecknames()

    def showCardTypes(self, deck):
        """Print the ``types`` value of every card in ``deck``."""
        for card in deck:
            print(self.allcards[card]['types'])

    def getNonlandsOnly(self, deck):
        """Return the cards of ``deck`` whose types do not include 'Land'."""
        return [card for card in deck
                if 'Land' not in self.allcards[card]['types']]

    def getNumberOfCardType(self, deck, cardtype):
        """Return how many cards of ``deck`` have ``cardtype`` in their types."""
        return sum(1 for card in deck
                   if cardtype in self.allcards[card]['types'])

    def getAverageCMC(self, deck):
        """Return the mean 'cmc' of ``deck`` (4 decimals); 0.0 if empty."""
        total = 0
        for card in deck:
            total += self.allcards[card]['cmc']
        return self._ratio(total, len(deck))

    def getAverageCMCForType(self, deck, cardtype):
        """Return the mean 'cmc' of the ``cardtype`` cards in ``deck``.

        Rounded to 4 decimals; 0.0 when the deck has no such card.
        """
        total = 0
        count = 0
        for card in deck:
            data = self.allcards[card]
            if cardtype in data['types']:
                total += data['cmc']
                count += 1
        return self._ratio(total, count)

    def getMinMaxMedian(self, deck):
        """Return {'min', 'max', 'median'} of the 'cmc' values of ``deck``.

        numpy raises ValueError for an empty deck.
        """
        costs = np.array([self.allcards[card]['cmc'] for card in deck])
        return {
            'min': np.min(costs),
            'max': np.max(costs),
            'median': np.median(costs)
        }

    def getAveragePowerToughness(self, deck):
        """Summarise power and toughness of the creatures in ``deck``.

        Non-numeric power/toughness values count as 0 but the creature still
        counts towards the averages. Every ratio is rounded to 4 decimals and
        is 0.0 when its denominator is 0 (e.g. a deck without creatures).

        Returns:
            dict with 'ave_power', 'ave_tough', 'total_power', 'total_tough',
            'power_cmc' and 'tough_cmc'.
        """
        total_power = 0
        total_tough = 0
        total_cmc = 0
        number = 0
        for name in deck:
            card = self.allcards[name]
            if 'Creature' not in card['types']:
                continue
            number += 1
            total_power += self._toInt(card['power'])
            total_tough += self._toInt(card['toughness'])
            total_cmc += int(card['cmc'])
        return {
            'ave_power': self._ratio(total_power, number),
            'ave_tough': self._ratio(total_tough, number),
            'total_power': total_power,
            'total_tough': total_tough,
            'power_cmc': self._ratio(total_power, total_cmc),
            'tough_cmc': self._ratio(total_tough, total_cmc)
        }

    def examineDeck(self, i):
        """Return the parameter dict of ``self.deck_and_name[i]``.

        Requires 4-item entries as built by ``loadAllDecksWithAll``. Ratios
        whose denominator is 0 are reported as 0.0.
        """
        entry = self.deck_and_name[i]
        deck = entry[0]
        deck_name = entry[1]
        player_name = entry[2]
        draft_num = entry[3]
        nonlands = self.getNonlandsOnly(deck)
        # The archetype is the second word of the deck name.
        simple_name = deck_name.split(' ')[1]

        min_max_median = self.getMinMaxMedian(deck)
        stats = self.getAveragePowerToughness(nonlands)
        creature_density = self._ratio(
            self.getNumberOfCardType(nonlands, 'Creature'), len(nonlands))
        power_to_toughness = self._ratio(
            stats['total_power'], stats['total_tough'])

        return {
            'Name': simple_name,
            'AverageCMC': self.getAverageCMC(nonlands),
            'AverageCMCCreatures': self.getAverageCMCForType(deck, 'Creature'),
            'CreatureDensity': creature_density,
            'PowerToToughness': power_to_toughness,
            'PowerToCMC': stats['power_cmc'],
            'ToughnessToCMC': stats['tough_cmc'],
            'MaxCMC': min_max_median['max'],
            'AveragePower': stats['ave_power'],
            'AverageToughness': stats['ave_tough'],
            'Player and draft': player_name + '_' + draft_num
        }

    def examineAllDecks(self):
        """Store ``examineDeck`` of every deck in ``self.results_all_decks``."""
        self.results_all_decks = [self.examineDeck(index)
                                  for index in range(len(self.deck_and_name))]

    def convertResultsToDataFrame(self):
        """Build ``self.pre_df`` (dict of columns) and ``self.df``."""
        self.pre_df = self._buildPreDf()
        self.df = pd.DataFrame.from_dict(self.pre_df)

    def showDistributionForEachParameter(self):
        """Print max/min and show a 20-bin histogram per numeric parameter."""
        self.pre_df = self._buildPreDf()
        if not isinstance(self.df, pd.DataFrame):
            # The histograms need the data frame; build it on demand.
            self.convertResultsToDataFrame()

        # Select the style before drawing so that it applies to every plot.
        plt.style.use('ggplot')
        for key in self._numericKeys():
            values = self.pre_df[key]
            print(' '.join([str(key), 'Max: ', str(max(values)),
                            'Min: ', str(min(values))]))
            self.df[key].hist(bins=20)
            plt.show()

    def getAllNumericalKeyPairs(self):
        """Return every unordered pair of numeric parameter names."""
        keys = self._numericKeys()
        return [(first, second)
                for position, first in enumerate(keys)
                for second in keys[position + 1:]]

    def getColorsByArchetypes(self):
        """Return the plot colour of every deck in ``self.pre_df['Name']``.

        Raises KeyError for an archetype without a configured colour.
        """
        return [self.ARCHETYPE_COLORS[name] for name in self.pre_df['Name']]

    def showScatterPlotsForParameterPairs(self):
        """Show one archetype-coloured scatter plot per parameter pair."""
        colors = self.getColorsByArchetypes()
        for x_key, y_key in self.getAllNumericalKeyPairs():
            plt.scatter(self.pre_df[x_key], self.pre_df[y_key],
                        c=colors, alpha=0.5)
            plt.title('Possible correlation between ' + x_key + ' and ' + y_key)
            plt.xlabel(x_key)
            plt.ylabel(y_key)
            plt.show()

    def normalizeSingleValue(self, val, p_min, p_max):
        """Scale ``val`` from [p_min, p_max] to [0, 1], rounded to 4 places.

        A zero-width range carries no information, so it maps to 0.0 instead
        of dividing by zero.
        """
        span = p_max - p_min
        if span == 0:
            return 0.0
        return round((val - p_min + 0.0) / span, 4)

    def normalizeArray(self, my_array):
        """Return ``my_array`` min-max normalized; [] for empty input."""
        if len(my_array) == 0:
            return []
        p_min = min(my_array)
        p_max = max(my_array)
        return [self.normalizeSingleValue(value, p_min, p_max)
                for value in my_array]

    def normalizeDataFrame(self):
        """Build ``normalized_pre_df`` / ``normalized_df`` from ``pre_df``.

        Numeric columns are min-max normalized; label columns are copied.
        """
        normalized = {}
        for key in self.results_all_decks[0].keys():
            column = self.pre_df[key]
            if key in self.LABEL_KEYS:
                normalized[key] = column
            else:
                normalized[key] = self.normalizeArray(column)
        self.normalized_pre_df = normalized
        self.normalized_df = pd.DataFrame.from_dict(normalized)

    def convertNormalizedPreDfToCoordinates(self):
        """Build ``self.deck_coordinates``.

        Maps '<player and draft> <archetype>' to the list of the deck's
        normalized numeric parameters. Decks sharing a key overwrite each
        other.
        """
        source = self.normalized_pre_df
        numeric_keys = self._numericKeys()
        converted = {}
        for index in range(len(source['Name'])):
            deck_key = source['Player and draft'][index] + ' ' + source['Name'][index]
            converted[deck_key] = [source[key][index] for key in numeric_keys]
        self.deck_coordinates = converted

    def calculateDistanceBetweenTwoDecks(self, co1, co2):
        """Return the Euclidean distance between two coordinate lists."""
        squares = sum((a - b) ** 2 for a, b in zip(co1, co2))
        return np.sqrt(squares)

    def findClosestDeck(self, deck_name):
        """Find the nearest other deck, print it, and compare archetypes.

        Returns:
            True when the nearest deck has the same archetype as
            ``deck_name``; False otherwise, including when there is no other
            deck to compare with.
        """
        target = self.deck_coordinates[deck_name]
        min_dist = float('inf')
        close_deck = None
        for other in self.deck_coordinates.keys():
            if other == deck_name:
                continue
            distance = self.calculateDistanceBetweenTwoDecks(
                target, self.deck_coordinates[other])
            if distance < min_dist:
                min_dist = distance
                close_deck = other

        if close_deck is None:
            print('No other deck to compare with ' + deck_name)
            return False

        same_archetype = (self._archetypeOf(deck_name)
                          == self._archetypeOf(close_deck))
        print('Closest deck to ' + deck_name + ' is : ' + close_deck
              + ' with distance = ' + str(min_dist)
              + ' Archetype: ' + str(same_archetype))
        return same_archetype

    def findClosestDeckForEachDeck(self):
        """Run ``findClosestDeck`` on every deck; print (correct, wrong)."""
        correct = 0
        wrong = 0
        for deck_key in list(self.deck_coordinates.keys()):
            if self.findClosestDeck(deck_key):
                correct += 1
            else:
                wrong += 1
        # Printed as one tuple so the text is the same on Python 2 and 3.
        print((correct, wrong))

    def getDeckByName(self, deckname):
        """Return the first deck entry whose key equals ``deckname``, or None."""
        for entry in self.deck_and_name:
            if self._deckKey(entry) == deckname:
                return entry
        return None

    def prepareDecknames(self):
        """Rebuild ``self.decknames`` with the key of every loaded deck."""
        self.decknames = [self._deckKey(entry) for entry in self.deck_and_name]

    def getArchetypeLibForCard(self, cardname, deckname):
        """Count, per archetype, the other decks that contain ``cardname``.

        The deck whose key is ``deckname`` is skipped so that a deck never
        votes for itself. Raises KeyError for a deck whose archetype is not
        in ``ARCHETYPES``.
        """
        archetype_lib = dict.fromkeys(self.ARCHETYPES, 0)
        for entry in self.deck_and_name:
            if cardname in entry[0] and self._deckKey(entry) != deckname:
                archetype_lib[entry[1].split(' ')[1]] += 1
        return archetype_lib

    def normalizeArchetypeCardLib(self, archetype_card_lib):
        """Turn per-archetype counts into shares that sum to 1.

        All shares are 0.0 when the counts sum to 0.
        """
        total = sum(archetype_card_lib.values())
        if total == 0:
            return dict.fromkeys(self.ARCHETYPES, 0.0)
        return dict(
            (archetype, round((archetype_card_lib[archetype] + 0.0) / total, 4))
            for archetype in self.ARCHETYPES)

    def findArchetypeByCardVote(self, deckname):
        """Predict the archetype of ``deckname`` from its cards.

        Every distinct card adds its archetype shares to the tally; the
        archetype with the highest total wins (ties go to the earliest entry
        of ``ARCHETYPES``).

        Returns:
            True when the winner equals the archetype encoded in ``deckname``.

        Raises:
            ValueError: no loaded deck has the key ``deckname``.
        """
        my_deck = self.getDeckByName(deckname)
        if my_deck is None:
            raise ValueError('Unknown deck: ' + deckname)

        vote_lib = dict.fromkeys(self.ARCHETYPES, 0.0)
        seen = set()
        for card in my_deck[0]:
            # A card votes once, however many copies the deck runs.
            if card in seen:
                continue
            seen.add(card)
            shares = self.normalizeArchetypeCardLib(
                self.getArchetypeLibForCard(card, deckname))
            for archetype in self.ARCHETYPES:
                vote_lib[archetype] += shares[archetype]

        top_vote = 0.0
        top_style = ""
        for archetype in self.ARCHETYPES:
            if vote_lib[archetype] > top_vote:
                top_vote = vote_lib[archetype]
                top_style = archetype

        return self._archetypeOf(deckname) == top_style

    def findArchetypeByCardVoteAll(self):
        """Run the card vote on every deck; print (correct, wrong)."""
        correct = 0
        wrong = 0
        for deck_key in self.decknames:
            if self.findArchetypeByCardVote(deck_key):
                correct += 1
            else:
                wrong += 1
        # Printed as one tuple so the text is the same on Python 2 and 3.
        print((correct, wrong))
                                
# Build the workshop. The constructor itself lists the deck directory, reads
# every deck file and reads used_cards.json.
workshop = ArchetypeWorkshop()

workshop.loadAllDecksWithAll() # load all deck data

#workshop.loadAllCardsData() - not used due to issues with handling this data frame
#workshop.prepareAllUsedCardsData()  - not used due to issues with handling this data frame
#workshop.saveUsedCardsAsJson()  - not used due to issues with handling this data frame

# Method 1 - Closest neighbour (currently disabled)

#workshop.examineAllDecks() # prepare parameters for each deck
#workshop.convertResultsToDataFrame() # prepare data frame for further usage
#workshop.showDistributionForEachParameter() # show parameters distribution
#workshop.showScatterPlotsForParameterPairs() # show parameters' possible correlations
#workshop.normalizeDataFrame()
#workshop.convertNormalizedPreDfToCoordinates()
#workshop.findClosestDeckForEachDeck()

# Method 2 - Find archetype by card vote

# workshop.findArchetypeByCardVote('Casc_18_5 Mid-Range')

# Prints the (correct, wrong) counts over all loaded decks.
workshop.findArchetypeByCardVoteAll()

# next steps:
# 1. Add parameter vote as third method
# 2. Enhance Closest to finding N neighbours and optimize
# 3. Add Method Vote (all three methods used, results 'vote' for final result, ties are broken by strongest method)
# 4. Upgrade card vote method, like ignore 1 or 2, so only more frequent cards matter
# 5. Compare effectiveness of each method
# 6. Find decks that are causing the most errors, and then possibly adjust them manually
# 7. Final classification, what %? and workshop is done!
# generate input for graph showing the distances between decks for 1, 2 ... n closest decks with archetype colors
# analyze graph


(134, 98)
