In [92]:
import pandas as pd
import os

%matplotlib inline

import matplotlib.pyplot as plt
# Default size of every matplotlib figure created in this notebook
# (width, height; matplotlib measures figsize in inches).
plt.rcParams["figure.figsize"] = (8,8)

# Directory containing the JSON deck files; CardAnalytics.loadAllDecks lists it.
decklists_path = 'data/decks/'

# card_popularity.csv should be regenerated after each draft
# (call CardAnalytics.saveCardPopularity once the new draft's files are in place)

class CardAnalytics():
    """Compare how often cube cards are played with how often they are expected to be.

    Constructing an instance reads every cube list and deck file from disk and
    fills the following attributes:

    * ``cube_data`` / ``deck_data``: draft_id -> list of card names
    * ``deckfiles``: sorted list of deck file names
    * ``cube_base_popularity``, ``cube_expected_card_popularity``,
      ``expected_card_popularity``: draft_id -> probability (rounded to 2 dp)
    * ``all_cards``: sorted list of every card found in any cube list
    * ``cube_card_info``: card -> draft_id -> ``[in_cube, in_deck]``
    * ``card_popularity``: card -> ``{'expected', 'actual', 'cubes', 'decks'}``
    """

    # Size of the full card pool. It is the denominator of the "card is in the
    # drafting subset" probability and also the line-count threshold that tells
    # numbered cube lists apart from plain ones in loadAllCubes.
    CUBE_SIZE = 540
    # Cards drafted by each player (was the literal 4*11; presumably
    # 4 packs of 11 cards -- TODO confirm).
    CARDS_DRAFTED_PER_PLAYER = 4 * 11
    # A card is only plotted once it has appeared in at least this many cubes.
    MIN_CUBES_FOR_PLOT = 10
    # Leading cumulative-average points left out of each plot (presumably
    # because the first few averages are too noisy -- TODO confirm).
    PLOT_SKIPPED_POINTS = 4

    def __init__(self):
        """Load all data from disk and compute every popularity table."""
        self.cube_data = {}  # dictionary, draft_id : [list of cards]
        self.deck_data = {}  # dictionary, draft_id : [list of cards]
        self.loadAllCubes()
        self.loadAllDecks()
        self.calculateBaseExpectedPopularity()
        self.buildCardInfo()
        self.buildCardPopularity()
        # Optional follow-up steps, run manually when needed:
        # self.saveCardPopularity() # use only after new data is available, so after each draft
        # self.visualiseCardPopularityOverTime('Sol Ring')
        # self.visualiseAllCardPopularityOverTime() # creates all relevant graph files for further processing

    def _loadCubeDirectory(self, directory):
        """Read every cube list in ``directory`` into ``self.cube_data``.

        File names are expected to look like ``<prefix>_<a>_<b>.txt``; the draft
        id is ``<a>_<b>``. A name with fewer than three ``_``-separated parts
        raises IndexError.
        """
        for filename in os.listdir(directory):
            # The context manager closes the handle even if reading fails.
            with open(os.path.join(directory, filename), 'r') as handle:
                lines = handle.read().replace('\r', '').split('\n')

            # NOTE: the threshold is applied to the raw line count (blank lines
            # included). Long files carry a leading token on every line
            # (e.g. "1 Sol Ring") which is stripped; short files are plain names.
            if len(lines) < self.CUBE_SIZE:
                # Blank lines (typically the trailing newline) are dropped so
                # that '' never becomes a card name.
                cards = [line for line in lines if line != '']
            else:
                cards = [" ".join(line.split(' ')[1:]) for line in lines if line != '']

            parts = filename.replace('.txt', '').split('_')
            draft_id = parts[1] + '_' + parts[2]
            self.cube_data[draft_id] = cards

    def loadAllCubes(self):
        """Fill ``self.cube_data`` from 'data/reconstructed' and 'data/cubes'.

        'data/cubes' is read second, so for a draft id present in both
        directories its list replaces the reconstructed one.
        """
        self._loadCubeDirectory('data/reconstructed')
        self._loadCubeDirectory('data/cubes')

    def loadAllDecks(self):
        """Fill ``self.deckfiles`` and ``self.deck_data`` from ``decklists_path``.

        Each JSON file is read with pandas; the cards of all of its rows are
        concatenated and stored under the file's draft id.
        """
        # Names ending in '~' (editor backup copies) collapse onto the real
        # file name. Sorting makes the draft order deterministic, which the
        # "over time" plots depend on (assumes file names sort chronologically
        # -- TODO confirm).
        self.deckfiles = sorted(set(name.replace('~', '') for name in os.listdir(decklists_path)))

        for deckfile in self.deckfiles:
            data = pd.read_json(os.path.join(decklists_path, deckfile))
            cards_used = []
            for deck in data['cards']:
                cards_used.extend(deck)
            # Keyed by the first row's draft id, the same lookup that
            # calculateBaseExpectedPopularity performs.
            self.deck_data[data['draft'][0]] = cards_used

    def calculateBaseExpectedPopularity(self):
        """Compute the per-draft baseline probabilities.

        For every draft this sets (each rounded to two decimals):

        * ``cube_base_popularity``: P(any card is part of the drafting subset)
        * ``cube_expected_card_popularity``: P(card is played | it was drafted)
        * ``expected_card_popularity``: product of the two rounded values
        """
        draft_ids = []
        cards_drafted = {}
        cards_played = {}

        for deckfile in self.deckfiles:
            data = pd.read_json(os.path.join(decklists_path, deckfile))
            draft_id = data['draft'][0]
            draft_ids.append(draft_id)
            cards_drafted[draft_id] = self.CARDS_DRAFTED_PER_PLAYER * len(data['player'])
            cards_played[draft_id] = len(self.deck_data[draft_id])

        self.cube_base_popularity = {}
        self.cube_expected_card_popularity = {}
        self.expected_card_popularity = {}

        for draft_id in draft_ids:
            # P (any card will be part of the drafting subset)
            in_subset = round(float(cards_drafted[draft_id]) / self.CUBE_SIZE, 2)
            # P (if a card is in the drafting subset, it will be played in a deck)
            played_if_drafted = round(float(cards_played[draft_id]) / cards_drafted[draft_id], 2)

            self.cube_base_popularity[draft_id] = in_subset
            self.cube_expected_card_popularity[draft_id] = played_if_drafted
            # P (any card is in the drafting pool and is played in a deck)
            self.expected_card_popularity[draft_id] = round(in_subset * played_if_drafted, 2)

    def buildCardInfo(self):
        """Step 3 - build ``all_cards`` and the ``cube_card_info`` dictionary.

        ``cube_card_info[card][draft_id]`` is ``[in_cube, in_deck]`` for every
        draft that has deck data. Raises KeyError if a draft with deck data has
        no cube list.
        """
        all_cube_cards = set()
        for cards in self.cube_data.values():
            all_cube_cards.update(cards)
        self.all_cards = sorted(all_cube_cards)

        # Sets give constant-time membership tests instead of scanning the
        # card lists once per (card, draft) pair.
        cube_sets = {draft_id: set(cards) for draft_id, cards in self.cube_data.items()}
        deck_sets = {draft_id: set(cards) for draft_id, cards in self.deck_data.items()}

        self.cube_card_info = {}
        for card in self.all_cards:
            one_card = {}
            for draft_id in self.deck_data:
                one_card[draft_id] = [card in cube_sets[draft_id], card in deck_sets[draft_id]]
            self.cube_card_info[card] = one_card

    def buildCardPopularity(self):
        """Step 4 - build the ``card_popularity`` dictionary.

        Each entry holds the rounded expected and actual play rates plus the
        raw counts of cubes and decks the card appeared in. A card that was in
        no cube with deck data gets an actual rate of 0.0.
        """
        self.card_popularity = {}
        for card in self.all_cards:
            expected = round(self.calculateExpectedCardPopularity(card), 2)
            total_ind, total_inc = self.calculateActualCardPopularity(card)
            # Guard against cards whose cubes have no deck data yet.
            actual = round(total_ind / total_inc, 2) if total_inc else 0.0

            self.card_popularity[card] = {'expected': expected, 'actual': actual,
                                          'cubes': int(total_inc), 'decks': int(total_ind)}

    def calculateExpectedCardPopularity(self, card):
        """Return the mean expected popularity over the drafts whose cube held ``card``.

        Returns 0.0 when the card was in none of the cubes that have deck data.
        """
        values = self.getExpectedPopularityValues(card)
        if not values:
            return 0.0
        return sum(values) / len(values)

    def calculateActualCardPopularity(self, card):
        """Return ``(decks_containing_card, cubes_containing_card)`` as floats."""
        total_inc = 0.0
        total_ind = 0.0
        for in_cube, in_deck in self.cube_card_info[card].values():
            if in_cube:
                total_inc += 1
            if in_deck:
                total_ind += 1
        return total_ind, total_inc

    # Step 5 - save calculated values (distance from expected) for all cards played into .csv file

    def saveCardPopularity(self):
        """Write ``card_popularity`` to 'card_popularity.csv', one row per card."""
        df = pd.DataFrame.from_dict(self.card_popularity, orient="index")
        df.to_csv("card_popularity.csv")

    # Step 6 - visualise expected and actual popularity for each card over time (and save the images)

    def getExpectedPopularityValues(self, card):
        """Return the expected popularity of each draft whose cube held ``card``."""
        return [self.expected_card_popularity[draft_id]
                for draft_id, (in_cube, _in_deck) in self.cube_card_info[card].items()
                if in_cube]

    def getActualPopularityValues(self, card):
        """Return 1 (played) or 0 (not played) for each draft whose cube held ``card``."""
        return [1 if in_deck else 0
                for in_cube, in_deck in self.cube_card_info[card].values()
                if in_cube]

    def getCumulativeAverage(self, my_list):
        """Return the running mean of ``my_list``, each value rounded to two decimals."""
        averages = []
        running_total = 0
        for count, value in enumerate(my_list, 1):
            running_total += value
            averages.append(round(float(running_total) / count, 2))
        return averages

    def visualiseCardPopularityOverTime(self, card):
        """Plot cumulative expected vs actual popularity of ``card`` and save it as PNG.

        Nothing is drawn for cards seen in fewer than MIN_CUBES_FOR_PLOT cubes.
        The image is written to 'popularity/popularity_<card>.png'; the
        directory is created if missing and any '/' in the card name is
        replaced by '_' so the name cannot be mistaken for a sub-directory.
        Raises KeyError if ``card`` is not in ``card_popularity``.
        """
        if self.card_popularity[card]['cubes'] < self.MIN_CUBES_FOR_PLOT:
            return

        acc_expected = self.getCumulativeAverage(self.getExpectedPopularityValues(card))
        acc_actual = self.getCumulativeAverage(self.getActualPopularityValues(card))

        skip = self.PLOT_SKIPPED_POINTS
        plt.plot(acc_expected[skip:], label='Expected card popularity')
        plt.plot(acc_actual[skip:], label='Actual card popularity')
        plt.legend()
        plt.grid(True)
        plt.title('Expected and Actual popularity per draft: '+card)

        if not os.path.isdir('popularity'):
            os.makedirs('popularity')
        plt.savefig('popularity/popularity_'+card.replace('/', '_')+'.png')
        plt.close()

    def visualiseAllCardPopularityOverTime(self):
        """Save a popularity plot for every card in ``all_cards`` that qualifies."""
        for card in self.all_cards:
            self.visualiseCardPopularityOverTime(card)

        # TODO: create some template in .html
        # table: card image (from images_all) + graph in some cool format
        # inject table to html and print to .pdf (similarly to visual)


# Building the instance immediately loads all cube and deck files from disk
# and computes the popularity tables (see CardAnalytics.__init__).
ca = CardAnalytics()