In [1]:
#Importing the data extractor scripts
import sys
sys.path.insert(0, 'C:\\Users\\AU451FE\\OneDrive - EY\\Desktop\\Python\\HSreplay_scraper\\Scripts')

#import Extractors
#from UltimateExtractor import UltimateExtractor as UE

#Other useful packages
import time
import datetime
import pandas as pd
import numpy as np
import re #String search
import os

In [2]:
#Machine-specific absolute paths; both must be adjusted before running on another computer.
#driver_path is only referenced by the commented-out extractor call in the cells visible here.
driver_path = 'C:/Users/AU451FE/OneDrive - EY/Desktop/Python/HSreplay_scraper/chromedriver'
#Root folder of the saved deck workbooks; UltimateAnalyzer.load_data looks in <deck_folder>/<date>.
deck_folder = 'C:/Users/AU451FE/OneDrive - EY/Desktop/Python/HSreplay_scraper/Data Frames'

In [3]:
#Employing the UltimateExtractor
#U = UE(driver_path, deck_folder)
#U.archetype_to_excel('MAGE', 'NO MINIon MAGE')

In [122]:
class UltimateAnalyzer:
    '''Analyze deck statistics stored as Excel workbooks.

    Workbooks are looked up under ``<deck_folder>/<date>/``. The deck name of a workbook is the
    part of its file name that precedes the last space. Every workbook is read with all of its
    sheets; the win-rate analysis uses the sheet named 'Overview'.
    '''
    def __init__(self, deck_folder):
        '''Remember the root folder that holds one sub-folder of workbooks per date.

        :args:
        - deck_folder (str): Root folder of the stored deck workbooks.
        '''
        self.deck_folder = deck_folder

    def percentage_to_float(self, number):
        '''Convert percentage text such as '56.8%' into a fraction such as 0.568.

        :args:
        - number (pd.Series or str): Either a series of percentage strings or a single string.

        :usage:
            self.percentage_to_float('56.8%')

        :returns:
        - number (pd.Series or float): The value(s) divided by 100, with the '%' sign removed.
        '''
        try:
            number = number.str.strip('%').astype(float)/100 #For series
        except AttributeError:
            number = float(number.strip('%'))/100 #For single numbers

        return number

    def load_data(self, date, deck = None):
        '''Specify the date and deck name, then load the data from the data repository and return
            this data as either a dict of data frames or a list of such dicts, along with the sheet
            names in similar form. Lastly, return the names of the loaded decks.

        :args:
        - date (str): A date from which to load the data (name of the sub-folder of deck_folder).
        - deck (str): A deck for which to load the data for. If set to None, load the data for all decks.

        :usage:
            self.load_data('07-01', 'Rogue - Miracle Rogue')

        :returns:
        - data (dict or list): Either a dict mapping sheet names to pandas data frames (if deck is
            specified) or a list of these dicts, one per deck file found.
        - data_keys (list): Either a list or a nested list of deck data sheet names, which serve
            to further extract data from individual sheets.
        - deck_names (string or list): Names of decks included in loaded data. Returned either as a string
            if a single deck is analyzed, or as a list, if multiple decks are analyzed.

        :raises:
        - ValueError: If a deck is specified but no matching file exists for the given date.

        :note:
        - If no deck is specified and no files are found, three empty lists are returned.
        - Files whose name contains no space are skipped, as no deck name can be derived from them.
        '''
        #normpath yields the separator of the running OS instead of forcing backslashes
        deck_folder_date = os.path.normpath(f'{self.deck_folder}/{date}')

        #Paths and names are collected together so that both lists always stay aligned
        file_paths = []
        file_names = []
        for dirpath, _dirnames, filenames in os.walk(deck_folder_date):
            for file in filenames:
                name_match = re.search(r'(.+) ', file) #Greedy: everything before the last space
                if name_match is None:
                    continue
                file_paths.append(os.path.join(dirpath, file))
                file_names.append(name_match.group(1))

        if deck is not None:
            deck = deck.title()
            if deck not in file_names:
                raise ValueError(f"No data file for deck '{deck}' found in '{deck_folder_date}'")

            data = pd.read_excel(file_paths[file_names.index(deck)], sheet_name = None)
            return data, list(data), deck

        #sheet_name = None makes read_excel return a dict of all sheets per workbook
        data = [pd.read_excel(path, sheet_name = None) for path in file_paths]
        data_keys = [list(sheets) for sheets in data]

        return data, data_keys, file_names

    def _summarize_overview(self, overview, deck_name):
        '''Build the win-rate table of a single deck from its 'Overview' sheet.

        :args:
        - overview (pd.DataFrame): The 'Overview' sheet, one row per deck variant.
        - deck_name (str): Name used as the index of the returned table.

        :returns:
        - summary (pd.DataFrame): Indexed by 'Deck Name', with the columns 'Versus',
            'Unweighted Win Rate' and 'Weighted Win Rate'.
        '''
        #Select the overall column and every matchup column by name, so that columns located
        #after 'vs. Warlock' (e.g., 'vs. Warrior') are not silently left out
        win_rate_columns = [column for column in overview.columns
                            if column == 'Overall Winrate' or str(column).startswith('vs. ')]
        win_rates = overview.loc[:, win_rate_columns].apply(self.percentage_to_float)
        sample_size = overview.loc[:, 'Sample Size']

        #Unweighted win rates
        WR_unweighted = win_rates.mean(axis = 0)

        #Weighted win rates: every row contributes proportionally to its sample size
        weights = sample_size/sample_size.sum()
        WR_weighted = win_rates.mul(weights, axis = 0).sum(axis = 0)

        summary = pd.DataFrame({'Deck Name': deck_name,
                                'Unweighted Win Rate': WR_unweighted,
                                'Weighted Win Rate' : WR_weighted})

        #The former column labels become the 'Versus' column
        summary = summary.reset_index().set_index('Deck Name')
        return summary.rename(columns = {'index' : 'Versus'})

    def _get_overview(self, sheets):
        '''Return the 'Overview' sheet of a loaded workbook or raise a KeyError naming the problem.'''
        overview = sheets.get('Overview')
        if overview is None:
            raise KeyError("Sheet 'Overview' is missing from the loaded deck data")
        return overview

    def analyze_deck_winrates(self, date = None, deck = None):
        '''Specify a date and a deck name for which to analyze win rates and return these as a pandas data frame.
            Said table contains the deck name and win rates both weighted and unweighted against all classes.

        :args:
        - date (str): The day for which to analyze the win rates.
        - deck (str): The deck for which to analyze the win rates. If set to None, analyze all decks.

        :usage:
            self.analyze_deck_winrates('07-01', 'Rogue - Miracle Rogue')

        :returns:
        - data_output (pd.DataFrame or list): A pandas data frame containing the deck name and win rates
            both weighted and unweighted against all classes. If no deck is specified, a list of
            these data frames, one per deck, in the order returned by load_data.

        :note:
        - The deck name must be passed in a predefined format (e.g., Rogue - Miracle Rogue),
            apart from capitalization, which does not need to be correct.
        '''
        data, data_keys, deck_names = self.load_data(date = date, deck = deck)

        if isinstance(deck_names, list):
            #Multiple decks: data and deck_names are parallel lists
            return [self._summarize_overview(self._get_overview(sheets), deck_name)
                    for sheets, deck_name in zip(data, deck_names)]

        return self._summarize_overview(self._get_overview(data), deck_names)

    def analyze_card_performance(self, date = None, deck = None):
        '''Placeholder for a per-card analysis; not implemented yet and currently returns None.'''
        pass
    
    
    
    

In [123]:
#Create the analyzer on top of the folder configured in deck_folder
UA = UltimateAnalyzer(deck_folder)

In [128]:
#No deck is given, so load_data reads every file found for 07-01 and returns lists
data, data_keys, deck_names = UA.load_data(date = '07-01')

In [69]:
#Keep only the first element; this rebinds `data`, so re-running the cell indexes the new value again
data = data[0]

In [125]:
#Win-rate analysis for a single deck; rebinds `data` to the result of analyze_deck_winrates
data = UA.analyze_deck_winrates('07-01', deck = 'Rogue - Miracle Rogue')

In [76]:
#Step-by-step version of the win-rate computation, run on whatever `data` currently holds
overview = data.get('Overview')
win_rates = (
    overview
    .loc[:, 'Overall Winrate':'vs. Warlock']
    .apply(UA.percentage_to_float)
)
sample_size = overview.loc[:, 'Sample Size']

#Unweighted win rates: plain mean of every column
WR_unweighted = win_rates.mean(axis = 0)

#Weighted win rates: rows are scaled by their share of the total sample size, then summed
weights = sample_size / sum(sample_size)
temp = win_rates.mul(weights, axis = 0)
WR_weighted = temp.sum(axis = 0)

In [59]:
#data_output = UA.analyze_deck_winrates(date = '07-01')
#Same analysis without a deck argument, i.e., for everything load_data finds for 07-01
data = UA.analyze_deck_winrates(date = '07-01')

In [129]:
#Display the first element of `data`; the output below is a dict of sheets keyed by sheet name
data[0]

{'Overview':                 Deck Code Match Duration  Turns Turn Duration Overall Winrate  \
 0  jKOngzJJZiRB6ul4BgQIlc    6.9 minutes    8.0    26 seconds           63.1%   
 1  LES9m7ekXEQB0loObib8Kc    8.1 minutes    9.0    27 seconds           54.6%   
 2  TbGN1eIEkiEhFMKo9kEpvd    7.4 minutes    8.5    26 seconds           63.9%   
 3  W7a6M2H5xvmLRFQvjbwbde    7.8 minutes    9.0    26 seconds           63.3%   
 4  5bgvRsdL9cWgq3zS8dX9hb    7.3 minutes    9.0    24 seconds           45.1%   
 
   vs. Demon Hunter vs. Druid vs. Hunter vs. Mage vs. Paladin vs. Priest  \
 0            66.4%     52.9%      48.3%    73.9%       77.7%      65.9%   
 1            61.3%     43.1%      51.8%    62.4%       67.3%      59.6%   
 2            66.1%     63.8%      53.1%    68.8%       63.2%      72.1%   
 3            75.0%     57.1%      57.1%    68.1%       58.8%      66.7%   
 4            61.5%     70.4%      44.1%    41.2%       47.8%      23.3%   
 
   vs. Rogue vs. Shaman vs. Warlock 

In [80]:
#Display the 'Overview' sheet; note that 'vs. Warrior' and 'Sample Size' follow 'vs. Warlock'
overview

Unnamed: 0,Deck Code,Match Duration,Turns,Turn Duration,Overall Winrate,vs. Demon Hunter,vs. Druid,vs. Hunter,vs. Mage,vs. Paladin,vs. Priest,vs. Rogue,vs. Shaman,vs. Warlock,vs. Warrior,Sample Size
0,Lb2Z0L0DX7ZyyETp2kTNMb,8.8 minutes,9.5,28 seconds,56.8%,53.7%,47.8%,49.1%,65.1%,60.1%,57.1%,56.8%,61.7%,50.0%,61.8%,3700
1,qsYBtDORz4omc8rMfLrBTh,8.7 minutes,9.0,29 seconds,62.7%,63.5%,61.6%,57.5%,67.6%,70.2%,60.0%,69.5%,65.6%,51.8%,59.1%,3300
2,SkdknR4pbZjwU2I6TVg7ub,7.9 minutes,9.0,26 seconds,62.0%,63.4%,65.4%,68.3%,64.3%,68.7%,48.6%,72.3%,66.3%,49.4%,53.1%,1800
3,VaH7QfgJQIPb5StN5W98ee,8.0 minutes,9.0,27 seconds,63.3%,63.3%,53.9%,64.6%,66.1%,61.4%,55.4%,66.7%,74.1%,50.9%,67.1%,1000
4,70F94fnLqQOKFTGJTpiDzh,8.8 minutes,9.5,28 seconds,57.9%,48.5%,52.0%,60.0%,65.4%,69.7%,55.8%,59.6%,62.3%,48.3%,54.7%,1000
5,Aobo08Y6K6WAX4hrbdhgwf,8.2 minutes,9.0,27 seconds,60.1%,63.6%,55.6%,59.6%,62.0%,56.0%,47.1%,63.2%,66.2%,59.0%,65.6%,770
6,eJsFs1sYYL0Eip2aE3OCWe,8.8 minutes,9.0,29 seconds,61.4%,50.0%,67.2%,68.6%,66.7%,54.2%,39.3%,84.0%,62.0%,58.5%,72.1%,620
7,1TqOuo1R1id6wf12owKFjh,8.7 minutes,9.5,27 seconds,65.9%,70.7%,63.6%,65.7%,65.5%,72.5%,73.4%,70.4%,59.8%,58.2%,63.0%,620
8,8ooxNX8kwNyrIAGQ4dVwfh,8.6 minutes,9.5,27 seconds,60.9%,51.5%,38.9%,64.3%,64.4%,57.7%,70.8%,66.7%,68.4%,52.0%,69.8%,610
9,YQfCYfRgf86MKKZ3Nqr0M,8.3 minutes,9.5,26 seconds,60.7%,57.1%,50.0%,63.8%,71.9%,53.3%,62.5%,76.5%,62.0%,50.0%,53.6%,510


In [38]:
#Remove '%' from both ends of every value; the result is still text, not numbers.
#Requires win_rates to hold the raw strings (presumably assigned in an earlier run — verify).
win_rates.apply(lambda x: x.str.strip('%'))

Unnamed: 0,Overall Winrate,vs. Demon Hunter,vs. Druid,vs. Hunter,vs. Mage,vs. Paladin,vs. Priest,vs. Rogue,vs. Shaman,vs. Warlock
0,56.8,53.7,47.8,49.1,65.1,60.1,57.1,56.8,61.7,50.0
1,62.7,63.5,61.6,57.5,67.6,70.2,60.0,69.5,65.6,51.8
2,62.0,63.4,65.4,68.3,64.3,68.7,48.6,72.3,66.3,49.4
3,63.3,63.3,53.9,64.6,66.1,61.4,55.4,66.7,74.1,50.9
4,57.9,48.5,52.0,60.0,65.4,69.7,55.8,59.6,62.3,48.3
5,60.1,63.6,55.6,59.6,62.0,56.0,47.1,63.2,66.2,59.0
6,61.4,50.0,67.2,68.6,66.7,54.2,39.3,84.0,62.0,58.5
7,65.9,70.7,63.6,65.7,65.5,72.5,73.4,70.4,59.8,58.2
8,60.9,51.5,38.9,64.3,64.4,57.7,70.8,66.7,68.4,52.0
9,60.7,57.1,50.0,63.8,71.9,53.3,62.5,76.5,62.0,50.0


In [69]:
#Label slices are inclusive and this one ends at 'vs. Warlock', so the 'vs. Warrior'
#column of overview is not part of the selection
overview.loc[:, 'Overall Winrate':'vs. Warlock']

Unnamed: 0,Overall Winrate,vs. Demon Hunter,vs. Druid,vs. Hunter,vs. Mage,vs. Paladin,vs. Priest,vs. Rogue,vs. Shaman,vs. Warlock
0,56.8%,53.7%,47.8%,49.1%,65.1%,60.1%,57.1%,56.8%,61.7%,50.0%
1,62.7%,63.5%,61.6%,57.5%,67.6%,70.2%,60.0%,69.5%,65.6%,51.8%
2,62.0%,63.4%,65.4%,68.3%,64.3%,68.7%,48.6%,72.3%,66.3%,49.4%
3,63.3%,63.3%,53.9%,64.6%,66.1%,61.4%,55.4%,66.7%,74.1%,50.9%
4,57.9%,48.5%,52.0%,60.0%,65.4%,69.7%,55.8%,59.6%,62.3%,48.3%
5,60.1%,63.6%,55.6%,59.6%,62.0%,56.0%,47.1%,63.2%,66.2%,59.0%
6,61.4%,50.0%,67.2%,68.6%,66.7%,54.2%,39.3%,84.0%,62.0%,58.5%
7,65.9%,70.7%,63.6%,65.7%,65.5%,72.5%,73.4%,70.4%,59.8%,58.2%
8,60.9%,51.5%,38.9%,64.3%,64.4%,57.7%,70.8%,66.7%,68.4%,52.0%
9,60.7%,57.1%,50.0%,63.8%,71.9%,53.3%,62.5%,76.5%,62.0%,50.0%
