In [84]:
#Standard library
import datetime
import sys
import time
from datetime import date

#The core of the method
#!{sys.executable} -m pip install -U selenium
import selenium
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

#Other useful packages
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import requests

#import DeckAnalyzer as DA

In [79]:
#Specify the folder with the driver and the deck to be analyzed
driver_path = 'C:/Users/AU451FE/OneDrive - EY/Desktop/Python/HSreplay_Scraper/chromedriver'
#deck_code = 'KkJ6wKjyCExtjOrYhCNCZe' #Face Hunter
deck_code = 'TDJ576DqVJdHynQToOkWtb' #No Minion Mage

In [98]:
class DeckAnalyzer:
    '''
    Insert the path to the driver and link for the deck hsreplay website and get an analysis of said deck
    
    Redownload the driver here if the version is outdated
    https://chromedriver.chromium.org/
    '''
    def __init__(self, driver_path, deck_code):
        self.driver = webdriver.Chrome(executable_path = driver_path)
        
        self.deck_code = deck_code
        self.title = deck_code

        #self.title = self.driver.title.split()[:-2]     #Define the title of the deck
        #self.title = ' '.join([str(item) for item in self.title])
    
        
        
    def open_driver(self, information):
        '''Put in the information you wish to extract and open a driver with a website containing said information
        '''
        if information == 'Overview':
            self.driver.get(f'https://hsreplay.net/decks/{self.deck_code}/#gameType=RANKED_STANDARD&tab=overview')
        elif information == 'Card info':
            self.driver.get(f'https://hsreplay.net/decks/{self.deck_code}/#gameType=RANKED_STANDARD')
        else:
            raise Exception(f'The desired information is not specified properly.')
        
        self.driver.maximize_window()

        try:
            WebDriverWait(self.driver, 10).until(lambda x: x.find_element_by_class_name('css-flk0bs'))
            self.driver.find_element_by_class_name('css-flk0bs').click()
        except TimeoutException:
            raise Exception('The privacy window has not shown up; try running the script again')

            
    def get_card_info(self):
        '''
        Get the card mana count, name and card count as a list called 'cards'
        '''
        self.open_driver('Card info')
        data = self.driver.find_elements_by_class_name('table-row-header')
        cards = []
        for d in data:
            info = d.text
            txt = info.rsplit('\n')
            if len(txt) == 3:
                mana_cost = int(txt[0])
                card_name = txt[2]
                card_count = int(txt[1].replace('★', '1'))

                row = [mana_cost, card_name, card_count]
                cards.append(row)
            elif len(txt) == 2:
                mana_cost = int(txt[0])
                card_name = txt[1]
                card_count = 1
                
                row = [mana_cost, card_name, card_count]
                cards.append(row)
            else:
                raise Exception('Error - the scraper is not reading the card information properly')
                
        self.driver.quit()
        return cards
    
    def get_further_info(self):
        '''
        Get the remaining statistics about the cards in the deck and return these as a list called 'further_info'
        '''
        self.open_driver('Card info')           
        
        data = self.driver.find_elements_by_class_name('table-cell')
        further_info = []
        for f in range(int(len(data)/6)):
            mull_wr = data[0+6*f].text.replace('▼', '').replace('▲', '')
            per_kept = data[1+6*f].text
            drawn_wr = data[2+6*f].text.replace('▼', '').replace('▲', '')
            played_wr = data[3+6*f].text.replace('▼', '').replace('▲', '')
            turns_held = float(data[4+6*f].text)
            turns_played = float(data[5+6*f].text)
            
            row = [mull_wr, per_kept, drawn_wr, played_wr, turns_held, turns_played]
            further_info.append(row)
    
        self.driver.quit()    
        return further_info
    
    
    def get_card_info_df(self):
        '''
        Analyze the mulligan guide page of the deck and store this information in a data frame
        '''
        print(f'Generating the card info for deck {self.title}')
        card_info = self.get_card_info()
        print(f'Card info obtained')
        further_info = self.get_further_info()
        print(f'Further info obtained')
        df_card = pd.DataFrame(card_info, columns = ['Mana Cost', 'Card Name', 'Card Count'])
        df_further = pd.DataFrame(further_info, columns = ['Mulligan WR', 'Kept', 'Drawn WR', 
                                                           'Played WR', 'Turns Held', 'Turns Played'])
        
        df = pd.concat([df_card, df_further], axis = 1)
        print(f'Final data frame generated')
        return df
    
    
    def get_overview_df(self):
        '''
        Analyze the overview page of the deck and store this information in a data frame
        '''
        print(f'Generating the overview')
        self.open_driver('Overview')
        
        data = self.driver.find_elements_by_xpath("//tr/td[2]")
        
        overview = []
        overview.append(self.deck_code)
        for d in data:
            text = d.text.replace('▼', '').replace('▲', '')
            overview.append(text)
        
        #Add sample size manually
        sample_size = int(self.driver.find_element_by_xpath("//*[@id='deck-container']/div/aside/section/ul/li[1]/span").text.replace(' games', '').replace(',',''))
        overview.append(sample_size)
        
        overview = [overview]
        
        df = pd.DataFrame(overview, columns = ['Deck Code', 'Match Duration', 'Turns', 'Turn Duration', 'Overall Winrate',
                                               'vs. Demon Hunter', 'vs. Druid', 'vs. Hunter',
                                               'vs. Mage', 'vs. Paladin', 'vs. Priest', 'vs. Rogue',
                                               'vs. Shaman', 'vs. Warlock', 'vs. Warrior', 'Sample Size'])
        
        self.driver.quit()
        return df
    
    def write_to_excel(self, today = date.today().strftime("%m-%d")):
        df1 = self.get_overview_df()
        df2 = self.get_card_info_df()
        
        with pd.ExcelWriter(f'C:/Users/AU451FE/OneDrive - EY/Desktop/Python/HSreplay_Scraper/Data Frames/{self.title} {today}.xlsx') as writer:
            df1.to_excel(writer, sheet_name = 'Overview')
            df2.to_excel(writer, sheet_name = 'Card_Info')


In [99]:

D = DeckAnalyzer(driver_path, deck_code)
D.open_driver('Overview')
#df = D.get_overview_df()



In [100]:
#Shut down the Chrome session held by the analyzer created in the previous cell
D.driver.quit()

In [86]:
#Exploratory: print the documentation of WebDriverWait (constructor arguments and usage example)
help(WebDriverWait)


Help on class WebDriverWait in module selenium.webdriver.support.wait:

class WebDriverWait(builtins.object)
 |  WebDriverWait(driver, timeout, poll_frequency=0.5, ignored_exceptions=None)
 |  
 |  Methods defined here:
 |  
 |  __init__(self, driver, timeout, poll_frequency=0.5, ignored_exceptions=None)
 |      Constructor, takes a WebDriver instance and timeout in seconds.
 |      
 |      :Args:
 |       - driver - Instance of WebDriver (Ie, Firefox, Chrome or Remote)
 |       - timeout - Number of seconds before timing out
 |       - poll_frequency - sleep interval between calls
 |         By default, it is 0.5 second.
 |       - ignored_exceptions - iterable structure of exception classes ignored during calls.
 |         By default, it contains NoSuchElementException only.
 |      
 |      Example:
 |       from selenium.webdriver.support.ui import WebDriverWait 
 |      
 |       element = WebDriverWait(driver, 10).until(lambda x: x.find_element_by_id("someId")) 
 |      
 |     

In [76]:
#Display the overview data frame.
#NOTE(review): the only visible assignment of `df` is the commented-out
#`#df = D.get_overview_df()` in an earlier cell, so this cell presumably fails
#on a fresh kernel - confirm and restore that assignment.
df

Unnamed: 0,Deck Code,Match Duration,Turns,Turn Duration,Overall Winrate,vs. Demon Hunter,vs. Druid,vs. Hunter,vs. Mage,vs. Paladin,vs. Priest,vs. Rogue,vs. Shaman,vs. Warlock,vs. Warrior,Sample Size
0,TDJ576DqVJdHynQToOkWtb,8.4 minutes,9.0,28 seconds,56.4%,54.0%,53.6%,42.8%,64.3%,49.6%,67.8%,57.2%,47.9%,66.2%,59.4%,41000


In [83]:
#Exploratory: list the attributes and methods available on the analyzer's driver object
dir(D.driver)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_file_detector',
 '_is_remote',
 '_mobile',
 '_switch_to',
 '_unwrap_value',
 '_web_element_cls',
 '_wrap_value',
 'add_cookie',
 'application_cache',
 'back',
 'capabilities',
 'close',
 'command_executor',
 'create_options',
 'create_web_element',
 'current_url',
 'current_window_handle',
 'delete_all_cookies',
 'delete_cookie',
 'desired_capabilities',
 'error_handler',
 'execute',
 'execute_async_script',
 'execute_cdp_cmd',
 'execute_script',
 'file_detector',
 'file_detector_context',
 'find_element',
 'find_element_by_class_name',
 'find_element_by_css_selector',
 'find_element_by_id',
 

In [None]:
#Open the external browser for the analysis of the configured deck
#(variant using the DeckAnalyzer module instead of the class defined in this notebook)
#D = DA.DeckAnalyzer(driver_path, deck_code)
#D.write_to_excel()

D = DeckAnalyzer(driver_path=driver_path, deck_code=deck_code)