In [1]:
#The core of the method
#!{sys.executable} -m pip install -U selenium
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys


#Other useful packages
import sys
from bs4 import BeautifulSoup
import requests
import time
from datetime import date
import datetime
import pandas as pd
import numpy as np

In [2]:
#Specify the path to the chromedriver binary and the deck to be analyzed
# driver_path is handed to DeckAnalyzer, which uses it to start Chrome.
# NOTE(review): this is an absolute, machine-specific path - adjust it before running on another machine.
driver_path = 'C:/Users/AU451FE/OneDrive - EY/Desktop/Python/HSreplay Scraper/chromedriver'
# deck_code is the identifier DeckAnalyzer inserts into the hsreplay.net deck URL.
deck_code = 'KkJ6wKjyCExtjOrYhCNCZe'

In [4]:
class DeckAnalyzer:
    '''
    Scrape per-card statistics for a single deck from the hsreplay website.

    Insert the path to the driver and the code of the deck on the hsreplay
    website and get an analysis of said deck.

    Redownload the driver here if the version is outdated
    https://chromedriver.chromium.org/

    NOTE(review): `executable_path` and `find_element(s)_by_class_name` are
    legacy Selenium calls; they are kept because the installed Selenium exposes
    them - confirm before upgrading Selenium.
    '''

    # Number of 'table-cell' elements that make up the statistics of one card.
    CELLS_PER_CARD = 6
    # Seconds to wait for the privacy settings pop-up before trying to dismiss it.
    PRIVACY_WAIT_SECONDS = 1.5

    def __init__(self, driver_path, deck_code):
        '''
        Start Chrome, open the deck page and try to dismiss the privacy pop-up.

        driver_path: path to the chromedriver executable
        deck_code:   deck identifier used in the hsreplay.net deck URL
        '''
        self.deck_code = deck_code
        self.driver = webdriver.Chrome(executable_path = driver_path)
        self.driver.get(self._deck_url())

        self.driver.maximize_window()      #Maximize the window
        self.title = self.driver.title

        print("Waiting for the privacy settings window to pop up")       #Agree to the privacy settings
        time.sleep(self.PRIVACY_WAIT_SECONDS)

        # Dismissing the pop-up is best effort: the scraper continues either way.
        # Only Exception is caught so Ctrl-C / SystemExit still stop the run, and the
        # success message is printed only when the click really happened.
        try:
            agree = self.driver.find_element_by_class_name('css-flk0bs')
            agree.click()
        except Exception as err:
            print(f"Privacy settings window was not dismissed ({type(err).__name__}), continuing")
        else:
            print("Privacy settings window closed")

    def _deck_url(self, tab = None):
        '''
        Build the URL of the deck page; 'tab' optionally selects a tab of that page
        '''
        url = f'https://hsreplay.net/decks/{self.deck_code}/#gameType=RANKED_STANDARD'
        if tab is not None:
            url += f'&tab={tab}'
        return url

    def _ensure_page(self, url):
        '''
        Navigate the browser to 'url' unless it is already the current URL
        '''
        if self.driver.current_url != url:
            self.driver.get(url)

    @staticmethod
    def _strip_trend(text):
        '''
        Remove the up/down trend arrows from a scraped statistic
        '''
        return text.replace('▼', '').replace('▲', '')

    def close(self):
        '''
        Quit the browser session started by the constructor
        '''
        self.driver.quit()

    def get_card_info(self):
        '''
        Get the card mana count, name and card count as a list called 'cards'

        Each row is [mana_cost, card_name, card_count] and every entry is a string.
        Scraping stops at the first row that does not have two or three lines of
        text; the rows collected up to that point are returned.
        '''
        self._ensure_page(self._deck_url())

        cards = []
        for header in self.driver.find_elements_by_class_name('table-row-header'):
            parts = header.text.split('\n')
            if len(parts) == 3:
                # mana cost / count / name; a star in the count position is stored as '1'
                mana_cost, raw_count, card_name = parts
                card_count = raw_count.replace('★', '1')
            elif len(parts) == 2:
                # No count is shown: store '1' as a string so the column has a single type
                mana_cost, card_name = parts
                card_count = '1'
            else:
                print('Error - the scraper is not reading the card information properly')
                break

            cards.append([mana_cost, card_name, card_count])

        return cards

    def get_further_info(self):
        '''
        Get the remaining statistics about the cards in the deck and return these as a list called 'further_info'

        Each row is [mull_wr, per_kept, drawn_wr, played_wr, turns_held, turns_played],
        built from CELLS_PER_CARD consecutive 'table-cell' elements. Trend arrows are
        stripped from the three win-rate values. Trailing cells that do not fill a
        complete row are ignored.
        '''
        self._ensure_page(self._deck_url())

        cells = self.driver.find_elements_by_class_name('table-cell')
        width = self.CELLS_PER_CARD
        further_info = []
        for row_index in range(len(cells) // width):
            start = row_index * width
            (mull_wr, per_kept, drawn_wr,
             played_wr, turns_held, turns_played) = [cell.text for cell in cells[start:start + width]]

            further_info.append([self._strip_trend(mull_wr), per_kept, self._strip_trend(drawn_wr),
                                 self._strip_trend(played_wr), turns_held, turns_played])

        return further_info


    def get_data_frame(self):
        '''
        Return a data frame with all information available on the deck

        The card columns and the statistics columns are joined side by side by row position.
        '''
        print(f'Generating the data frame for deck {self.title}')
        card_info = self.get_card_info()
        further_info = self.get_further_info()
        df_card = pd.DataFrame(card_info, columns = ['Mana Cost', 'Card Name', 'Card Count'])
        df_further = pd.DataFrame(further_info, columns = ['Mulligan WR', 'Kept', 'Drawn WR', 
                                                           'Played WR', 'Turns Held', 'Turns Played'])
        df = pd.concat([df_card, df_further], axis = 1)

        return df


    def get_overview(self):
        '''
        Analyze the overview page of the deck

        Currently this only navigates to the overview tab and returns None.
        '''
        self._ensure_page(self._deck_url(tab = 'overview'))

        return None
        
        

In [6]:
#Test driver
# Creating the analyzer opens a Chrome window and loads the deck page for deck_code.
T = DeckAnalyzer(driver_path, deck_code)
# Exploratory: list the attributes available on the underlying WebDriver object.
dir(T.driver)


Waiting for the privacy settings window to pop up
Privacy settings window closed


['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_file_detector',
 '_is_remote',
 '_mobile',
 '_switch_to',
 '_unwrap_value',
 '_web_element_cls',
 '_wrap_value',
 'add_cookie',
 'application_cache',
 'back',
 'capabilities',
 'close',
 'command_executor',
 'create_options',
 'create_web_element',
 'current_url',
 'current_window_handle',
 'delete_all_cookies',
 'delete_cookie',
 'desired_capabilities',
 'error_handler',
 'execute',
 'execute_async_script',
 'execute_cdp_cmd',
 'execute_script',
 'file_detector',
 'file_detector_context',
 'find_element',
 'find_element_by_class_name',
 'find_element_by_css_selector',
 'find_element_by_id',
 

In [7]:
# Exploratory: print the built-in documentation of the driver's page-loading method.
help(T.driver.get)

Help on method get in module selenium.webdriver.remote.webdriver:

get(url) method of selenium.webdriver.chrome.webdriver.WebDriver instance
    Loads a web page in the current browser session.

