# Imports

In [13]:
import time, json, os
from selenium import webdriver
# import by and keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium_stealth import stealth


# Constants

In [14]:
# Root URL of the scraped site; a format name is appended directly to it to build page URLs.
BASE_URL = "https://mtgdecks.net/"
# Format names, spelled exactly as they are used in URLs and as keys of getFormatUrlSufix().
# The order matters: the running cells below select formats by position (e.g. FORMAT_LIST[7:8]).
FORMAT_LIST = [
  'Standard',
  'Pioneer',
  'Modern',
  'Pauper',
  'Alchemy',
  'Explorer',
  'Historic',
  'Timeless',
  'Commander',
  'Duel-Commander',
  'Brawl',
  'Historic-Brawl',
  'Legacy',
  'Vintage',
  'Premodern',
  'Old-school'
]

# Chrome options shared by every driver created through DefaultDriver().
options = webdriver.ChromeOptions()
# options.add_argument("start-maximized")
# options.add_argument("--headless=new")
# options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument("--start-minimized")

# optimization attempt
options.add_argument("--disable-gpu")
options.add_argument("--blink-settings=imagesEnabled=false")  # Disables images
options.add_argument("--disable-extensions")  # Removes extensions
options.add_argument("--disable-blink-features=AutomationControlled")  # Hides Selenium
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
# NOTE(review): the value 2 is intended to mean "block"; confirm that Chrome honors
# the stylesheets and fonts keys, not only the images one.
prefs = {
    "profile.managed_default_content_settings.images": 2,  # Blocks images
    "profile.managed_default_content_settings.stylesheets": 2,  # Blocks CSS
    "profile.managed_default_content_settings.fonts": 2,  # Blocks fonts
}
options.add_experimental_option("prefs", prefs)
# NOTE(review): "eager" presumably lets driver.get() return before every sub-resource
# has loaded, which would be why later code waits explicitly for elements — confirm.
options.page_load_strategy = "eager"

def DefaultDriver():
  '''Create a Chrome driver configured with the module-level `options`.

  The browser window is minimized right after start-up. The caller owns the
  returned driver and is responsible for calling `quit()` on it.
  '''
  chrome = webdriver.Chrome(options=options)
  chrome.minimize_window()
  return chrome

# Default database file name used by save() and getData(); both place it under the data/ directory.
FILENAME = "mtgdecks.json"

# Types

In [15]:
class Archetype:
  '''A deck archetype of one format, together with the decks scraped for it.

  Attributes:
    name: display name of the archetype.
    tier: tier text as shown on the site.
    deckQuantity: deck count text as shown on the site.
    format: format name the archetype belongs to.
    url: archetype page URL (given explicitly or derived from name and format).
    deckList: decks of this archetype; entries may be objects exposing `toDict()`
      or plain dicts that were already serialized (e.g. loaded back from JSON).
  '''

  def __init__(self, name, tier, deckQuantity, format, url=''):
    self.name = name
    self.tier = tier
    self.deckQuantity = deckQuantity
    self.format = format
    # An empty url means "derive it from the name"; an explicit url is kept as is.
    self.url = self.makeUlr(name, format) if url == '' else url
    self.deckList = []

  def makeUlr(self, name: str, format: str):
    '''Build the archetype page URL from its display name and format.

    Spaces, slashes and apostrophes become dashes, commas are dropped and the
    name is lower-cased.
    '''
    slug = name.replace(" ", "-").lower()
    for char, replacement in (("/", "-"), (",", ""), ("'", "-")):
      slug = slug.replace(char, replacement)
    return f"https://mtgdecks.net/{format}/{slug}"

  def __str__(self):
    return f"[{self.name} - {self.tier} - {self.deckQuantity} - {self.format} - {self.url}]"

  # repr and str intentionally share the same text.
  __repr__ = __str__

  def toDict(self):
    '''Return a JSON-serializable dict describing the archetype and its decks.

    Entries of `deckList` that expose `toDict()` are converted through it;
    entries that are already plain data (dicts loaded from a saved file) are
    passed through unchanged instead of raising AttributeError.
    '''
    deckListDict = [
      deck.toDict() if hasattr(deck, 'toDict') else deck
      for deck in self.deckList
    ]
    return {
      "name": self.name,
      "tier": self.tier,
      "deckQuantity": self.deckQuantity,
      "format": self.format,
      "url": self.url,
      "deckList": deckListDict
    }

class Deck:
  '''A single deck entry scraped from an archetype page.

  Attributes:
    name: deck name text.
    deckType: the owning archetype object (only its `name` attribute is read here).
    playersQuantity: number of players as parsed by the caller.
    type: event/type text of the deck row.
    rank: rank text of the deck row.
    url: deck page URL.
    cards: cards of the deck; empty until filled by the caller.
  '''

  def __init__(self, name, deckType, url, _type, rank, playersQuantity):
    self.name = name
    self.deckType = deckType
    # Keep the value the caller parsed; it used to be discarded and replaced by 0.
    self.playersQuantity = playersQuantity
    self.type = _type
    self.rank = rank
    self.url = url
    self.cards = []

  def __str__(self):
    return f"[{self.name} - {self.deckType.name} - {self.playersQuantity} - {self.type} - {self.rank} - {self.url}]"

  # repr and str intentionally share the same text.
  __repr__ = __str__

  def toDict(self):
    '''Return a JSON-serializable dict describing the deck.

    The archetype is stored by name only, which avoids serializing the
    archetype (and through it every deck) recursively. `cards` is emitted as a
    copy of the current list, so cards attached to the deck are not lost on save.
    '''
    return {
      "name": self.name,
      "deckType": self.deckType.name,
      "playersQuantity": self.playersQuantity,
      "type": self.type,
      "rank": self.rank,
      "url": self.url,
      "cards": list(self.cards)
    }


# General Methods

In [16]:
def getFormatUrlSufix(format):
  '''Return the URL suffix to append after the format name.

  Commander-style formats use the "/date-6" suffix; every other known format
  uses the two-year metagame suffix. An unknown format raises KeyError.
  '''
  commanderStyle = ('Commander', 'Duel-Commander', 'Brawl', 'Historic-Brawl')
  metagameStyle = (
    'Standard', 'Pioneer', 'Modern', 'Pauper',
    'Alchemy', 'Explorer', 'Historic', 'Timeless',
    'Legacy', 'Vintage', 'Premodern', 'Old-school',
  )
  sufixByFormat = {name: "/metagame:last-2-years" for name in metagameStyle}
  sufixByFormat.update({name: "/date-6" for name in commanderStyle})
  return sufixByFormat[format]
  
def gotoFormatUrl(format: str, shouldQuit: bool = False, skipShowAll: bool = False):
  '''Open the page of a format in a fresh driver and expand its archetype list.

  Args:
    format: format name, as listed in FORMAT_LIST.
    shouldQuit: when True the driver is quit before returning.
    skipShowAll: when True the "show all" button is not clicked automatically;
      the function prints a prompt and sleeps for five seconds instead
      (presumably so the button can be clicked by hand — confirm).

  Returns:
    A `(success, driver)` tuple. On any error the driver has already been quit
    and `success` is False.
  '''
  driver = DefaultDriver()
  print(f"Scrap de {format}")
  loaderSelector = '#archetypes-loader, #commander-loader'
  try:
    stealth(
      driver,
      languages=["en-US", "en"],
      vendor="Google Inc.",
      platform="Win32",
      webgl_vendor="Intel Inc.",
      renderer="Intel Iris OpenGL Engine",
      fix_hairline=True,
    )

    formatUrl = BASE_URL + format + getFormatUrlSufix(format)
    driver.get(formatUrl)
    print("Acessando " + format + " em " + formatUrl)
    time.sleep(1)

    # Scroll to the middle of the page before looking for the loader button.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight * 0.5);")
    print('SCROLLOU')
    time.sleep(1)

    if skipShowAll:
      # The automatic click is skipped (used for the commander-style formats).
      print("CLICA AGORA")
      time.sleep(5)
    else:
      # The "show all" button has the id archetypes-loader or commander-loader.
      WebDriverWait(driver, 10).until(lambda d: d.find_element(By.CSS_SELECTOR, loaderSelector))
      loadMoreButton = driver.find_element(By.CSS_SELECTOR, loaderSelector)
      if loadMoreButton is not None:
        loadMoreButton.click()

    if shouldQuit:
      driver.quit()
    return True, driver
  except Exception as e:
    print(f"Error on access format {format}: {e}")
    driver.quit()
    return False, driver
  
def getArchetypeFromHtml(archetypeListItem, format):
  '''Build an Archetype from one row element of the archetype table.

  Column order of the row: image, name, meta share, tier, deck quantity, price, ??
  The name is read from the link in column 2, the tier from column 4 and the
  deck quantity from column 5; all three are kept as text.
  '''
  def cellText(selector):
    return archetypeListItem.find_element(By.CSS_SELECTOR, selector).text

  return Archetype(
    cellText('td:nth-child(2) a'),
    cellText('td:nth-child(4)'),
    cellText('td:nth-child(5)'),
    format,
  )
  
def getDeckInfoFromHtml(rowInfo, rowUrl, deckType):
  '''Build a Deck from the two row elements that describe the same deck.

  Args:
    rowInfo: row whose cells hold the rank, name, type and players quantity.
    rowUrl: row whose second cell holds the link to the deck page.
    deckType: archetype object the deck belongs to.
  '''
  infoCells = rowInfo.find_elements(By.CSS_SELECTOR, 'td')
  urlCells = rowUrl.find_elements(By.CSS_SELECTOR, 'td')

  # Only the first line of the rank cell is the rank itself.
  rank = infoCells[0].text.split('\n')[0]
  name = infoCells[2].text.replace('\n', ' ')
  _type = infoCells[4].text

  # An empty players cell counts as zero players.
  playersText = infoCells[7].text
  if playersText == '':
    playersQuantity = 0
  else:
    playersQuantity = int(playersText)

  url = urlCells[1].find_element(By.CSS_SELECTOR, 'a').get_attribute('href')
  return Deck(name, deckType, url, _type, rank, playersQuantity)

def getAllArchetypes(format, driver: webdriver.Chrome):
  '''Read every archetype listed on the format page currently open in `driver`.

  Waits up to 10 seconds for the archetype table (id allArchetypes or
  commanders) and returns a dict mapping archetype name to Archetype. When two
  rows share a name, the later row wins.
  '''
  tableSelector = '#allArchetypes, #commanders'
  WebDriverWait(driver, 10).until(lambda d: d.find_element(By.CSS_SELECTOR, tableSelector))
  table = driver.find_element(By.CSS_SELECTOR, tableSelector)
  # Only rows inside the table body are archetypes.
  rows = table.find_element(By.CSS_SELECTOR, 'tbody').find_elements(By.CSS_SELECTOR, 'tr')
  print(f'Getting archetypes from {format}')
  parsedArchetypes = (getArchetypeFromHtml(row, format) for row in rows)
  return {archetype.name: archetype for archetype in parsedArchetypes}

def getDeckFromArchetype(format, archetype: Archetype, maxRetries: int = 3, maxSkippedPages: int = 3):
  '''Collect the decks of an archetype by walking its paginated deck list.

  A fresh driver is opened for every page request and the previous one is quit.
  The walk ends when the last pagination item is disabled, when the page shows
  no pagination bar at all, or when too many consecutive pages had to be skipped.

  Args:
    format: format name, used only in log messages.
    archetype: archetype whose `url` is paged through; it is also stored as
      the `deckType` of every returned deck.
    maxRetries: failed attempts tolerated on one page before it is skipped.
    maxSkippedPages: consecutive skipped pages after which the walk gives up,
      so an unreachable site cannot keep the loop running forever.

  Returns:
    The list of Deck objects gathered from every page that could be read.
  '''
  hasEnded = False
  page = 1
  retries = 0
  skippedPages = 0
  allDecksList = []
  driver = None
  print(f'Getting decks from {archetype.name} format {format} from url {archetype.url}')
  try:
    while not hasEnded:
      try:
        if driver is not None:
          driver.quit()
          driver = None
        driver = DefaultDriver()
        driver.get(archetype.url + f"/page:{page}")

        # Two tables describe the same decks: one carries the text columns, the other the links.
        tableText = driver.find_element(By.CSS_SELECTOR, 'table.clickable.table.table-striped.hidden-xs tbody')
        tableUrl = driver.find_element(By.CSS_SELECTOR, 'table.clickable.table.table-striped.hidden-sm.hidden-md.hidden-lg tbody')
        rowsText = tableText.find_elements(By.CSS_SELECTOR, 'tr')   # the first row is empty, skip it
        rowsUrl = tableUrl.find_elements(By.CSS_SELECTOR, 'tr')   # the first row is empty, skip it

        # Parse into a page-local list first: if anything below raises, the retry
        # must not find decks of this page already appended to the result.
        pageDecks = [
          getDeckInfoFromHtml(rowsText[i], rowsUrl[i], archetype)
          for i in range(1, len(rowsText))
        ]

        # NOTE(review): a page without a pagination bar is treated as the only page — confirm.
        paginations = driver.find_elements(By.CSS_SELECTOR, 'ul.pagination')
        paginationItems = paginations[0].find_elements(By.CSS_SELECTOR, 'li') if paginations else []
        if not paginationItems or 'disabled' in (paginationItems[-1].get_attribute('class') or ''):
          hasEnded = True

        allDecksList.extend(pageDecks)
        print(f'Fez a pagina {page} - {archetype.name}')
        page += 1
        retries = 0
        skippedPages = 0
      except Exception as e:
        print(f"Error on page {page} on {archetype.name} format {format}: {e}")
        retries += 1
        if retries > maxRetries:
          # Give up on this page; the next page starts with a fresh retry budget.
          page += 1
          retries = 0
          skippedPages += 1
          if skippedPages >= maxSkippedPages:
            print(f"Giving up on {archetype.name} format {format} after {skippedPages} skipped pages in a row")
            hasEnded = True
  finally:
    # Always release the last browser, including on interruption.
    if driver is not None:
      driver.quit()

  return allDecksList

def save(allDecksByFormat, fileName=None):
  '''Write the given data as indented JSON inside the data/ directory.

  Args:
    allDecksByFormat: data to dump; objects that json cannot encode are
      converted through their `toDict()` method.
    fileName: target file name; defaults to FILENAME. The "data/" prefix is
      added when it is missing.
  '''
  targetName = FILENAME if fileName is None else fileName

  # Make sure the data directory exists and the file name points into it.
  if not os.path.exists('data'):
    os.makedirs('data')
  targetPath = targetName if targetName.startswith('data/') else 'data/' + targetName

  def asDict(obj):
    return obj.toDict()

  print("SAVING FILE")
  with open(targetPath, 'w') as file:
    json.dump(allDecksByFormat, file, indent=2, default=asDict)
    
def getData():
  '''Load the saved database from data/<FILENAME>.

  Returns the parsed JSON content, or an empty dict when the file does not
  exist yet.
  '''
  filename = 'data/' + FILENAME
  if os.path.exists(filename):
    with open(filename, 'r') as file:
      return json.load(file)
  return {}
  
  
def getDeckCards(deckUrl):
  '''Return the card names of a deck page, one list entry per copy.

  Opens `deckUrl` in a fresh driver, waits up to 10 seconds for the card rows
  and reads the first cell of each row, which is expected to look like
  "<quantity> <card name>". The driver is always quit, including when the wait
  times out or a row cannot be parsed.

  Raises:
    Whatever the wait raises on time-out, or ValueError when the quantity of a
    row is not an integer.
  '''
  cardSelector = 'div.wholeDeck div.cards tbody tr.cardItem'
  driver = DefaultDriver()
  try:
    driver.get(deckUrl)
    WebDriverWait(driver, 10).until(lambda d: d.find_element(By.CSS_SELECTOR, cardSelector))

    cardList = []
    for card in driver.find_elements(By.CSS_SELECTOR, cardSelector):
      cardInfo = card.find_elements(By.CSS_SELECTOR, 'td')[0].text
      # Everything before the first space is the quantity, the rest is the name.
      cardQuantity, _, cardName = cardInfo.partition(' ')
      cardList += [cardName.strip()] * int(cardQuantity)
    return cardList
  finally:
    driver.quit()


def closeAllDrivers():
  '''Forcefully terminate leftover WebDriver server processes.

  Best effort: error output of the kill command is discarded, so drivers that
  are not running are silently ignored.
  '''
  drivers = ['chromedriver', 'geckodriver', 'msedgedriver', 'operadriver']
  onWindows = os.name == 'nt'
  for driver in drivers:
    if onWindows:
      # taskkill matches the full image name, which includes the .exe extension.
      os.system(f"taskkill /f /im {driver}.exe 2>nul")
    else:
      # taskkill does not exist outside Windows (and "2>nul" would create a file named nul).
      os.system(f"pkill -x {driver} 2>/dev/null")
    
  

# Running Area

- Steps:
  - Open each format page - `gotoFormatUrl(format: str, shouldQuit: bool = False, skipShowAll: bool = False)`
  - Get all archetypes by format - `getAllArchetypes(format: str, driver: webdriver.Chrome)`
  - Get all decks by archetype - `getDeckFromArchetype(format: str, archetype: Archetype)`
  - Get all cards by deck - `getDeckCards(deckUrl)` (not wired into the run yet - `TODO`)

- Already run for:
  - Got all archetypes by format
  - Got [0:6] decks from Standard, Pioneer and Modern until DIMIR DEATH'S SHADOW (Not done)

In [17]:
# Load the previously saved database; getData() returns an empty dict when no file exists yet.
jsonDatabase = getData()

In [7]:
# Already done for every format (kept commented out so it is not re-run by accident)
# for format in FORMAT_LIST[8:]:
#   currentFormat = format
#   success, driver = gotoFormatUrl(currentFormat, skipShowAll=(format in ['Commander', 'Duel-Commander', 'Brawl', 'Historic-Brawl']))
#   print(f"Success: {success}")
#   archetypes = getAllArchetypes(currentFormat, driver)
#   jsonDatabase[currentFormat] = archetypes

In [8]:
# Persist the whole database to the default file under data/.
save(jsonDatabase)

SAVING FILE


In [None]:
# Scrape the deck lists of the first five archetypes of the selected format(s)
# and store the resulting Archetype objects back into the database.
for currentFormat in FORMAT_LIST[7:8]:
  selectedArchetypes = list(jsonDatabase[currentFormat].items())[0:5]
  for archetypeName, archetype in selectedArchetypes:
    # Entries loaded from JSON are plain dicts; turn them into Archetype objects first.
    if type(archetype) is dict:
      print("Converting dict to Archetype")
      archetype = Archetype(
        name=archetypeName,
        tier=archetype['tier'],
        deckQuantity=archetype['deckQuantity'],
        format=currentFormat,
        url=archetype['url'],
      )
    print(f"Getting decks from {archetypeName} format {currentFormat}")
    archetype.deckList = getDeckFromArchetype(currentFormat, archetype)
    jsonDatabase[currentFormat][archetypeName] = archetype

Converting dict to Archetype
Getting decks from DEATH AND TAXES format Timeless
Getting decks from DEATH AND TAXES format Timeless from url https://mtgdecks.net/Timeless/death-taxes
Fez a pagina 1 - DEATH AND TAXES
Fez a pagina 2 - DEATH AND TAXES
Fez a pagina 3 - DEATH AND TAXES


In [None]:
# Sum the deck quantities advertised for the first five archetypes of one format.
# The stored entries are only read: overwriting deckQuantity with an int on a stored
# Archetype object would make a re-run of this cell (and the other counting cells)
# fail on `.isnumeric()`.
currentFormat = FORMAT_LIST[8]
nDecks = 0
for archetypeName, archetype in list(jsonDatabase[currentFormat].items())[0:5]:
  rawQuantity = archetype['deckQuantity'] if type(archetype) == dict else archetype.deckQuantity
  rawQuantity = str(rawQuantity)
  # NOTE(review): non-numeric values presumably carry a one-character prefix
  # before the number, which is dropped here — confirm against the site.
  nDecks += int(rawQuantity) if rawQuantity.isnumeric() else int(rawQuantity[1:])
print(f"Total de decks: {nDecks}")

Converting dict to Archetype
Converting dict to Archetype
Converting dict to Archetype
Converting dict to Archetype
Converting dict to Archetype
Total de decks: 25352


In [11]:
# Kill WebDriver processes left behind by interrupted runs.
closeAllDrivers()

In [57]:
# Persist the database again now that deck lists were added.
save(jsonDatabase)

SAVING FILE


In [None]:
# Export each of these formats to its own file: data/mtgDeck.<format in lower case>.json
for formatName in ('Standard', 'Pioneer', 'Modern', 'Pauper', 'Alchemy', 'Explorer', 'Historic'):
  save(jsonDatabase[formatName], fileName=f'mtgDeck.{formatName.lower()}.json')

SAVING FILE
SAVING FILE
SAVING FILE
SAVING FILE
SAVING FILE


In [58]:
# Export the Timeless format alone to its own file under data/.
save(jsonDatabase['Timeless'], fileName='mtgDeck.timeless.json')

SAVING FILE


In [63]:
# Count every scraped deck across all formats.
# The loop variable is not called `format`, so the builtin format() stays usable in
# later cells, and the deck list is read directly instead of building a throwaway
# Archetype object for every entry.
totalDecks = 0
for formatName in FORMAT_LIST:
  for archetype in jsonDatabase[formatName].values():
    deckList = archetype['deckList'] if type(archetype) == dict else archetype.deckList
    totalDecks += len(deckList)
totalDecks

362965

In [37]:
# Sum, per format, the deck quantities advertised on the site for every archetype.
# The quantity is read without building Archetype objects and is normalized to text
# first, so entries whose deckQuantity is already an int do not break `.isnumeric()`.
numberOfDecksByFormat = {}
for currentFormat in FORMAT_LIST:
  formatTotal = 0
  for archetype in jsonDatabase[currentFormat].values():
    rawQuantity = archetype['deckQuantity'] if type(archetype) == dict else archetype.deckQuantity
    rawQuantity = str(rawQuantity)
    # NOTE(review): non-numeric values presumably carry a one-character prefix
    # before the number, which is dropped here — confirm against the site.
    formatTotal += int(rawQuantity) if rawQuantity.isnumeric() else int(rawQuantity[1:])
  numberOfDecksByFormat[currentFormat] = formatTotal
numberOfDecksByFormat
  

{'Standard': 5087,
 'Pioneer': 61992,
 'Modern': 70976,
 'Pauper': 59370,
 'Alchemy': 355,
 'Explorer': 4917,
 'Historic': 1565,
 'Timeless': 729,
 'Commander': 1419145,
 'Duel-Commander': 770776,
 'Brawl': 40162,
 'Historic-Brawl': 588930,
 'Legacy': 52942,
 'Vintage': 19122,
 'Premodern': 11251,
 'Old-school': 727}

In [40]:
# Grand total of the advertised deck quantities over all formats.
total = sum(numberOfDecksByFormat.values())
total

3108046

In [30]:
# Number of archetypes stored for every format from the fifth database key onwards.
x = sum(len(jsonDatabase[mode]) for mode in list(jsonDatabase.keys())[4:])
x

5123

In [63]:
# Number of scraped decks per format, for the first seven database keys.
# The deck list is read directly from each entry; the previous version rebuilt an
# Archetype with the format hard-coded to 'Standard' for every mode.
x = {}
for mode in list(jsonDatabase.keys())[:7]:
  x[mode] = 0
  for archetype in jsonDatabase[mode].values():
    deckList = archetype['deckList'] if type(archetype) == dict else archetype.deckList
    x[mode] += len(deckList)
x

{'Standard': 6680,
 'Pioneer': 103225,
 'Modern': 140816,
 'Pauper': 70794,
 'Alchemy': 4989,
 'Explorer': 16609,
 'Historic': 11699}

In [60]:
# Archetype count per format, listed from the largest to the smallest.
x = {}
for mode in jsonDatabase.keys():
  x[mode] = len(jsonDatabase[mode])
sorted(x.items(), key=lambda item: item[1], reverse=True)

[('Historic-Brawl', 1458),
 ('Commander', 1055),
 ('Duel-Commander', 731),
 ('Modern', 609),
 ('Pioneer', 520),
 ('Pauper', 431),
 ('Legacy', 425),
 ('Explorer', 289),
 ('Brawl', 259),
 ('Historic', 250),
 ('Vintage', 207),
 ('Standard', 189),
 ('Premodern', 140),
 ('Timeless', 136),
 ('Alchemy', 94),
 ('Old-school', 74)]

In [None]:
# Regenerate every archetype URL and update the stored ones that differ (those that still contain 's)
# for format in FORMAT_LIST:
#   for archetypeName, archetype in jsonDatabase[format].items():
#     archetype = jsonDatabase[format][archetypeName]
#     if type(archetype) == dict:
#       archetype = Archetype(archetypeName, archetype['tier'], archetype['deckQuantity'], format)
#     url = archetype.url
#     if (type(jsonDatabase[format][archetypeName]) == dict):
#       if (jsonDatabase[format][archetypeName]['url'] != url):
#         print(f"Updating url from {jsonDatabase[format][archetypeName]['url']} to {url}")
#       jsonDatabase[format][archetypeName]['url'] = url
#     elif(type(jsonDatabase[format][archetypeName]) == Archetype):
#       if (jsonDatabase[format][archetypeName].url != url):
#         print(f"Updating url from {jsonDatabase[format][archetypeName].url} to {url}")
#       jsonDatabase[format][archetypeName].url = url

Updating url from https://mtgdecks.net/Modern/dimir-death's-shadow to https://mtgdecks.net/Modern/dimir-death-s-shadow
Updating url from https://mtgdecks.net/Modern/inti's-cookbook to https://mtgdecks.net/Modern/inti-s-cookbook
Updating url from https://mtgdecks.net/Modern/grixis-death's-shadow to https://mtgdecks.net/Modern/grixis-death-s-shadow
Updating url from https://mtgdecks.net/Modern/rakdos-death's-shadow to https://mtgdecks.net/Modern/rakdos-death-s-shadow
Updating url from https://mtgdecks.net/Modern/jund-death's-shadow to https://mtgdecks.net/Modern/jund-death-s-shadow
Updating url from https://mtgdecks.net/Modern/sultai-death's-shadow to https://mtgdecks.net/Modern/sultai-death-s-shadow
Updating url from https://mtgdecks.net/Modern/orzhov-death's-shadow to https://mtgdecks.net/Modern/orzhov-death-s-shadow
Updating url from https://mtgdecks.net/Modern/4-color-death's-shadow to https://mtgdecks.net/Modern/4-color-death-s-shadow
Updating url from https://mtgdecks.net/Modern/ma