In [None]:
# @title Enter decklist URLs
# @markdown ##### Run cell to allow input.
# @markdown ##### <br><b>Input format:</b> URL - deck name (optional)
# @markdown ##### <b>Example:</b> https://www.db.yugioh-card.com/rushdb/member_deck.action?cgid=fabfd353f95aa46e4617efe9a504fa54&dno=28 - Dark Magician
# @markdown ##### <br>You can enter multiple lines of input.
# @markdown ##### When the deck name isn't given, the deck name on Neuron will be used instead.
# @markdown ##### <br><b>Do not rerun cell after entering your input.</b> Move on to the next cell.

import ipywidgets as widgets

# Multi-line text box for the decklist input; the scraping cell reads its
# contents through `urls.value`, so the variable name must stay `urls`.
urls = widgets.Textarea(
    description='Input:',
    placeholder='URL - deck name (optional)',
    layout=widgets.Layout(height='200px', width='90%'),
)

# Render the widget in the cell output so the user can type into it.
display(urls)

In [None]:
# @title Scrape decklists
# @markdown Use English card names from Yugipedia:
english_names = True # @param {"type":"boolean"}

import csv
import requests
from requests.adapters import HTTPAdapter, Retry
from lxml import html
from time import sleep

def islegend(value):
    """Return 'Legend' when `value` is exactly 'L', otherwise 'Non-Legend'."""
    return 'Legend' if value == 'L' else 'Non-Legend'

def islimit(value):
    """Map a table-row class string to a limitation label.

    Returns 'Semi-Limited', 'Limited' or 'Forbidden' when the matching marker
    occurs in `value`, and 'Unlimited' when none of them does.
    """
    # Order matters: 'limited' is a substring of 'semi_limited', so the more
    # specific marker has to be tested first.
    markers = (
        ('semi_limited', 'Semi-Limited'),
        ('limited', 'Limited'),
        ('forbidden', 'Forbidden'),
    )
    for marker, label in markers:
        if marker in value:
            return label
    return 'Unlimited'

def cardtype(value):
    """Derive the card type from a string such as an icon image path.

    'Spell' if `value` contains 'spell', else 'Trap' if it contains 'trap',
    else 'Monster'.
    """
    return 'Spell' if 'spell' in value else 'Trap' if 'trap' in value else 'Monster'

# Cache of already-resolved names, keyed by the full-width form of the
# Japanese name, so each distinct card triggers at most one API request.
stored_names={}
# str.translate table shifting printable ASCII (0x21-0x7E) by 0xFEE0, i.e. to
# the corresponding full-width characters.
half2full={code:code+0xFEE0 for code in range(0x21,0x7F)}
def jp2en(value):
    """Return the English name for a Japanese Rush Duel card name.

    The name is first converted to full-width characters, then looked up in
    `stored_names`; on a miss the Yugipedia `ask` API is queried through the
    module-level requests session `s`.

    A trailing ' (Rush Duel)' is removed from the result and parentheses are
    replaced with square brackets. If the lookup fails for any reason, a
    message is printed and the (full-width) Japanese name is returned instead.
    Both successful and failed results are cached.
    """
    value=value.translate(half2full)
    if value in stored_names:
        return stored_names[value]
    try:
        response=s.get('https://yugipedia.com/api.php?action=ask&query=[[Japanese%20base%20name::'+value+']]%20AND%20[[Rush%20Duel%20status::%2B]]&format=json',headers={'User-agent':'Mozilla/5.0'})
        # The first key of the result mapping is taken as the page title.
        en=list(response.json()['query']['results'].keys())[0]
    except Exception:
        # Best-effort lookup: network errors, malformed JSON and empty result
        # sets all fall back to the Japanese name. `Exception` (rather than a
        # bare except) lets KeyboardInterrupt/SystemExit stop the scrape.
        print('Failed to find English name for '+value)
        en=value
    suffix=' (Rush Duel)'
    if en.endswith(suffix):
        en=en[:-len(suffix)]
    en=en.replace('(','[').replace(')',']')
    stored_names[value]=en
    return en

# Turn the textarea contents into one [url] or [url, deck name] entry per
# non-blank line.
decks=[]
for line in urls.value.splitlines():
    if not line.strip():
        continue  # a blank line would otherwise be requested as an empty URL
    # Split on the first ' - ' only, so deck names may contain ' - ' themselves.
    entry=[part.strip() for part in line.split(' - ',1)]
    if len(entry)==2 and not entry[1]:
        entry.pop()  # "URL - " with no name: let the scraping loop read the name from the page
    decks.append(entry)
total=str(len(decks))
# Header row of the output CSV; the scraping loop appends one row per card.
data=[['Deck','Name','Copy','Card Type','Legend','Limit','Deck Name','URL']]
progress=0
# Shared session so connections are reused across all requests.
s=requests.Session()
retries=Retry(total=100,backoff_factor=0.1,status_forcelist=[500,502,503,504])
adapter=HTTPAdapter(max_retries=retries)
s.mount('http://', adapter)
# The URLs requested by this notebook are https://, so the retry policy has to
# be mounted for that scheme as well or it never takes effect.
s.mount('https://', adapter)

# (table id on the deck page, value for the 'Deck' column, fixed card type).
# A card type of None means the type is derived per row from the card image.
_SECTION_TABLES=(
    ('monster_list','Main Deck','Monster'),
    ('spell_list','Main Deck','Spell'),
    ('trap_list','Main Deck','Trap'),
    ('extra_list','Extra Deck','Monster'),
    ('side_list','Side Deck',None),
)

def _parse_section(tree,table_id,need_images):
    """Extract the per-card columns of one decklist table.

    Returns a dict of lists ('names', 'copies', 'legends', 'limits',
    'images') that are indexed in parallel, one position per card row.
    'images' is only queried when `need_images` is true.
    """
    table='//table[@id="'+table_id+'"]'
    return {
        'names':tree.xpath(table+'//div[@class="icon"]/span/text()'),
        'copies':[copy.strip() for copy in tree.xpath(table+'//td[@class="num"]/span/text()')],
        'legends':tree.xpath(table+'//div[@class="icon"]/span[last()]//text()'),
        'limits':tree.xpath(table+'//tr/@class'),
        'images':tree.xpath(table+'//td[@class="c_img"]/img/@src') if need_images else [],
    }

for deck in decks:
    progress+=1
    print('Scraping Deck '+str(progress)+' of '+total+'.')
    page=s.get(deck[0])
    if not page.ok:
        # An error page would otherwise be parsed as an empty deck (or crash
        # on the missing title); report it and carry on with the next deck.
        print('Failed to download '+deck[0]+' (HTTP '+str(page.status_code)+'). Skipping deck.')
        continue
    print('Page downloaded.')
    tree=html.fromstring(page.content)
    if len(deck)<2:
        # No deck name was entered: use the title shown on the deck page.
        deck.append(tree.xpath('//header[@id="broad_title"]//h1/text()')[0].strip())
    deck_url,deck_name=deck[0],deck[1]
    sections=[_parse_section(tree,table_id,fixed_type is None) for table_id,_,fixed_type in _SECTION_TABLES]
    print('Page parsing done.')
    if english_names:
        for section in sections:
            section['names']=[jp2en(name) for name in section['names']]
        print('English name searching done.')
    # Collect the deck's rows first and add them in one step, so a parsing
    # error part-way through never leaves a half-added deck in `data`.
    rows=[]
    for (_,deck_label,fixed_type),section in zip(_SECTION_TABLES,sections):
        for i,name in enumerate(section['names']):
            card_type=fixed_type if fixed_type is not None else cardtype(section['images'][i])
            rows.append([deck_label,name,section['copies'][i],card_type,islegend(section['legends'][i]),islimit(section['limits'][i]),deck_name,deck_url])
    data+=rows
    print('Data parsing done.')

# Write the header and all collected card rows to decklists.csv. The file is
# opened with the 'utf-8-sig' encoding and newline='' as the csv module expects.
with open('decklists.csv','w',newline='',encoding='utf-8-sig') as csv_file:
    csv.writer(csv_file).writerows(data)

print('Scraping done.')


In [None]:
# @title Download decklists
# @markdown ##### Run cell to download the csv file.
# @markdown ##### <br>Otherwise, click on the folder icon on the left sidebar, right click on "decklists.csv", and click "Download".

# `google.colab` is only importable inside a Google Colab runtime.
from google.colab import files
# Offer the CSV written by the scraping cell as a download.
files.download('decklists.csv')

In [None]:
# @title Convert decklists to text
# @markdown ##### You need to run the <b>Enter decklist URLs</b> and <b>Scrape decklists</b> cells first.
# @markdown ##### <br>OR
# @markdown ##### <br>You can tick the box below and upload your own CSV file. (Must be formatted the same way as the CSV file obtained from the <b>Download decklists</b> cell)
upload_csv = False # @param {"type":"boolean"}

import csv
from google.colab import files

# Deck sections in the order they are printed; these are also the only values
# accepted in the first CSV column.
text_sections=('Main Deck','Extra Deck','Side Deck')

if upload_csv:
    csvfile=files.upload()
    # Read the first uploaded file; the context manager closes the handle.
    with open(list(csvfile.keys())[0],newline='',encoding='utf-8-sig') as uploaded:
        data=list(csv.reader(uploaded))

# Group the card rows by deck URL (column 7). data[0] is the header row.
decklist={}
for row in data[1:]:
    if len(row)<8:
        continue  # blank or truncated line, e.g. a trailing empty row
    section=row[0]
    if section not in text_sections:
        print('Skipping row with unknown deck section: '+section)
        continue
    entry=decklist.setdefault(row[7],{'URL':row[7],'Name':row[6],'Main Deck':[],'Extra Deck':[],'Side Deck':[]})
    # Each card is stored as [copies, name] under the section it belongs to.
    entry[section].append([row[2],row[1]])

# Render every deck as: URL, name, then each section with its total card count
# followed by one "<copies>x <name>" line per card, and a blank line at the end.
deck_texts=[]
for entry in decklist.values():
    lines=[entry['URL'],entry['Name']]
    for section in text_sections:
        cards=entry[section]
        count=sum(int(copies) for copies,_ in cards)
        lines.append(section+' ('+str(count)+'):')
        lines.append('\n'.join(copies+'x '+name for copies,name in cards))
    deck_texts.append('\n'.join(lines)+'\n\n')
textlist=''.join(deck_texts)

print(textlist)