In [1]:
import requests
from requests_html import HTMLSession
from requests_html import AsyncHTMLSession
from bs4 import BeautifulSoup
import pprint
import numpy as np
import pandas as pd
import time
import pickle
import re

In [2]:
#source: https://stackoverflow.com/questions/1987694/how-to-print-the-full-numpy-array-without-truncation
#function to print out full numpy array instead of excerpt
def fullprint(*args, **kwargs):
    from pprint import pprint
    import numpy
    opt = numpy.get_printoptions()
    numpy.set_printoptions(threshold=numpy.inf)
    pprint(*args, **kwargs)
    numpy.set_printoptions(**opt)
    

#parse HTML text to clean up tabs and return array of tabs (get rid of all the spaces)
def parseTab(TAB):
    lines = [] #will hold all lines of the tab
    for line in TAB.children: #iterate through lines of tab
        #only parse lines with notes (ignores lyric lines and chord lines)
        if 'e|' == line.text[0:2] or 'B|' == line.text[0:2] or 'G|' == line.text[0:2] or 'D|' == line.text[0:2] or 'A|' == line.text[0:2] or 'E|' == line.text[0:2]:
            #print(line.text)
            val = list(line.text)
            if(lines):
                val = val[0:len(lines[0])] #some lines have additional text at the end so removes that
            lines.append(val) #adds to the lines array
    tabs = np.array(lines) #creates numpy array out of the lines
    tabsTranspose = tabs.T #takes transpose of array so we can easily iterate through the columns by making them the rows
    #trans[tabsTranspose=='-']= np.NaN
    mask_array = [any(char.isdigit() for char in row) for row in tabsTranspose] #creates mask that finds the lines that have notes (will be used to filter out empty lines)
    #print(mask_array)
    array = tabsTranspose[mask_array].T #filters out all lines without notes to get array with all notes
    #print()
    flag = False
    if array.shape[0]==6:
        flag = True
    return array,flag
    #print(array)
    #[''.join(row) for row in array]
    
#input HTML beautiful soup and get array of tabs
def getTab(soup):
    body = soup.find(class_='js-page js-global-wrapper') #finds main body of website which contains the strum patterns
    #print(body)
    patterns = body.find_all(class_='_2o1rM') #every tabs line is within a class called '_2o1rM'
    #print('Patterns:',patterns)
    allTabs = [] #will hold each row of tabs
    for pattern in patterns:
        parsedTab,check = parseTab(pattern)
        if check:
            allTabs.append(parsedTab)
    #allTabs= [parsedTab for (parsedTab,check) in parseTab(pattern) if check for pattern in patterns] #parses each row of tab and stores in allTabs
    #print('AllTabs:',allTabs)

    combinedTab = allTabs[0] 
    for i in range(1,len(allTabs)): #combine all the rows of tabs into one long tab
        combinedTab = np.append(combinedTab,allTabs[i],axis = 1)
    return combinedTab


def getAdditionalInfo(soup):
    body = soup.find(class_='js-page js-global-wrapper') #finds main body of website which contains the strum patterns
    info = soup.find(class_='_2I_M-')
    capo = 0
    key = ''
    tuning = ''
    for line in info:
        if 'Capo' in line.text:
            #https://stackoverflow.com/questions/4289331/how-to-extract-numbers-from-a-string-in-python
            capo = [int(val) for val in re.findall(r'\d+', 'Capo: 7th')][0]
        if 'Key' in line.text:
            key = line.text.split()[1]
        if 'Tuning' in line.text:
            tuning = line.text.split(': ')[1]
    return capo,key,tuning


#input a song's ultimate guitar url to get a numpy array of the tabs of that song
async def getSongData(url): #function to parse for strum patterns at given url
    start_time = time.time()
    asession = AsyncHTMLSession() #creates Async object
    
    r = await asession.get(url ,timeout=30) #requests website from server and waits for response

    #print("Rendering JavaScript")
    site = await r.html.arender(timeout=30) #renders the javascript, NOTE: Currently the speed bottleneck as the javascript is taking too long to render
    
    print("Render Website --- %s seconds ---" % (time.time() - start_time))
    start_time = time.time()
    soup = BeautifulSoup(r.html.html, features='lxml') #creates BeautifulSoup object which will parse the html
    #print(soup.prettify())
    try:
        capo,key,tuning = getAdditionalInfo(soup)
        print('Capo:',capo,"Key:",key,"Tuning:",tuning)
        tab = getTab(soup) #calls function that will parse tabs
    except:
        print('----------Scraping Failed----------\n',url,)
        await r.session.close() #closes chromium process
        await asession.close() #close session
    print("Process Data --- %s seconds ---" % (time.time() - start_time))
    r.close()
    await r.session.close() #closes chromium process
    await asession.close() #close session
    return capo,key,tuning,tab #returns array of tabs

async def scrapeSongs(numSongs):
    asession = AsyncHTMLSession() #creates Async object
    page = 1 #starts on page 1 of songs website
    URL = 'https://www.ultimate-guitar.com/explore?type[]=Tabs&page='
    count = 0 #will hold # of songs checked
    data = []
    while count < numSongs and page <= 20: #each page holds 50 songs so sets limits on number of pages checked
        try:
            r = await asession.get(URL+str(page),timeout=30) #requests website from server and waits for response

            site = await r.html.arender(timeout=30) #renders javascript on page

            soup = BeautifulSoup(r.html.html, features='lxml') #creates BeautifulSoup object which will parse the html
            songs = soup.find_all(class_='_36xEX _3_qAd _17l1x') #the class that holds the url for each song
            for song in songs: #iterates through all songs on page
                try: #try except statements in case javascript rendering times out
                    #print(song['href'])
                    capo,key,tuning,tab = await getSongData(song['href']) #call getData using url of this song to get song data
                    data.append((song.text,song['href'],key,tuning,capo,tab)) #add tuple containing song name, url, and strum patterns to list
    #             except Exception:
    #                 traceback.print_exc()
                except:
                    print('Scraping Failed for song #',count + 1) 

                count += 1 #increase count on # of songs scraped
                if count > numSongs: #if we've scraped enough songs, break and return
                    break
                print('Scraped Song #',count)
            #await r.session.close() #closes chromium process for page
            page += 1 #after all songs scraped from page, increment page count to access next page of songs
        except:
            print("\n\n---------------FAILED TO LOAD PAGE-------------------\n\n")
            page += 1
    await r.session.close()
    await asession.close() #closes AnsyncHTMLSession
    return data

In [None]:
url2 = 'https://tabs.ultimate-guitar.com/tab/the-beatles/here-comes-the-sun-tabs-201130'
url1 = 'https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488'
url3 = 'https://tabs.ultimate-guitar.com/tab/nirvana/come-as-you-are-tabs-47319'
check = await getSongData(url2)
print(check)
#fullprint(check.T) #print the transpose because the normal array would be too wide to print cleanly

In [3]:
test = await scrapeSongs(1000)
dataframe = pd.DataFrame(test,columns=['Name','URL','Key','Tuning','Capo','Tabs'])
dataframe

Render Website --- 2.144268035888672 seconds ---
Capo: 0 Key: Am Tuning: E A D G B E
Process Data --- 0.05169415473937988 seconds ---
Scraped Song # 1
Render Website --- 2.1361570358276367 seconds ---
Capo: 0 Key: Em Tuning: E A D G B E
Process Data --- 0.03722405433654785 seconds ---
Scraped Song # 2
Render Website --- 2.173511028289795 seconds ---
Capo: 0 Key: Gm Tuning: E A D G B E
Process Data --- 0.0492861270904541 seconds ---
Scraped Song # 3
Render Website --- 2.1393442153930664 seconds ---
Capo: 0 Key: Bm Tuning: E A D G B E
Process Data --- 0.05918002128601074 seconds ---
Scraped Song # 4
Render Website --- 2.0136330127716064 seconds ---
Capo: 0 Key: E Tuning: D G C F A D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/nirvana/come-as-you-are-tabs-47319
Process Data --- 0.050141096115112305 seconds ---
Scraping Failed for song # 5
Scraped Song # 5
Render Website --- 2.080904960632324 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.04

Render Website --- 2.0668420791625977 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.029350996017456055 seconds ---
Scraped Song # 49
Render Website --- 2.0578999519348145 seconds ---
Capo: 0 Key: Em Tuning: E A D G B E
Process Data --- 0.03738903999328613 seconds ---
Scraped Song # 50
Render Website --- 2.177953004837036 seconds ---
Capo: 0 Key: Ebm Tuning: E A D G B E
Process Data --- 0.06107687950134277 seconds ---
Scraped Song # 51
Render Website --- 1.996589183807373 seconds ---
Capo: 0 Key: Bm Tuning: E A D G B E
Process Data --- 0.03728985786437988 seconds ---
Scraped Song # 52
Render Website --- 1.9627001285552979 seconds ---
Capo: 0 Key: C Tuning: 
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/the-beatles/yesterday-tabs-46978
Process Data --- 0.05180811882019043 seconds ---
Scraping Failed for song # 53
Scraped Song # 53
Render Website --- 1.9356892108917236 seconds ---
Capo: 0 Key: Dm Tuning: D A D G B E
Process Data --- 0.0312111

Render Website --- 2.229613780975342 seconds ---
Capo: 0 Key: E Tuning: 
Process Data --- 0.06337881088256836 seconds ---
Scraped Song # 99
Render Website --- 2.0011491775512695 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03166007995605469 seconds ---
Scraped Song # 100
Render Website --- 2.0667080879211426 seconds ---
Capo: 0 Key: F#m Tuning: E A D G B E
Process Data --- 0.053092241287231445 seconds ---
Scraped Song # 101
Render Website --- 2.4431991577148438 seconds ---
Capo: 7 Key:  Tuning: E A D G B E
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/wilbur-soot/saline-solution-tabs-3237473
Process Data --- 0.057034969329833984 seconds ---
Scraping Failed for song # 102
Scraped Song # 102
Render Website --- 2.0317859649658203 seconds ---
Capo: 0 Key: A Tuning: E A D G B E
Process Data --- 0.036946773529052734 seconds ---
Scraped Song # 103
Render Website --- 2.0650858879089355 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.0363478

Render Website --- 2.0037639141082764 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03145313262939453 seconds ---
Scraped Song # 147
Render Website --- 1.971264123916626 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03251910209655762 seconds ---
Scraped Song # 148
Render Website --- 2.0817148685455322 seconds ---
Capo: 0 Key: Bm Tuning: 
Process Data --- 0.0734562873840332 seconds ---
Scraped Song # 149
Render Website --- 2.0119922161102295 seconds ---
Capo: 0 Key: Em Tuning: E A D G B E
Process Data --- 0.0401918888092041 seconds ---
Scraped Song # 150
Render Website --- 1.9801440238952637 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.042707204818725586 seconds ---
Scraped Song # 151
Render Website --- 2.04335880279541 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.039106130599975586 seconds ---
Scraped Song # 152
Render Website --- 2.029682159423828 seconds ---
Capo: 0 Key: E Tuning: E A D G B E
Process Data --- 0.04004597663879394

Render Website --- 1.9831628799438477 seconds ---
Capo: 7 Key:  Tuning: E A D G B E
Process Data --- 0.030599117279052734 seconds ---
Scraped Song # 191
Render Website --- 1.9852607250213623 seconds ---
Capo: 0 Key: Em Tuning: Eb Ab Db Gb Bb Eb
Process Data --- 0.03480076789855957 seconds ---
Scraped Song # 192
Render Website --- 2.097808837890625 seconds ---
Capo: 0 Key: Bb Tuning: E A D G B E
Process Data --- 0.03236818313598633 seconds ---
Scraped Song # 193
Render Website --- 2.047628879547119 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.030881166458129883 seconds ---
Scraped Song # 194
Render Website --- 2.1374762058258057 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.030328035354614258 seconds ---
Scraped Song # 195
Render Website --- 2.026723861694336 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.031213760375976562 seconds ---
Scraped Song # 196
Render Website --- 1.9793119430541992 seconds ---
Capo: 0 Key: Am Tuning: E A D G B E
Pr

Render Website --- 2.2632813453674316 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04280805587768555 seconds ---
Scraped Song # 236
Render Website --- 2.0646002292633057 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.0355679988861084 seconds ---
Scraped Song # 237
Render Website --- 2.0029687881469727 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.031135082244873047 seconds ---
Scraped Song # 238
Render Website --- 2.3437230587005615 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.030036211013793945 seconds ---
Scraped Song # 239
Render Website --- 2.0465118885040283 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.043073177337646484 seconds ---
Scraped Song # 240
Render Website --- 2.0332188606262207 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.03127121925354004 seconds ---
Scraped Song # 241
Render Website --- 2.000492811203003 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.02926

Render Website --- 1.9907512664794922 seconds ---
Capo: 0 Key: Eb Tuning: E A D G B E
Process Data --- 0.02831101417541504 seconds ---
Scraped Song # 286
Render Website --- 1.958683967590332 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.029679059982299805 seconds ---
Scraped Song # 287
Render Website --- 4.049138069152832 seconds ---
Capo: 0 Key:  Tuning: D G C F A D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/nirvana/come-as-you-are-tabs-1107643
Process Data --- 0.06369209289550781 seconds ---
Scraping Failed for song # 288
Scraped Song # 288
Render Website --- 2.0841450691223145 seconds ---
Capo: 0 Key:  Tuning: D A D G B E
Process Data --- 0.04014015197753906 seconds ---
Scraped Song # 289
Render Website --- 2.009437084197998 seconds ---
Capo: 7 Key:  Tuning: 
Process Data --- 0.035455942153930664 seconds ---
Scraped Song # 290
Render Website --- 2.027548313140869 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.036603

Render Website --- 2.132714033126831 seconds ---
Capo: 7 Key: C# Tuning: E A D G B E
Process Data --- 0.03821587562561035 seconds ---
Scraped Song # 331
Render Website --- 3.0696420669555664 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.0615239143371582 seconds ---
Scraped Song # 332
Render Website --- 1.960299015045166 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.02981877326965332 seconds ---
Scraped Song # 333
Render Website --- 1.936600685119629 seconds ---
Capo: 0 Key:  Tuning: 
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/gwen-stefani/buble-pop-electric-tabs-153686
Process Data --- 0.04719686508178711 seconds ---
Scraping Failed for song # 334
Scraped Song # 334
Render Website --- 2.054410934448242 seconds ---
Capo: 0 Key: B Tuning: E A D G B E
Process Data --- 0.0336298942565918 seconds ---
Scraped Song # 335
Render Website --- 2.022052049636841 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.030

Render Website --- 1.9692561626434326 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03464818000793457 seconds ---
Scraped Song # 378
Render Website --- 1.935582160949707 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03156614303588867 seconds ---
Scraped Song # 379
Render Website --- 2.1269891262054443 seconds ---
Capo: 0 Key: F Tuning: D G C F A D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/the-beatles/yesterday-tabs-98673
Process Data --- 0.06135272979736328 seconds ---
Scraping Failed for song # 380
Scraped Song # 380
Render Website --- 2.021448850631714 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.02931976318359375 seconds ---
Scraped Song # 381
Render Website --- 2.261584997177124 seconds ---
Capo: 0 Key: E Tuning: E A D G B E
Process Data --- 0.02736830711364746 seconds ---
Scraped Song # 382
Render Website --- 2.1181092262268066 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.

Render Website --- 2.0915982723236084 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.04024100303649902 seconds ---
Scraped Song # 425
Render Website --- 2.103447914123535 seconds ---
Capo: 0 Key:  Tuning: Eb Ab Db Gb Bb Eb
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/misc-soundtrack/the-hobbit-misty-mountains-tabs-1204884
Process Data --- 0.050811052322387695 seconds ---
Scraping Failed for song # 426
Scraped Song # 426
Render Website --- 1.9096729755401611 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.030297040939331055 seconds ---
Scraped Song # 427
Render Website --- 2.0507829189300537 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03712797164916992 seconds ---
Scraped Song # 428
Render Website --- 2.055558919906616 seconds ---
Capo: 0 Key: G Tuning: 
Process Data --- 0.03673911094665527 seconds ---
Scraped Song # 429
Render Website --- 2.0825982093811035 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.04

Render Website --- 2.0357348918914795 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.04056429862976074 seconds ---
Scraped Song # 473
Render Website --- 2.0106558799743652 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.04242897033691406 seconds ---
Scraped Song # 474
Render Website --- 2.077453851699829 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.0393528938293457 seconds ---
Scraped Song # 475
Render Website --- 2.0502841472625732 seconds ---
Capo: 0 Key:  Tuning: Eb Ab Db Gb Bb Eb
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/jimi-hendrix/hey-joe-tabs-11711
Process Data --- 0.05362677574157715 seconds ---
Scraping Failed for song # 476
Scraped Song # 476
Render Website --- 2.019534111022949 seconds ---
Capo: 7 Key:  Tuning: 
Process Data --- 0.0369417667388916 seconds ---
Scraped Song # 477
Render Website --- 2.190593957901001 seconds ---
Capo: 0 Key:  Tuning: A E A D F# B
----------Scraping Failed----------
 https://tabs.ultimate-gu

Render Website --- 2.048434257507324 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.03455090522766113 seconds ---
Scraped Song # 523
Render Website --- 2.0520410537719727 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.0305330753326416 seconds ---
Scraped Song # 524
Render Website --- 2.15409779548645 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.055455923080444336 seconds ---
Scraped Song # 525
Render Website --- 2.28006911277771 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.052780866622924805 seconds ---
Scraped Song # 526
Render Website --- 2.084306240081787 seconds ---
Capo: 7 Key: F Tuning: 
Process Data --- 0.0417332649230957 seconds ---
Scraped Song # 527
Render Website --- 2.014382839202881 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.033528804779052734 seconds ---
Scraped Song # 528
Render Website --- 2.056335210800171 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.041088104248046875 seconds ---
Scraped Song # 529
Render 

Render Website --- 2.0914082527160645 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.045272111892700195 seconds ---
Scraped Song # 570
Render Website --- 2.106199026107788 seconds ---
Capo: 0 Key:  Tuning: 
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/disturbed/stricken-tabs-212004
Process Data --- 0.06474590301513672 seconds ---
Scraping Failed for song # 571
Scraped Song # 571
Render Website --- 2.079545736312866 seconds ---
Capo: 0 Key:  Tuning: 
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/fleetwood-mac/dreams-tabs-1461051
Process Data --- 0.05291581153869629 seconds ---
Scraping Failed for song # 572
Scraped Song # 572
Render Website --- 2.0167031288146973 seconds ---
Capo: 7 Key:  Tuning: E A D G B E
Process Data --- 0.03222084045410156 seconds ---
Scraped Song # 573
Render Website --- 2.275322914123535 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.09392833709716797 seconds ---
Scraped Song # 574
Render Webs

Render Website --- 1.9175400733947754 seconds ---
Capo: 7 Key: Eb Tuning: E A D G B E
Process Data --- 0.028625965118408203 seconds ---
Scraped Song # 617
Render Website --- 2.1047329902648926 seconds ---
Capo: 7 Key:  Tuning: E A D G B E
Process Data --- 0.038863182067871094 seconds ---
Scraped Song # 618
Render Website --- 1.990670919418335 seconds ---
Capo: 0 Key: Gm Tuning: E A D G B E
Process Data --- 0.031951189041137695 seconds ---
Scraped Song # 619
Render Website --- 1.987023115158081 seconds ---
Capo: 0 Key: C Tuning: D A D G B E
Process Data --- 0.034857988357543945 seconds ---
Scraped Song # 620
Render Website --- 5.421737909317017 seconds ---
Capo: 0 Key: A Tuning: 
Process Data --- 0.046423912048339844 seconds ---
Scraped Song # 621
Render Website --- 2.0140788555145264 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.036440134048461914 seconds ---
Scraped Song # 622
Render Website --- 2.09263014793396 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process 

Render Website --- 2.079401969909668 seconds ---
Capo: 0 Key:  Tuning: D G C F A D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/55686
Process Data --- 0.06004619598388672 seconds ---
Scraping Failed for song # 667
Scraped Song # 667
Render Website --- 2.081458330154419 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.041059017181396484 seconds ---
Scraped Song # 668
Render Website --- 2.1624979972839355 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.04781508445739746 seconds ---
Scraped Song # 669
Render Website --- 2.1101040840148926 seconds ---
Capo: 0 Key:  Tuning: D A D F# A D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/mitski/last-words-of-a-shooting-star-tabs-1908397
Process Data --- 0.052713871002197266 seconds ---
Scraping Failed for song # 670
Scraped Song # 670
Render Website --- 2.247864007949829 seconds ---
Capo: 7 Key:  Tuning: 
Process Data --- 0.04951333999633789 seconds ---
Scraped Song # 

Render Website --- 2.228219985961914 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.040831804275512695 seconds ---
Scraped Song # 716
Render Website --- 2.0519728660583496 seconds ---
Capo: 7 Key:  Tuning: 
Process Data --- 0.035176992416381836 seconds ---
Scraped Song # 717
Render Website --- 2.0218653678894043 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.036170005798339844 seconds ---
Scraped Song # 718
Render Website --- 2.0321872234344482 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03983592987060547 seconds ---
Scraped Song # 719
Render Website --- 2.246955156326294 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.04504513740539551 seconds ---
Scraped Song # 720
Render Website --- 2.181670904159546 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.048538923263549805 seconds ---
Scraped Song # 721
Render Website --- 2.037587881088257 seconds ---
Capo: 0 Key:  Tuning: 
----------Scraping Failed----------
 https://tab

Render Website --- 2.2087340354919434 seconds ---
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/polyphia/goat-tabs-2521839
Process Data --- 0.03568911552429199 seconds ---
Scraping Failed for song # 764
Scraped Song # 764
Render Website --- 2.8002140522003174 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.030213117599487305 seconds ---
Scraped Song # 765
Render Website --- 7.378797769546509 seconds ---
Capo: 0 Key: E Tuning: E A D G B E
Process Data --- 0.05645608901977539 seconds ---
Scraped Song # 766
Render Website --- 2.412440061569214 seconds ---
Capo: 0 Key: C Tuning: E A D G B E
Process Data --- 0.04971909523010254 seconds ---
Scraped Song # 767
Render Website --- 2.3617141246795654 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.03583788871765137 seconds ---
Scraped Song # 768
Render Website --- 2.2642266750335693 seconds ---
Capo: 0 Key: Ab Tuning: E A D G B E
Process Data --- 0.03380298614501953 seconds ---
Scraped Song # 76

Render Website --- 2.0017919540405273 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03052210807800293 seconds ---
Scraped Song # 808
Render Website --- 2.0812828540802 seconds ---
Capo: 0 Key:  Tuning: 
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/pink-floyd/breathe-tabs-790297
Process Data --- 0.05694317817687988 seconds ---
Scraping Failed for song # 809
Scraped Song # 809
Render Website --- 2.0552070140838623 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.04377913475036621 seconds ---
Scraped Song # 810
Render Website --- 2.1147587299346924 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.047944068908691406 seconds ---
Scraped Song # 811
Render Website --- 2.0784308910369873 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.040563106536865234 seconds ---
Scraped Song # 812
Render Website --- 2.108711004257202 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03780102729797363 seconds

Render Website --- 2.03926682472229 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.0411529541015625 seconds ---
Scraped Song # 856
Render Website --- 2.046344041824341 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03489208221435547 seconds ---
Scraped Song # 857
Render Website --- 2.149117946624756 seconds ---
Capo: 0 Key:  Tuning: 
Process Data --- 0.036344051361083984 seconds ---
Scraped Song # 858
Render Website --- 2.144440174102783 seconds ---
Capo: 0 Key:  Tuning: 
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/bon-iver/skinny-love-tabs-888350
Process Data --- 0.06290268898010254 seconds ---
Scraping Failed for song # 859
Scraped Song # 859
Render Website --- 2.0503222942352295 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.031616926193237305 seconds ---
Scraped Song # 860
Render Website --- 2.09936785697937 seconds ---
Capo: 7 Key:  Tuning: E A D G B E
Process Data --- 0.04367375373840332 seconds -

Render Website --- 2.107778787612915 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.04639005661010742 seconds ---
Scraped Song # 904
Render Website --- 2.2361350059509277 seconds ---
Capo: 7 Key:  Tuning: 
Process Data --- 0.04410219192504883 seconds ---
Scraped Song # 905
Render Website --- 2.1636338233947754 seconds ---
Capo: 7 Key:  Tuning: 
Process Data --- 0.05293011665344238 seconds ---
Scraped Song # 906
Render Website --- 4.518763065338135 seconds ---
Capo: 7 Key: Em Tuning: E A D G B E
Process Data --- 0.06864476203918457 seconds ---
Scraped Song # 907
Render Website --- 3.375066041946411 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03954315185546875 seconds ---
Scraped Song # 908
Render Website --- 2.0261740684509277 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03158903121948242 seconds ---
Scraped Song # 909
Render Website --- 2.010956048965454 seconds ---
Capo: 0 Key: C Tuning: E A D G B E
Process Data --- 0.03350305

Render Website --- 2.148068904876709 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03755998611450195 seconds ---
Scraped Song # 953
Render Website --- 2.065824031829834 seconds ---
Capo: 0 Key: C Tuning: E A D G B E
Process Data --- 0.0416712760925293 seconds ---
Scraped Song # 954
Render Website --- 2.0411269664764404 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.031575918197631836 seconds ---
Scraped Song # 955
Render Website --- 2.0501468181610107 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.03038311004638672 seconds ---
Scraped Song # 956
Render Website --- 4.931445121765137 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process Data --- 0.04804205894470215 seconds ---
Scraped Song # 957
Render Website --- 2.0803308486938477 seconds ---
Capo: 7 Key:  Tuning: E A D G B E
Process Data --- 0.04014396667480469 seconds ---
Scraped Song # 958
Render Website --- 2.095810890197754 seconds ---
Capo: 0 Key:  Tuning: E A D G B E
Process

Render Website --- 2.2712209224700928 seconds ---
Capo: 0 Key: Em Tuning: 
Process Data --- 0.037149906158447266 seconds ---
Scraped Song # 1000


Unnamed: 0,Name,URL,Key,Tuning,Capo,Tabs
0,Stairway To Heaven,https://tabs.ultimate-guitar.com/tab/led-zeppe...,Am,E A D G B E,0,"[[-, -, -, 5, 7, -, -, 7, 8, -, -, 8, 2, -, -,..."
1,Nothing Else Matters,https://tabs.ultimate-guitar.com/tab/metallica...,Em,E A D G B E,0,"[[-, -, -, 0, -, -, -, -, -, 0, -, -, -, -, -,..."
2,Do I Wanna Know,https://tabs.ultimate-guitar.com/tab/arctic-mo...,Gm,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
3,Hotel California (ver 2),https://tabs.ultimate-guitar.com/tab/eagles/ho...,Bm,E A D G B E,0,"[[-, -, -, -, -, -, 7, 1, 0, 1, 2, -, -, 9, -,..."
4,Here Comes The Sun (ver 3),https://tabs.ultimate-guitar.com/tab/the-beatl...,D,E A D G B E,7,"[[2, -, 0, 2, -, 2, 0, -, -, -, 0, -, -, -, -,..."
...,...,...,...,...,...,...
809,Beast Of Burden,https://tabs.ultimate-guitar.com/tab/the-rolli...,,,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
810,Hammer To Fall,https://tabs.ultimate-guitar.com/tab/queen/ham...,,,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
811,Run Through The Jungle (ver 2),https://tabs.ultimate-guitar.com/tab/creedence...,,D A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
812,The Number Of The Beast,https://tabs.ultimate-guitar.com/tab/iron-maid...,,,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."


In [4]:
pickle.dump(dataframe, open( "ScrapedSongs1000.p", "wb" ) )

In [None]:
sum = 0
for data in dataframe.iloc[:,5]:
    sum+=data.shape[1]
sum

In [None]:
# #TESTING

# asession = AsyncHTMLSession() #creates Async object
# url2 = 'https://tabs.ultimate-guitar.com/tab/the-beatles/here-comes-the-sun-tabs-201130'
# url1 = 'https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488'
# r = await asession.get(url1 ,timeout=30) #requests website from server and waits for response

# #print("Rendering JavaScript")
# site = await r.html.arender(timeout=30) #renders the javascript, NOTE: Currently the speed bottleneck as the javascript is taking too long to render

# soup = BeautifulSoup(r.html.html, features='lxml') #creates BeautifulSoup object which will parse the html
# body = soup.find(class_='js-page js-global-wrapper') #finds main body of website which contains the strum patterns
# patterns = body.find_all(class_='_2J-ci') #every tabs line is within a class called '_2J-ci'

# print('Patterns length:',len(patterns))
# #print(patterns[0].text)
# allTabs = []
# allTabs= [parseTab(pattern) for pattern in patterns]
# print('Alltabs length:',len(allTabs))
# x = allTabs[0]
# for i in range(1,len(allTabs)):
#     #print('1st:',allTabs[i-1])
#     print(allTabs[i].shape)
#     print(i)
#     print('2nd:',allTabs[i],'\n')
#     x = np.append(x,allTabs[i],axis = 1)
# print(x.shape)
# fullprint(x[:,0:10])
# fullprint(x.T)
# #print(x[:,0:10])
# #np.append(array,array2,axis=1).shape



In [None]:
# #TESTING

# # for line in patterns[8].children:
# #     print(line.text)
    
# x = patterns[17].find_all(class_='_1zlI0')
# for line in x:
#     if line.text[0] == 'e' or line.text[0] == 'B' or line.text[0] == 'G' or line.text[0] == 'D' or line.text[0] == 'A' or line.text[0] == 'E':
#         print(line.text)

# for line in x:
#     print(line.text)
# x[2]

In [None]:
# #TESTING

# lines = []
# for line in patterns[0].children:
#     lines.append(list(line.text))
# tabs = np.array(lines)
# tabsTranspose = z.T
# #trans[tabsTranspose=='-']= np.NaN
# mask_array = [any(char.isdigit() for char in row) for row in tabsTranspose]
# #print(mask_array)
# array = tabsTranspose[mask_array].T
# #print(array)
# #[''.join(row) for row in array]

# lines = []
# for line in patterns[1].children:
#     lines.append(list(line.text))
# tabs = np.array(lines)
# tabsTranspose = z.T
# #trans[tabsTranspose=='-']= np.NaN
# mask_array = [any(char.isdigit() for char in row) for row in tabsTranspose]
# #print(mask_array)
# array2 = tabsTranspose[mask_array].T
# #print(array)
# [''.join(row) for row in array]
# np.append(array,array2,axis=1).shape


In [None]:
#just learning about list comprehension and in line loops
# array = [[1,2,3,4,5],[6,7,8,9,10]]
# print([[num > 4 for num in row] for row in array])
# print([any(num >4 for num in row) for row in array])
string = 'testing string split function'
string.split()
import re
capo = [int(val) for val in re.findall(r'\d+', 'Capo: 7th')][0]
capo