In [1]:
import requests
from requests_html import HTMLSession
from requests_html import AsyncHTMLSession
from bs4 import BeautifulSoup
import pprint
import numpy as np
import pandas as pd
import time
import pickle
import re

In [2]:
#source: https://stackoverflow.com/questions/1987694/how-to-print-the-full-numpy-array-without-truncation
#function to print out full numpy array instead of excerpt
def fullprint(*args, **kwargs):
    from pprint import pprint
    import numpy
    opt = numpy.get_printoptions()
    numpy.set_printoptions(threshold=numpy.inf)
    pprint(*args, **kwargs)
    numpy.set_printoptions(**opt)
    

#parse HTML text to clean up tabs and return array of tabs (get rid of all the spaces)
def parseTab(TAB):
    lines = [] #will hold all lines of the tab
    for line in TAB.children: #iterate through lines of tab
        #only parse lines with notes (ignores lyric lines and chord lines)
        if 'e|' == line.text[0:2] or 'B|' == line.text[0:2] or 'G|' == line.text[0:2] or 'D|' == line.text[0:2] or 'A|' == line.text[0:2] or 'E|' == line.text[0:2]:
            #print(line.text)
            val = list(line.text)
            if(lines):
                val = val[0:len(lines[0])] #some lines have additional text at the end so removes that
            lines.append(val) #adds to the lines array
    tabs = np.array(lines) #creates numpy array out of the lines
    tabsTranspose = tabs.T #takes transpose of array so we can easily iterate through the columns by making them the rows
    #trans[tabsTranspose=='-']= np.NaN
    mask_array = [any(char.isdigit() for char in row) for row in tabsTranspose] #creates mask that finds the lines that have notes (will be used to filter out empty lines)
    #print(mask_array)
    array = tabsTranspose[mask_array].T #filters out all lines without notes to get array with all notes
    #print()
    flag = False
    if array.shape[0]==6:
        flag = True
    return array,flag
    #print(array)
    #[''.join(row) for row in array]
    
#input HTML beautiful soup and get array of tabs
def getTab(soup):
    body = soup.find(class_='js-page js-global-wrapper') #finds main body of website which contains the strum patterns
    #print(body)
    patterns = body.find_all(class_='_2o1rM') #every tabs line is within a class called '_2o1rM'
    #print('Patterns:',patterns)
    allTabs = [] #will hold each row of tabs
    for pattern in patterns:
        parsedTab,check = parseTab(pattern)
        if check:
            allTabs.append(parsedTab)
    #allTabs= [parsedTab for (parsedTab,check) in parseTab(pattern) if check for pattern in patterns] #parses each row of tab and stores in allTabs
    #print('AllTabs:',allTabs)

    combinedTab = allTabs[0] 
    for i in range(1,len(allTabs)): #combine all the rows of tabs into one long tab
        combinedTab = np.append(combinedTab,allTabs[i],axis = 1)
    return combinedTab


def getAdditionalInfo(soup):
    body = soup.find(class_='js-page js-global-wrapper') #finds main body of website which contains the strum patterns
    info = soup.find(class_='_2I_M-')
    capo = 0
    key = ''
    tuning = ''
    for line in info:
        if 'Capo' in line.text:
            #https://stackoverflow.com/questions/4289331/how-to-extract-numbers-from-a-string-in-python
            capo = [int(val) for val in re.findall(r'\d+', 'Capo: 7th')][0]
        if 'Key' in line.text:
            key = line.text.split()[1]
        if 'Tuning' in line.text:
            tuning = line.text.split(': ')[1]
    return capo,key,tuning


#input a song's ultimate guitar url to get a numpy array of the tabs of that song
async def getSongData(url): #function to parse for strum patterns at given url
    start_time = time.time()
    asession = AsyncHTMLSession() #creates Async object
    
    r = await asession.get(url ,timeout=30) #requests website from server and waits for response

    #print("Rendering JavaScript")
    site = await r.html.arender(timeout=30) #renders the javascript, NOTE: Currently the speed bottleneck as the javascript is taking too long to render
    
    print("Render Website --- %s seconds ---" % (time.time() - start_time))
    start_time = time.time()
    soup = BeautifulSoup(r.html.html, features='lxml') #creates BeautifulSoup object which will parse the html
    #print(soup.prettify())
    try:
        capo,key,tuning = getAdditionalInfo(soup)
        print('Capo:',capo,"Key:",key,"Tuning:",tuning)
        tab = getTab(soup) #calls function that will parse tabs
    except:
        print('----------Scraping Failed----------\n',url,)
        await r.session.close() #closes chromium process
        await asession.close() #close session
    print("Process Data --- %s seconds ---" % (time.time() - start_time))
    r.close()
    await r.session.close() #closes chromium process
    await asession.close() #close session
    return capo,key,tuning,tab #returns array of tabs

async def scrapeSongs(numSongs):
    asession = AsyncHTMLSession() #creates Async object
    page = 1 #starts on page 1 of songs website
    URL = 'https://www.ultimate-guitar.com/explore?type[]=Tabs&page='
    URL_KEY_C = 'https://www.ultimate-guitar.com/explore?type[]=Tabs&tonality[]=15&page='
    URL_KEY_G = 'https://www.ultimate-guitar.com/explore?type[]=Tabs&tonality[]=17&page='
    URL_KEY_D = 'https://www.ultimate-guitar.com/explore?type[]=Tabs&tonality[]=19&page='
    count = 0 #will hold # of songs checked
    data = []
    while count < numSongs and page <= 20: #each page holds 50 songs so sets limits on number of pages checked
        try:
            r = await asession.get(URL_KEY_D+str(page),timeout=30) #requests website from server and waits for response

            site = await r.html.arender(timeout=30) #renders javascript on page

            soup = BeautifulSoup(r.html.html, features='lxml') #creates BeautifulSoup object which will parse the html
            songs = soup.find_all(class_='_36xEX _3_qAd _17l1x') #the class that holds the url for each song
            for song in songs: #iterates through all songs on page
                try: #try except statements in case javascript rendering times out
                    #print(song['href'])
                    capo,key,tuning,tab = await getSongData(song['href']) #call getData using url of this song to get song data
                    data.append((song.text,song['href'],key,tuning,capo,tab)) #add tuple containing song name, url, and strum patterns to list
    #             except Exception:
    #                 traceback.print_exc()
                except:
                    print('Scraping Failed for song #',count + 1) 

                count += 1 #increase count on # of songs scraped
                if count > numSongs: #if we've scraped enough songs, break and return
                    break
                print('Scraped Song #',count)
            #await r.session.close() #closes chromium process for page
            page += 1 #after all songs scraped from page, increment page count to access next page of songs
        except:
            print("\n\n---------------FAILED TO LOAD PAGE-------------------\n\n")
            page += 1
    await r.session.close()
    await asession.close() #closes AnsyncHTMLSession
    return data

In [None]:
url2 = 'https://tabs.ultimate-guitar.com/tab/the-beatles/here-comes-the-sun-tabs-201130'
url1 = 'https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488'
url3 = 'https://tabs.ultimate-guitar.com/tab/nirvana/come-as-you-are-tabs-47319'
check = await getSongData(url2)
print(check)
#fullprint(check.T) #print the transpose because the normal array would be too wide to print cleanly

In [3]:
test = await scrapeSongs(1000)
dataframe = pd.DataFrame(test,columns=['Name','URL','Key','Tuning','Capo','Tabs'])
dataframe

Render Website --- 2.1902389526367188 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.04764103889465332 seconds ---
Scraped Song # 1
Render Website --- 2.3158159255981445 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.05858206748962402 seconds ---
Scraped Song # 2
Render Website --- 2.2573091983795166 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.05794191360473633 seconds ---
Scraped Song # 3
Render Website --- 2.353271961212158 seconds ---
Capo: 0 Key: D Tuning: Eb Ab Db Gb Bb Eb
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/guns-n-roses/sweet-child-o-mine-tabs-12657
Process Data --- 0.06409001350402832 seconds ---
Scraping Failed for song # 4
Scraped Song # 4
Render Website --- 4.6393327713012695 seconds ---
Capo: 0 Key: D Tuning: Eb Ab Db Gb Bb Eb
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/guns-n-roses/sweet-child-o-mine-tabs-57983
Process Data --- 0.071868896484375 se

Render Website --- 2.2634270191192627 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04125690460205078 seconds ---
Scraped Song # 52
Render Website --- 2.5171358585357666 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.056593894958496094 seconds ---
Scraped Song # 53
Render Website --- 3.6172420978546143 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.06820392608642578 seconds ---
Scraped Song # 54
Render Website --- 2.218484878540039 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.053965091705322266 seconds ---
Scraped Song # 55
Render Website --- 2.355269193649292 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.04852104187011719 seconds ---
Scraped Song # 56
Render Website --- 2.1162288188934326 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.028873205184936523 seconds ---
Scraped Song # 57
Render Website --- 2.3073909282684326 seconds ---
Capo: 0 Key: D Tuning: C F Bb Eb G C


Render Website --- 2.3395581245422363 seconds ---
Capo: 7 Key: D Tuning: D A D F# A D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/sleepy-dog/math-rock-midwest-emo-riffage-tabs-3043164
Process Data --- 0.0556790828704834 seconds ---
Scraping Failed for song # 101
Scraped Song # 101
Render Website --- 2.029668092727661 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.04215407371520996 seconds ---
Scraped Song # 102
Render Website --- 2.1069769859313965 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.050618886947631836 seconds ---
Scraped Song # 103
Render Website --- 1.9478709697723389 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.02899622917175293 seconds ---
Scraped Song # 104
Render Website --- 2.108030080795288 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.0390169620513916 seconds ---
Scraped Song # 105
Render Website --- 2.026940107345581 seconds ---
Capo: 0 Key: D Tuning: E A D 

Render Website --- 2.3644371032714844 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04302215576171875 seconds ---
Scraped Song # 149
Render Website --- 2.1652040481567383 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.032988786697387695 seconds ---
Scraped Song # 150
Render Website --- 2.115675687789917 seconds ---
Capo: 0 Key: D Tuning: 
Process Data --- 0.029387950897216797 seconds ---
Scraped Song # 151
Render Website --- 2.214900016784668 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.030917882919311523 seconds ---
Scraped Song # 152
Render Website --- 2.2596118450164795 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.027801990509033203 seconds ---
Scraped Song # 153
Render Website --- 2.045945167541504 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03428983688354492 seconds ---
Scraped Song # 154
Render Website --- 2.885756015777588 seconds ---
Capo: 0 Key: D Tuning: D A D F# A D
-------

Render Website --- 2.150458812713623 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03150296211242676 seconds ---
Scraped Song # 196
Render Website --- 2.1231749057769775 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04210519790649414 seconds ---
Scraped Song # 197
Render Website --- 2.3806979656219482 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.041259050369262695 seconds ---
Scraped Song # 198
Render Website --- 2.2434020042419434 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03920412063598633 seconds ---
Scraped Song # 199
Render Website --- 2.6450343132019043 seconds ---
Capo: 0 Key: D Tuning: D G C F A D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/all-them-witches/diamond-tabs-2565243
Process Data --- 0.06671404838562012 seconds ---
Scraping Failed for song # 200
Scraped Song # 200
Render Website --- 1.9504988193511963 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process

Render Website --- 2.1327428817749023 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.05704808235168457 seconds ---
Scraped Song # 246
Render Website --- 4.313227891921997 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.049405813217163086 seconds ---
Scraped Song # 247
Render Website --- 2.1726887226104736 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.034577131271362305 seconds ---
Scraped Song # 248
Render Website --- 2.0912301540374756 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.03189587593078613 seconds ---
Scraped Song # 249
Render Website --- 2.2264978885650635 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.029841899871826172 seconds ---
Scraped Song # 250
Render Website --- 2.066765785217285 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.03080010414123535 seconds ---
Scraped Song # 251
Render Website --- 2.165363073348999 seconds ---
Capo: 0 Key: D Tuning: E A D G B

Render Website --- 2.6090500354766846 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.05265998840332031 seconds ---
Scraped Song # 293
Render Website --- 2.1945972442626953 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.0274350643157959 seconds ---
Scraped Song # 294
Render Website --- 2.619407892227173 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/mike-oldfield/on-horseback-tabs-606540
Process Data --- 0.10607481002807617 seconds ---
Scraping Failed for song # 295
Scraped Song # 295
Render Website --- 2.745479106903076 seconds ---
Capo: 0 Key: D Tuning: A E A D F# B
Process Data --- 0.04629683494567871 seconds ---
Scraped Song # 296
Render Website --- 2.2396812438964844 seconds ---
Capo: 0 Key: D Tuning: D A D F# A D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/alt-j/deadcrush-tabs-2159877
Process Data --- 0.08931708335876465 seconds ---
Scraping

Render Website --- 2.1545252799987793 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.03307700157165527 seconds ---
Scraped Song # 339
Render Website --- 2.031953811645508 seconds ---
Capo: 7 Key: D Tuning: D A D G B E
Process Data --- 0.0399470329284668 seconds ---
Scraped Song # 340
Render Website --- 2.2280490398406982 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/all-them-witches/call-me-star-tabs-2565315
Process Data --- 0.06535220146179199 seconds ---
Scraping Failed for song # 341
Scraped Song # 341
Render Website --- 2.241947889328003 seconds ---
Capo: 0 Key: D Tuning: Db Ab Db Gb Bb Eb
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/microwave/grass-stains-tabs-2562846
Process Data --- 0.06401515007019043 seconds ---
Scraping Failed for song # 342
Scraped Song # 342
Render Website --- 6.1114182472229 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data -

Render Website --- 2.2143962383270264 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.03889298439025879 seconds ---
Scraped Song # 383
Render Website --- 2.0246448516845703 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03462624549865723 seconds ---
Scraped Song # 384
Render Website --- 2.3409247398376465 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03444623947143555 seconds ---
Scraped Song # 385
Render Website --- 2.158466100692749 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.03492999076843262 seconds ---
Scraped Song # 386
Render Website --- 2.152630090713501 seconds ---
Capo: 0 Key: D Tuning: D G C F A D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/the-bad-light/neuronaut-tabs-2692893
Process Data --- 0.06035470962524414 seconds ---
Scraping Failed for song # 387
Scraped Song # 387
Render Website --- 4.130193710327148 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Dat

Render Website --- 3.54325008392334 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.044818878173828125 seconds ---
Scraped Song # 430
Render Website --- 2.123033046722412 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.038244009017944336 seconds ---
Scraped Song # 431
Render Website --- 2.103106737136841 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03729104995727539 seconds ---
Scraped Song # 432
Render Website --- 2.162635087966919 seconds ---
Capo: 0 Key: D Tuning: D A D F# A D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/marietta/chase-i-hardly-know-ya-tabs-3297230
Process Data --- 0.05893087387084961 seconds ---
Scraping Failed for song # 433
Scraped Song # 433
Render Website --- 2.29728102684021 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.08108091354370117 seconds ---
Scraped Song # 434
Render Website --- 2.336513042449951 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Proce

Render Website --- 1.9762699604034424 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04146599769592285 seconds ---
Scraped Song # 479
Render Website --- 2.334533214569092 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03093409538269043 seconds ---
Scraped Song # 480
Render Website --- 2.1115589141845703 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/switchfoot/your-love-is-a-song-tabs-882805
Process Data --- 0.05300402641296387 seconds ---
Scraping Failed for song # 481
Scraped Song # 481
Render Website --- 2.1811978816986084 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.04559731483459473 seconds ---
Scraped Song # 482
Render Website --- 2.6089839935302734 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04527592658996582 seconds ---
Scraped Song # 483
Render Website --- 2.314394235610962 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Proc

Render Website --- 2.0560760498046875 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.040589094161987305 seconds ---
Scraped Song # 526
Render Website --- 2.2475898265838623 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.031126976013183594 seconds ---
Scraped Song # 527
Render Website --- 2.07886004447937 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03270721435546875 seconds ---
Scraped Song # 528
Render Website --- 2.3604848384857178 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.043859243392944336 seconds ---
Scraped Song # 529
Render Website --- 2.0114970207214355 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.030450820922851562 seconds ---
Scraped Song # 530
Render Website --- 2.2386319637298584 seconds ---
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/lucy-dacus/i-dont-wanna-be-funny-anymore-tabs-2827331
Process Data --- 0.046386003494262695 seconds ---
Scrap

Render Website --- 2.3873069286346436 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.040904998779296875 seconds ---
Scraped Song # 576
Render Website --- 3.082486867904663 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/frank-turner/plain-sailing-weather-tabs-3154064
Process Data --- 0.06999492645263672 seconds ---
Scraping Failed for song # 577
Scraped Song # 577
Render Website --- 3.0531599521636963 seconds ---
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/colin-hay/overkill-tabs-3199769
Process Data --- 0.08374691009521484 seconds ---
Scraping Failed for song # 578
Scraped Song # 578
Render Website --- 2.5004658699035645 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04358100891113281 seconds ---
Scraped Song # 579
Render Website --- 2.4391422271728516 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.02947092056274414 seconds ---

Render Website --- 2.359246015548706 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.044390201568603516 seconds ---
Scraped Song # 622
Render Website --- 2.740001916885376 seconds ---
Capo: 0 Key: D Tuning: Eb Ab Db Gb Bb Eb
Process Data --- 0.053070068359375 seconds ---
Scraped Song # 623
Render Website --- 2.1823511123657227 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03975081443786621 seconds ---
Scraped Song # 624
Render Website --- 2.290843963623047 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.0306851863861084 seconds ---
Scraped Song # 625
Render Website --- 2.2455179691314697 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.038535118103027344 seconds ---
Scraped Song # 626
Render Website --- 2.1189868450164795 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.029850006103515625 seconds ---
Scraped Song # 627
Render Website --- 2.19512677192688 seconds ---
Capo: 0 Key: D Tuning: E A D G 

Render Website --- 2.276012659072876 seconds ---
Capo: 0 Key: D Tuning: D A D G A D
Process Data --- 0.04395580291748047 seconds ---
Scraped Song # 669
Render Website --- 2.422929048538208 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.037281036376953125 seconds ---
Scraped Song # 670
Render Website --- 2.12919282913208 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.03716087341308594 seconds ---
Scraped Song # 671
Render Website --- 2.2073581218719482 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.04619598388671875 seconds ---
Scraped Song # 672
Render Website --- 2.2144289016723633 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.032968997955322266 seconds ---
Scraped Song # 673
Render Website --- 2.2348320484161377 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04352688789367676 seconds ---
Scraped Song # 674
Render Website --- 2.082780122756958 seconds ---
Capo: 0 Key: D Tuning: B F# B E G# 

Render Website --- 2.2134311199188232 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.058197975158691406 seconds ---
Scraped Song # 715
Render Website --- 2.1558380126953125 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.040261268615722656 seconds ---
Scraped Song # 716
Render Website --- 2.0823328495025635 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.02744913101196289 seconds ---
Scraped Song # 717
Render Website --- 2.1073179244995117 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.0298309326171875 seconds ---
Scraped Song # 718
Render Website --- 2.130871057510376 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03826403617858887 seconds ---
Scraped Song # 719
Render Website --- 2.2198679447174072 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03245997428894043 seconds ---
Scraped Song # 720
Render Website --- 2.1838698387145996 seconds ---
Capo: 0 Key: D Tuning: E A D G B

Render Website --- 2.1416409015655518 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.037136077880859375 seconds ---
Scraped Song # 761
Render Website --- 2.0553600788116455 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.02899479866027832 seconds ---
Scraped Song # 762
Render Website --- 2.1722488403320312 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.037313222885131836 seconds ---
Scraped Song # 763
Render Website --- 2.1855857372283936 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03387880325317383 seconds ---
Scraped Song # 764
Render Website --- 2.1621928215026855 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.02808690071105957 seconds ---
Scraped Song # 765
Render Website --- 2.24678897857666 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03126811981201172 seconds ---
Scraped Song # 766
Render Website --- 2.2273902893066406 seconds ---
Capo: 0 Key: D Tuning: E A D G B

Render Website --- 2.1847450733184814 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.06347203254699707 seconds ---
Scraped Song # 809
Render Website --- 4.300601959228516 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.05102801322937012 seconds ---
Scraped Song # 810
Render Website --- 5.12354588508606 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.05145907402038574 seconds ---
Scraped Song # 811
Render Website --- 2.322023868560791 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04709982872009277 seconds ---
Scraped Song # 812
Render Website --- 4.312027931213379 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.05826997756958008 seconds ---
Scraped Song # 813
Render Website --- 2.270766019821167 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03804278373718262 seconds ---
Scraped Song # 814
Render Website --- 2.5731680393218994 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Pro

Render Website --- 2.1587939262390137 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.034587860107421875 seconds ---
Scraped Song # 859
Render Website --- 2.2825050354003906 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.027235746383666992 seconds ---
Scraped Song # 860
Render Website --- 2.2049360275268555 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04091811180114746 seconds ---
Scraped Song # 861
Render Website --- 2.0229523181915283 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.041188955307006836 seconds ---
Scraped Song # 862
Render Website --- 1.9841010570526123 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03122711181640625 seconds ---
Scraped Song # 863
Render Website --- 2.1332969665527344 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04257702827453613 seconds ---
Scraped Song # 864
Render Website --- 2.1739110946655273 seconds ---
Capo: 0 Key: D Tuning: E A D 

Render Website --- 2.077540159225464 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.03847193717956543 seconds ---
Scraped Song # 905
Render Website --- 2.071072816848755 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.03792381286621094 seconds ---
Scraped Song # 906
Render Website --- 2.0533640384674072 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.02816486358642578 seconds ---
Scraped Song # 907
Render Website --- 2.2250280380249023 seconds ---
Capo: 7 Key: D Tuning: E A D G B E
Process Data --- 0.054074764251708984 seconds ---
Scraped Song # 908
Render Website --- 2.040102958679199 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.026729106903076172 seconds ---
Scraped Song # 909
Render Website --- 2.303122043609619 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.0647270679473877 seconds ---
Scraped Song # 910
Render Website --- 2.1787140369415283 seconds ---
Capo: 0 Key: D Tuning: E A D G B E


Render Website --- 2.376875877380371 seconds ---
Capo: 0 Key: D Tuning: D A D G B E
Process Data --- 0.03910398483276367 seconds ---
Scraped Song # 953
Render Website --- 2.457334041595459 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.04075789451599121 seconds ---
Scraped Song # 954
Render Website --- 2.3903098106384277 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.0298919677734375 seconds ---
Scraped Song # 955
Render Website --- 2.561818838119507 seconds ---
Capo: 0 Key: D Tuning: Eb Ab Db Gb Bb Eb
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/creeper/we-had-a-pact-tabs-2236627
Process Data --- 0.0623469352722168 seconds ---
Scraping Failed for song # 956
Scraped Song # 956
Render Website --- 2.4593188762664795 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.046215057373046875 seconds ---
Scraped Song # 957
Render Website --- 2.773486852645874 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process D

Render Website --- 2.19905424118042 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.06176114082336426 seconds ---
Scraped Song # 999
Render Website --- 2.1084470748901367 seconds ---
Capo: 0 Key: D Tuning: E A D G B E
Process Data --- 0.02904224395751953 seconds ---
Scraped Song # 1000


Unnamed: 0,Name,URL,Key,Tuning,Capo,Tabs
0,Here Comes The Sun (ver 3),https://tabs.ultimate-guitar.com/tab/the-beatl...,D,E A D G B E,7,"[[2, -, 0, 2, -, 2, 0, -, -, -, 0, -, -, -, -,..."
1,Hey There Delilah (ver 2),https://tabs.ultimate-guitar.com/tab/plain-whi...,D,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
2,Sweet Home Alabama,https://tabs.ultimate-guitar.com/tab/lynyrd-sk...,D,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
3,Cyberpunk 2077 - Kerrys Song,https://tabs.ultimate-guitar.com/tab/misc-comp...,D,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
4,Sweet Home Alabama Intro,https://tabs.ultimate-guitar.com/tab/lynyrd-sk...,D,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
...,...,...,...,...,...,...
845,Canned Heat (ver 2),https://tabs.ultimate-guitar.com/tab/tommy-joh...,D,E A D G B E,0,"[[-, -, -, -, -, 2, -, 2, 5, 5, 5, 5, 5, 4, 3,..."
846,Fossi Figo Intro,https://tabs.ultimate-guitar.com/tab/elio-e-le...,D,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
847,My Soul Magnifies The Lord Intro,https://tabs.ultimate-guitar.com/tab/chris-tom...,D,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
848,Talulah Gosh,https://tabs.ultimate-guitar.com/tab/talulah-g...,D,E A D G B E,0,"[[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3,..."


In [4]:
pickle.dump(dataframe, open( "ScrapedSongsKeyD.p", "wb" ) )

In [8]:
sum1 = 0
for data in dataframe.iloc[:,5]:
    sum1+=data.shape[1]

sum2 = 0
for i in range(0,814):
    #print(dataframe.iloc[i,2])
    if dataframe.iloc[i,2] == 'C':
        sum2+=1
sum1

183221

In [None]:
# #TESTING

# asession = AsyncHTMLSession() #creates Async object
# url2 = 'https://tabs.ultimate-guitar.com/tab/the-beatles/here-comes-the-sun-tabs-201130'
# url1 = 'https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488'
# r = await asession.get(url1 ,timeout=30) #requests website from server and waits for response

# #print("Rendering JavaScript")
# site = await r.html.arender(timeout=30) #renders the javascript, NOTE: Currently the speed bottleneck as the javascript is taking too long to render

# soup = BeautifulSoup(r.html.html, features='lxml') #creates BeautifulSoup object which will parse the html
# body = soup.find(class_='js-page js-global-wrapper') #finds main body of website which contains the strum patterns
# patterns = body.find_all(class_='_2J-ci') #every tabs line is within a class called '_2J-ci'

# print('Patterns length:',len(patterns))
# #print(patterns[0].text)
# allTabs = []
# allTabs= [parseTab(pattern) for pattern in patterns]
# print('Alltabs length:',len(allTabs))
# x = allTabs[0]
# for i in range(1,len(allTabs)):
#     #print('1st:',allTabs[i-1])
#     print(allTabs[i].shape)
#     print(i)
#     print('2nd:',allTabs[i],'\n')
#     x = np.append(x,allTabs[i],axis = 1)
# print(x.shape)
# fullprint(x[:,0:10])
# fullprint(x.T)
# #print(x[:,0:10])
# #np.append(array,array2,axis=1).shape



In [None]:
# #TESTING

# # for line in patterns[8].children:
# #     print(line.text)
    
# x = patterns[17].find_all(class_='_1zlI0')
# for line in x:
#     if line.text[0] == 'e' or line.text[0] == 'B' or line.text[0] == 'G' or line.text[0] == 'D' or line.text[0] == 'A' or line.text[0] == 'E':
#         print(line.text)

# for line in x:
#     print(line.text)
# x[2]

In [None]:
# #TESTING

# lines = []
# for line in patterns[0].children:
#     lines.append(list(line.text))
# tabs = np.array(lines)
# tabsTranspose = z.T
# #trans[tabsTranspose=='-']= np.NaN
# mask_array = [any(char.isdigit() for char in row) for row in tabsTranspose]
# #print(mask_array)
# array = tabsTranspose[mask_array].T
# #print(array)
# #[''.join(row) for row in array]

# lines = []
# for line in patterns[1].children:
#     lines.append(list(line.text))
# tabs = np.array(lines)
# tabsTranspose = z.T
# #trans[tabsTranspose=='-']= np.NaN
# mask_array = [any(char.isdigit() for char in row) for row in tabsTranspose]
# #print(mask_array)
# array2 = tabsTranspose[mask_array].T
# #print(array)
# [''.join(row) for row in array]
# np.append(array,array2,axis=1).shape


In [None]:
#just learning about list comprehension and in line loops
# array = [[1,2,3,4,5],[6,7,8,9,10]]
# print([[num > 4 for num in row] for row in array])
# print([any(num >4 for num in row) for row in array])
string = 'testing string split function'
string.split()
import re
capo = [int(val) for val in re.findall(r'\d+', 'Capo: 7th')][0]
capo