In [1]:
import requests
from requests_html import HTMLSession
from requests_html import AsyncHTMLSession
from bs4 import BeautifulSoup
import pprint
import numpy as np
import pandas as pd
import time
import pickle
import re

In [9]:
#source: https://stackoverflow.com/questions/1987694/how-to-print-the-full-numpy-array-without-truncation
#function to print out full numpy array instead of excerpt
def fullprint(*args, **kwargs):
    from pprint import pprint
    import numpy
    opt = numpy.get_printoptions()
    numpy.set_printoptions(threshold=numpy.inf)
    pprint(*args, **kwargs)
    numpy.set_printoptions(**opt)
    

#parse HTML text to clean up tabs and return array of tabs (get rid of all the spaces)
def parseTab(TAB):
    lines = [] #will hold all lines of the tab
    for line in TAB.children: #iterate through lines of tab
        #only parse lines with notes (ignores lyric lines and chord lines)
        if 'e|' == line.text[0:2] or 'B|' == line.text[0:2] or 'G|' == line.text[0:2] or 'D|' == line.text[0:2] or 'A|' == line.text[0:2] or 'E|' == line.text[0:2]:
            #print(line.text)
            val = list(line.text)
            if(lines):
                val = val[0:len(lines[0])] #some lines have additional text at the end so removes that
            lines.append(val) #adds to the lines array
    tabs = np.array(lines) #creates numpy array out of the lines
    tabsTranspose = tabs.T #takes transpose of array so we can easily iterate through the columns by making them the rows
    #trans[tabsTranspose=='-']= np.NaN
    mask_array = [any(char.isdigit() for char in row) for row in tabsTranspose] #creates mask that finds the lines that have notes (will be used to filter out empty lines)
    #print(mask_array)
    array = tabsTranspose[mask_array].T #filters out all lines without notes to get array with all notes
    #print()
    flag = False
    if array.shape[0]==6:
        flag = True
    return array,flag
    #print(array)
    #[''.join(row) for row in array]
    
#input HTML beautiful soup and get array of tabs
def getTab(soup):
    body = soup.find(class_='js-page js-global-wrapper') #finds main body of website which contains the strum patterns
    #print(body)
    patterns = body.find_all(class_='_2o1rM') #every tabs line is within a class called '_2o1rM'
    #print('Patterns:',patterns)
    allTabs = [] #will hold each row of tabs
    for pattern in patterns:
        parsedTab,check = parseTab(pattern)
        if check:
            allTabs.append(parsedTab)
    #allTabs= [parsedTab for (parsedTab,check) in parseTab(pattern) if check for pattern in patterns] #parses each row of tab and stores in allTabs
    #print('AllTabs:',allTabs)

    combinedTab = allTabs[0] 
    for i in range(1,len(allTabs)): #combine all the rows of tabs into one long tab
        combinedTab = np.append(combinedTab,allTabs[i],axis = 1)
    return combinedTab


def getAdditionalInfo(soup):
    body = soup.find(class_='js-page js-global-wrapper') #finds main body of website which contains the strum patterns
    info = soup.find(class_='_2I_M-')
    capo = 0
    key = ''
    tuning = ''
    for line in info:
        if 'Capo' in line.text:
            #https://stackoverflow.com/questions/4289331/how-to-extract-numbers-from-a-string-in-python
            capo = [int(val) for val in re.findall(r'\d+', 'Capo: 7th')][0]
        if 'Key' in line.text:
            key = line.text.split()[1]
        if 'Tuning' in line.text:
            tuning = line.text.split(': ')[1]
    return capo,key,tuning


#input a song's ultimate guitar url to get a numpy array of the tabs of that song
async def getSongData(url): #function to parse for strum patterns at given url
    start_time = time.time()
    asession = AsyncHTMLSession() #creates Async object
    
    r = await asession.get(url ,timeout=30) #requests website from server and waits for response

    #print("Rendering JavaScript")
    site = await r.html.arender(timeout=30) #renders the javascript, NOTE: Currently the speed bottleneck as the javascript is taking too long to render
    
    print("Render Website --- %s seconds ---" % (time.time() - start_time))
    start_time = time.time()
    soup = BeautifulSoup(r.html.html, features='lxml') #creates BeautifulSoup object which will parse the html
    #print(soup.prettify())
    try:
        capo,key,tuning = getAdditionalInfo(soup)
        print('Capo:',capo,"Key:",key,"Tuning:",tuning)
        tab = getTab(soup) #calls function that will parse tabs
    except:
        print('----------Scraping Failed----------\n',url,)
        await r.session.close() #closes chromium process
        await asession.close() #close session
    print("Process Data --- %s seconds ---" % (time.time() - start_time))
    r.close()
    await r.session.close() #closes chromium process
    await asession.close() #close session
    return capo,key,tuning,tab #returns array of tabs

async def scrapeSongs(numSongs):
    asession = AsyncHTMLSession() #creates Async object
    page = 1 #starts on page 1 of songs website
    URL = 'https://www.ultimate-guitar.com/explore?type[]=Tabs&page='
    URL_KEY_C = 'https://www.ultimate-guitar.com/explore?type[]=Tabs&tonality[]=15&page='
    URL_KEY_G = 'https://www.ultimate-guitar.com/explore?type[]=Tabs&tonality[]=17&page='
    count = 0 #will hold # of songs checked
    data = []
    while count < numSongs and page <= 20: #each page holds 50 songs so sets limits on number of pages checked
        try:
            r = await asession.get(URL_KEY_G+str(page),timeout=30) #requests website from server and waits for response

            site = await r.html.arender(timeout=30) #renders javascript on page

            soup = BeautifulSoup(r.html.html, features='lxml') #creates BeautifulSoup object which will parse the html
            songs = soup.find_all(class_='_36xEX _3_qAd _17l1x') #the class that holds the url for each song
            for song in songs: #iterates through all songs on page
                try: #try except statements in case javascript rendering times out
                    #print(song['href'])
                    capo,key,tuning,tab = await getSongData(song['href']) #call getData using url of this song to get song data
                    data.append((song.text,song['href'],key,tuning,capo,tab)) #add tuple containing song name, url, and strum patterns to list
    #             except Exception:
    #                 traceback.print_exc()
                except:
                    print('Scraping Failed for song #',count + 1) 

                count += 1 #increase count on # of songs scraped
                if count > numSongs: #if we've scraped enough songs, break and return
                    break
                print('Scraped Song #',count)
            #await r.session.close() #closes chromium process for page
            page += 1 #after all songs scraped from page, increment page count to access next page of songs
        except:
            print("\n\n---------------FAILED TO LOAD PAGE-------------------\n\n")
            page += 1
    await r.session.close()
    await asession.close() #closes AnsyncHTMLSession
    return data

In [None]:
url2 = 'https://tabs.ultimate-guitar.com/tab/the-beatles/here-comes-the-sun-tabs-201130'
url1 = 'https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488'
url3 = 'https://tabs.ultimate-guitar.com/tab/nirvana/come-as-you-are-tabs-47319'
check = await getSongData(url2)
print(check)
#fullprint(check.T) #print the transpose because the normal array would be too wide to print cleanly

In [10]:
test = await scrapeSongs(1000)
dataframe = pd.DataFrame(test,columns=['Name','URL','Key','Tuning','Capo','Tabs'])
dataframe

Render Website --- 2.17561674118042 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03294825553894043 seconds ---
Scraped Song # 1
Render Website --- 1.981335163116455 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.028949975967407227 seconds ---
Scraped Song # 2
Render Website --- 2.2528576850891113 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.04118204116821289 seconds ---
Scraped Song # 3
Render Website --- 2.09957218170166 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.030998945236206055 seconds ---
Scraped Song # 4
Render Website --- 2.1263928413391113 seconds ---
Capo: 0 Key: G Tuning: 
Process Data --- 0.03884482383728027 seconds ---
Scraped Song # 5
Render Website --- 2.1300201416015625 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.041214942932128906 seconds ---
Scraped Song # 6
Render Website --- 2.3024961948394775 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.044

Render Website --- 2.0287411212921143 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.02695608139038086 seconds ---
Scraped Song # 52
Render Website --- 2.0516910552978516 seconds ---
Capo: 0 Key: G Tuning: Eb Ab Db Gb Bb Eb
Process Data --- 0.039491891860961914 seconds ---
Scraped Song # 53
Render Website --- 2.130077838897705 seconds ---
Capo: 7 Key: G Tuning: 
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/camping-in-alaska/dragon-ball-z-budokai-tenkaichi-4-tabs-2093635
Process Data --- 0.04939126968383789 seconds ---
Scraping Failed for song # 54
Scraped Song # 54
Render Website --- 2.187299966812134 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.040946006774902344 seconds ---
Scraped Song # 55
Render Website --- 2.0246047973632812 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03056192398071289 seconds ---
Scraped Song # 56
Render Website --- 2.0746021270751953 seconds ---
Capo: 0 Key: G Tuning: D A

Render Website --- 2.1548306941986084 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.045870065689086914 seconds ---
Scraped Song # 102
Render Website --- 2.302253007888794 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.028505802154541016 seconds ---
Scraped Song # 103
Render Website --- 4.557729005813599 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.062223196029663086 seconds ---
Scraped Song # 104
Render Website --- 2.120962142944336 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.024336814880371094 seconds ---
Scraped Song # 105
Render Website --- 2.0689611434936523 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.034857988357543945 seconds ---
Scraped Song # 106
Render Website --- 2.10144305229187 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.027692794799804688 seconds ---
Scraped Song # 107
Render Website --- 2.213460922241211 seconds ---
Capo: 0 Key: G Tuning: E A D G B

Render Website --- 2.0810139179229736 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03785991668701172 seconds ---
Scraped Song # 152
Render Website --- 2.2592360973358154 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.04365706443786621 seconds ---
Scraped Song # 153
Render Website --- 2.136361837387085 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.04953289031982422 seconds ---
Scraped Song # 154
Render Website --- 2.047481060028076 seconds ---
Capo: 0 Key: G Tuning: D G C F A D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/biffy-clyro/cop-syrup-tabs-3365327
Process Data --- 0.05981588363647461 seconds ---
Scraping Failed for song # 155
Scraped Song # 155
Render Website --- 2.273458242416382 seconds ---
Capo: 0 Key: G Tuning: B E A D F# B
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/vola/head-mounted-sideways-tabs-3466562
Process Data --- 0.13243699073791504 seconds ---
Sc

Render Website --- 2.081688165664673 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03576493263244629 seconds ---
Scraped Song # 198
Render Website --- 2.0170066356658936 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.027431011199951172 seconds ---
Scraped Song # 199
Render Website --- 2.33975887298584 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.02853083610534668 seconds ---
Scraped Song # 200
Render Website --- 2.049514055252075 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.030077219009399414 seconds ---
Scraped Song # 201
Render Website --- 2.093702793121338 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.028641700744628906 seconds ---
Scraped Song # 202
Render Website --- 2.223273992538452 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.0297086238861084 seconds ---
Scraped Song # 203
Render Website --- 2.2391469478607178 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
P

Render Website --- 2.080260753631592 seconds ---
Capo: 0 Key: G Tuning: D G D G B D
Process Data --- 0.031090736389160156 seconds ---
Scraped Song # 247
Render Website --- 2.0494062900543213 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.026821136474609375 seconds ---
Scraped Song # 248
Render Website --- 3.572193145751953 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03347611427307129 seconds ---
Scraped Song # 249
Render Website --- 2.094909906387329 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.030355215072631836 seconds ---
Scraped Song # 250
Render Website --- 2.1403582096099854 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.0337681770324707 seconds ---
Scraped Song # 251
Render Website --- 2.218658924102783 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.030587196350097656 seconds ---
Scraped Song # 252
Render Website --- 2.297590970993042 seconds ---
Capo: 7 Key: G Tuning: E A D G B E

Render Website --- 2.3138763904571533 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03482389450073242 seconds ---
Scraped Song # 298
Render Website --- 2.113330841064453 seconds ---
Capo: 0 Key: G Tuning: D G D G B D
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/pink-floyd/fearless-tabs-2704638
Process Data --- 0.044981956481933594 seconds ---
Scraping Failed for song # 299
Scraped Song # 299
Render Website --- 2.0971100330352783 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03361701965332031 seconds ---
Scraped Song # 300
Render Website --- 1.9653372764587402 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.02945995330810547 seconds ---
Scraped Song # 301
Render Website --- 2.1164908409118652 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.0293731689453125 seconds ---
Scraped Song # 302
Render Website --- 2.252429246902466 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data -

Render Website --- 2.1560630798339844 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.0401921272277832 seconds ---
Scraped Song # 347
Render Website --- 2.139159917831421 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.04873180389404297 seconds ---
Scraped Song # 348
Render Website --- 2.151545763015747 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.02983713150024414 seconds ---
Scraped Song # 349
Render Website --- 2.318505048751831 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.038704872131347656 seconds ---
Scraped Song # 350
Render Website --- 2.1012048721313477 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03327202796936035 seconds ---
Scraped Song # 351
Render Website --- 2.142392873764038 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.0359799861907959 seconds ---
Scraped Song # 352
Render Website --- 2.168412923812866 seconds ---
Capo: 0 Key: G Tuning: D A D G B E
Pro

Render Website --- 2.4294369220733643 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.07378506660461426 seconds ---
Scraped Song # 397
Render Website --- 2.2771902084350586 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/all-them-witches/mellowing-tabs-2569950
Process Data --- 0.06960487365722656 seconds ---
Scraping Failed for song # 398
Scraped Song # 398
Render Website --- 2.097247838973999 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.029125213623046875 seconds ---
Scraped Song # 399
Render Website --- 2.1349010467529297 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.029553890228271484 seconds ---
Scraped Song # 400
Render Website --- 2.140765905380249 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.029387950897216797 seconds ---
Scraped Song # 401
Render Website --- 2.084409236907959 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Proce

Render Website --- 2.8221209049224854 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.028022050857543945 seconds ---
Scraped Song # 447
Render Website --- 2.935792922973633 seconds ---
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/halsey/finally-beautiful-stranger-tabs-3141155
Process Data --- 0.07056093215942383 seconds ---
Scraping Failed for song # 448
Scraped Song # 448
Render Website --- 2.613736867904663 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.02870917320251465 seconds ---
Scraped Song # 449
Render Website --- 2.1060121059417725 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/2977334
Process Data --- 0.047731876373291016 seconds ---
Scraping Failed for song # 450
Scraped Song # 450
Render Website --- 2.149657964706421 seconds ---
Capo: 0 Key: G Tuning: D A D G B E
Process Data --- 0.043936967849731445 seconds ---
Scraped Song # 451
Rende

Render Website --- 2.459101915359497 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03200483322143555 seconds ---
Scraped Song # 498
Render Website --- 2.3311429023742676 seconds ---
Capo: 0 Key: G Tuning: Eb Ab Db Gb Bb Eb
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/guns-n-roses/knockin-on-heavens-door-tabs-3073100
Process Data --- 0.06727910041809082 seconds ---
Scraping Failed for song # 499
Scraped Song # 499
Render Website --- 2.2184107303619385 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.046629905700683594 seconds ---
Scraped Song # 500
Render Website --- 2.0988240242004395 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.029572725296020508 seconds ---
Scraped Song # 501
Render Website --- 2.1737847328186035 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03178000450134277 seconds ---
Scraped Song # 502
Render Website --- 2.1103808879852295 seconds ---
Capo: 0 Key: G Tuning: 

Render Website --- 2.0858309268951416 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03044605255126953 seconds ---
Scraped Song # 549
Render Website --- 2.0997748374938965 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.02807927131652832 seconds ---
Scraped Song # 550
Render Website --- 2.15974497795105 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.034110069274902344 seconds ---
Scraped Song # 551
Render Website --- 2.2160520553588867 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.0508120059967041 seconds ---
Scraped Song # 552
Render Website --- 2.345449924468994 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.028931140899658203 seconds ---
Scraped Song # 553
Render Website --- 2.219886064529419 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.04273509979248047 seconds ---
Scraped Song # 554
Render Website --- 2.420748233795166 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
P

Render Website --- 2.3704941272735596 seconds ---
Capo: 0 Key: G Tuning: D A D G B E
Process Data --- 0.05868697166442871 seconds ---
Scraped Song # 600
Render Website --- 2.311635971069336 seconds ---
Capo: 0 Key: G Tuning: D A D G B E
Process Data --- 0.05517697334289551 seconds ---
Scraped Song # 601
Render Website --- 2.0300660133361816 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.0274200439453125 seconds ---
Scraped Song # 602
Render Website --- 2.370774030685425 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.030009984970092773 seconds ---
Scraped Song # 603
Render Website --- 2.2135589122772217 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.032311201095581055 seconds ---
Scraped Song # 604
Render Website --- 2.1952340602874756 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03928184509277344 seconds ---
Scraped Song # 605
Render Website --- 2.0439348220825195 seconds ---
Capo: 0 Key: G Tuning: E A D G B 

Render Website --- 2.2499310970306396 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.04419302940368652 seconds ---
Scraped Song # 653
Render Website --- 2.3888168334960938 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03242301940917969 seconds ---
Scraped Song # 654
Render Website --- 2.204793930053711 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.031148910522460938 seconds ---
Scraped Song # 655
Render Website --- 2.4305989742279053 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.03965282440185547 seconds ---
Scraped Song # 656
Render Website --- 2.1799097061157227 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03825187683105469 seconds ---
Scraped Song # 657
Render Website --- 2.1472811698913574 seconds ---
Capo: 0 Key: G Tuning: D A D G B E
Process Data --- 0.03468966484069824 seconds ---
Scraped Song # 658
Render Website --- 2.0948522090911865 seconds ---
Capo: 0 Key: G Tuning: E A D G B

Render Website --- 5329.734058141708 seconds ---
Capo: 0 Key: G Tuning: D G D G B D
Process Data --- 0.03827023506164551 seconds ---
Scraped Song # 699
Render Website --- 13.722937107086182 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.04366588592529297 seconds ---
Scraped Song # 700
Render Website --- 10.467144966125488 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.05540800094604492 seconds ---
Scraped Song # 701
Render Website --- 3.5345120429992676 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.05181694030761719 seconds ---
Scraped Song # 702
Render Website --- 3.26813006401062 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03420591354370117 seconds ---
Scraped Song # 703
Render Website --- 6.600407838821411 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.05399584770202637 seconds ---
Scraped Song # 704
Render Website --- 2.4932570457458496 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
P

Render Website --- 2.6060850620269775 seconds ---
Capo: 0 Key: G Tuning: Eb Ab Db Gb Bb Eb
Process Data --- 0.035420894622802734 seconds ---
Scraped Song # 751
Render Website --- 2.548060178756714 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03026294708251953 seconds ---
Scraped Song # 752
Render Website --- 2.692659854888916 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03826618194580078 seconds ---
Scraped Song # 753
Render Website --- 2.3017990589141846 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.027704954147338867 seconds ---
Scraped Song # 754
Render Website --- 2.3097472190856934 seconds ---
Capo: 0 Key: G Tuning: D G D G B D
Process Data --- 0.02990889549255371 seconds ---
Scraped Song # 755
Render Website --- 2.366909980773926 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.0417170524597168 seconds ---
Scraped Song # 756
Render Website --- 2.1561098098754883 seconds ---
Capo: 0 Key: G Tuning: E A D

Render Website --- 2.623999834060669 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.045755863189697266 seconds ---
Scraped Song # 798
Render Website --- 2.7294440269470215 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.055383920669555664 seconds ---
Scraped Song # 799
Render Website --- 2.678086042404175 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.05114483833312988 seconds ---
Scraped Song # 800
Render Website --- 2.3050920963287354 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.028705596923828125 seconds ---
Scraped Song # 801
Render Website --- 2.4101741313934326 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
Process Data --- 0.029618024826049805 seconds ---
Scraped Song # 802
Render Website --- 2.4120640754699707 seconds ---
Capo: 7 Key: G Tuning: 
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/colbie-caillat/bubbly-tabs-666186
Process Data --- 0.05745077133178711 seconds ---
Sc

Render Website --- 2.4243969917297363 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.04848527908325195 seconds ---
Scraped Song # 847
Scraping Failed for song # 848
Scraped Song # 848
Render Website --- 2.4498181343078613 seconds ---
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/needtobreathe/no-excuses-tabs-2238173
Process Data --- 0.04055023193359375 seconds ---
Scraping Failed for song # 849
Scraped Song # 849
Render Website --- 6.510504961013794 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.0636899471282959 seconds ---
Scraped Song # 850
Render Website --- 2.504732847213745 seconds ---
Capo: 0 Key: G Tuning: 
Process Data --- 0.03436017036437988 seconds ---
Scraped Song # 851
Render Website --- 2.744741201400757 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.0312960147857666 seconds ---
Scraped Song # 852
Render Website --- 2.7703380584716797 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process 

Render Website --- 6.433643102645874 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.045300960540771484 seconds ---
Scraped Song # 897
Render Website --- 2.3255178928375244 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/texas/i-dont-want-a-lover-tabs-2637999
Process Data --- 0.050421714782714844 seconds ---
Scraping Failed for song # 898
Scraped Song # 898
Render Website --- 2.3649017810821533 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03407788276672363 seconds ---
Scraped Song # 899
Render Website --- 2.400103807449341 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03849196434020996 seconds ---
Scraped Song # 900
Render Website --- 3.1749050617218018 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.034754037857055664 seconds ---
Scraped Song # 901
Render Website --- 2.433928966522217 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Proces

Render Website --- 2.296903133392334 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.028091907501220703 seconds ---
Scraped Song # 949
Render Website --- 2.574875831604004 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03017711639404297 seconds ---
Scraped Song # 950
Render Website --- 2.7161569595336914 seconds ---
Capo: 0 Key: G Tuning: Eb Ab Db Gb Bb Eb
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/modern-baseball/hope-tabs-2115083
Process Data --- 0.0603032112121582 seconds ---
Scraping Failed for song # 951
Scraped Song # 951
Render Website --- 2.8415181636810303 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/sworn-in/dont-look-at-me-tabs-2115213
Process Data --- 0.06655693054199219 seconds ---
Scraping Failed for song # 952
Scraped Song # 952
Render Website --- 2.6765999794006348 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 

Render Website --- 2.644205093383789 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.036338090896606445 seconds ---
Scraped Song # 993
Render Website --- 2.46071195602417 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.0297849178314209 seconds ---
Scraped Song # 994
Render Website --- 2.5135960578918457 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03721499443054199 seconds ---
Scraped Song # 995
Render Website --- 2.5434410572052 seconds ---
Capo: 7 Key: G Tuning: E A D G B E
----------Scraping Failed----------
 https://tabs.ultimate-guitar.com/tab/pointer-sisters/fairytale-tabs-2180123
Process Data --- 0.07678103446960449 seconds ---
Scraping Failed for song # 996
Scraped Song # 996
Render Website --- 2.3718042373657227 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data --- 0.03246498107910156 seconds ---
Scraped Song # 997
Render Website --- 7.383486032485962 seconds ---
Capo: 0 Key: G Tuning: E A D G B E
Process Data 

Unnamed: 0,Name,URL,Key,Tuning,Capo,Tabs
0,Cant Help Falling In Love,https://tabs.ultimate-guitar.com/tab/elvis-pre...,G,E A D G B E,0,"[[3, -, -, -, 7, -, -, -, 3, -, -, -, -, -, 5,..."
1,Star Shopping Intro,https://tabs.ultimate-guitar.com/tab/lil-peep/...,G,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
2,Blackbird (ver 5),https://tabs.ultimate-guitar.com/tab/the-beatl...,G,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
3,Blackbird (ver 2),https://tabs.ultimate-guitar.com/tab/the-beatl...,G,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
4,Creep,https://tabs.ultimate-guitar.com/tab/radiohead...,G,,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
...,...,...,...,...,...,...
885,Figures Intro (ver 4),https://tabs.ultimate-guitar.com/tab/jessie-re...,G,E A D G B E,0,"[[-, -, -, 2, -, -, -, -, -, 2, -, -, -, -, -,..."
886,Story Of A Girl Solo (ver 2),https://tabs.ultimate-guitar.com/tab/nine-days...,G,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
887,Catch A Falling Star (ver 2),https://tabs.ultimate-guitar.com/tab/perry-com...,G,E A D G B E,0,"[[-, -, -, -, -, -, -, -, -, -, -, -, -, -, -,..."
888,Air On The G String Intro (ver 2),https://tabs.ultimate-guitar.com/tab/johann-se...,G,E A D G B E,0,"[[0, -, -, -, -, -, -, -, -, 5, 1, -, -, -, -,..."


In [11]:
pickle.dump(dataframe, open( "ScrapedSongsKeyG.p", "wb" ) )

In [8]:
sum1 = 0
for data in dataframe.iloc[:,5]:
    sum1+=data.shape[1]

sum2 = 0
for i in range(0,814):
    #print(dataframe.iloc[i,2])
    if dataframe.iloc[i,2] == 'C':
        sum2+=1
sum1

183221

In [None]:
# #TESTING

# asession = AsyncHTMLSession() #creates Async object
# url2 = 'https://tabs.ultimate-guitar.com/tab/the-beatles/here-comes-the-sun-tabs-201130'
# url1 = 'https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488'
# r = await asession.get(url1 ,timeout=30) #requests website from server and waits for response

# #print("Rendering JavaScript")
# site = await r.html.arender(timeout=30) #renders the javascript, NOTE: Currently the speed bottleneck as the javascript is taking too long to render

# soup = BeautifulSoup(r.html.html, features='lxml') #creates BeautifulSoup object which will parse the html
# body = soup.find(class_='js-page js-global-wrapper') #finds main body of website which contains the strum patterns
# patterns = body.find_all(class_='_2J-ci') #every tabs line is within a class called '_2J-ci'

# print('Patterns length:',len(patterns))
# #print(patterns[0].text)
# allTabs = []
# allTabs= [parseTab(pattern) for pattern in patterns]
# print('Alltabs length:',len(allTabs))
# x = allTabs[0]
# for i in range(1,len(allTabs)):
#     #print('1st:',allTabs[i-1])
#     print(allTabs[i].shape)
#     print(i)
#     print('2nd:',allTabs[i],'\n')
#     x = np.append(x,allTabs[i],axis = 1)
# print(x.shape)
# fullprint(x[:,0:10])
# fullprint(x.T)
# #print(x[:,0:10])
# #np.append(array,array2,axis=1).shape



In [None]:
# #TESTING

# # for line in patterns[8].children:
# #     print(line.text)
    
# x = patterns[17].find_all(class_='_1zlI0')
# for line in x:
#     if line.text[0] == 'e' or line.text[0] == 'B' or line.text[0] == 'G' or line.text[0] == 'D' or line.text[0] == 'A' or line.text[0] == 'E':
#         print(line.text)

# for line in x:
#     print(line.text)
# x[2]

In [None]:
# #TESTING

# lines = []
# for line in patterns[0].children:
#     lines.append(list(line.text))
# tabs = np.array(lines)
# tabsTranspose = z.T
# #trans[tabsTranspose=='-']= np.NaN
# mask_array = [any(char.isdigit() for char in row) for row in tabsTranspose]
# #print(mask_array)
# array = tabsTranspose[mask_array].T
# #print(array)
# #[''.join(row) for row in array]

# lines = []
# for line in patterns[1].children:
#     lines.append(list(line.text))
# tabs = np.array(lines)
# tabsTranspose = z.T
# #trans[tabsTranspose=='-']= np.NaN
# mask_array = [any(char.isdigit() for char in row) for row in tabsTranspose]
# #print(mask_array)
# array2 = tabsTranspose[mask_array].T
# #print(array)
# [''.join(row) for row in array]
# np.append(array,array2,axis=1).shape


In [None]:
#just learning about list comprehension and in line loops
# array = [[1,2,3,4,5],[6,7,8,9,10]]
# print([[num > 4 for num in row] for row in array])
# print([any(num >4 for num in row) for row in array])
string = 'testing string split function'
string.split()
import re
capo = [int(val) for val in re.findall(r'\d+', 'Capo: 7th')][0]
capo