In [89]:
from bs4 import BeautifulSoup
from scipy.spatial import cKDTree as KDTree
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from pyvirtualdisplay import Display

import csv
import cv2
import datetime
import matplotlib
import multiprocessing as mp
import numpy as np
import re
import string
import sys
import unidecode
import traceback
import time

In [90]:
gridCount=1
rois=[]
PATH='/home/abhiavk/git/Website-Evolution/'

In [91]:
def timeTaken(startTime, Metric, MetricValue=""):
	print(Metric.ljust(25," "),datetime.datetime.now()-startTime,"\t\t",MetricValue)

In [92]:
def string_to_words(txt):
    txt=re.sub("[^\w]"," ",  txt)
    txt=re.sub("[0-9]+"," ",  txt)
    txt=re.sub(" [a-zA-Z]{1} "," ",  txt)
    txt=re.sub(" [a-zA-Z]{2} "," ",  txt)
    txt=re.sub("[ ]+"," ",txt)
    return txt.split()

In [93]:
def get_words(d):
    txt=""
    try:
        txt+=d.execute_script("return document.body.innerText;")
    except:
        pass
    try:
        txt+=d.execute_script("return document.innerText;")
    except:
        pass
    words = string_to_words(str(unidecode.unidecode(txt)))
    return words

In [94]:
def get_word_count(d):
    startTime=datetime.datetime.now()
    words=get_words(d)
    wordCount=float(len(words))
    #timeTaken(startTime,"Word Count",wordCount)
    return wordCount

In [95]:
def get_text_body_ratio(soup,wordCount):

    startTime=datetime.datetime.now()
    headers=[]
    for i in range(1,7):
        headers+=soup.findAll("h"+str(i))
    sizeHeaders=[]
    sizeHeaders+=soup.findAll("font",{"size":"3"})
    sizeHeaders+=soup.findAll("font",{"size":"4"})
    sizeHeaders+=soup.findAll("font",{"size":"5"})
    txt=""
    for i in headers:
        txt+=" "+i.text
    for i in sizeHeaders:
        txt+=" "+i.text
    words=[]
    if len(txt)!=0:
        words=string_to_words(str(unidecode.unidecode(txt)))
    #print words
    try:
        headTextCount=float(len(words))
    except:
        headtextCount=0.0
    if wordCount:
        textBodyRatio=headTextCount/wordCount
    else:
        textBodyRatio=0.0
    #timeTaken(startTime,"Text Body Ratio",textBodyRatio)
    return textBodyRatio

In [96]:
def get_emph_body_text_percentage(d,bs,wordCount):

    #print "Param3"
    startTime=datetime.datetime.now()
    boldText = bs.findAll("b")
    words=[]
    for i in boldText:
        try:
            words+= string_to_words(str(unidecode.unidecode(i.text)))
        except:
            pass
    boldWordCount=len(words)
    try:
        txt=str(unidecode.unidecode(d.execute_script("return document.body.innerText")))
    except:
        txt=str(unidecode.unidecode(d.execute_script("return document.body.textContent")))
    pattern = re.compile("!+")
    exclWordCount=len(re.findall(pattern,txt))
    words=get_words(d)
    capWordCount=0
    for i in words:
        if i==i.upper():
            capWordCount+=1

    #print boldWordCount, exclWordCount, capWordCount

    emphTextCount=float(boldWordCount + exclWordCount + capWordCount)

    if wordCount:
        emphTextPercent=(emphTextCount/wordCount)*100.0
    else:
        emphTextPercent=0.0
    #timeTaken(startTime,"Emph text Percent",emphTextPercent)
    return emphTextPercent

In [97]:
def get_text_position_changes(s):
    startTime=datetime.datetime.now()
    #print "Param
    elem=s.findAll()
    prev=""
    textPositionChanges=0
    for i in elem:
        try:
            string=str(i["style"])
            if "text-align:"in string:
                align=string.split("text-align:")[1]
                position=align.split(";")[0].strip()
                if position!=prev:
                    textPositionChanges+=1
                    prev=position
        except:
            pass
    #timeTaken(startTime,"Text Positional Changes",textPositionChanges)
    return textPositionChanges

In [98]:
def get_text_clusters(d,bs):

	#print "Param5"
	startTime=datetime.datetime.now()
	tableText= bs.findAll("td")+bs.findAll("table")
	paraText = bs.findAll("p")
	textClusters=len(tableText)+len(paraText)
	#timeTaken(startTime,"Text Clusters",textClusters)
	return textClusters

In [99]:
def get_visible_links(d,bs):

	#print "Param6"
	startTime=datetime.datetime.now()
	links=bs.findAll("a")
	visibleLinkCount=0
	for i in links:
		if i.text != "":
			visibleLinkCount+=1
	#timeTaken(startTime,"Visible Links",visibleLinkCount)
	return visibleLinkCount

In [100]:
def get_page_size(d):

	#print "Param7"
	startTime=datetime.datetime.now()
	scriptToExecute = "	var performance = 	window.performance ||\
											window.mozPerformance ||\
											window.msPerformance ||\
									 		window.webkitPerformance || {};\
						var network 	= 	performance.getEntries() || {};\
						return network;"
	networkData = d.execute_script(scriptToExecute)
	pageSize=0
	for i in networkData:
		try:
			pageSize+=float(i[u'transferSize'])
		except:
			pass
	pageSize=float(pageSize)/1024.0
	#timeTaken(startTime,"Page Size",pageSize)
	return pageSize

In [101]:
def get_graphics_percent(d,pageSize):

	#print "Param8"
	startTime=datetime.datetime.now()
	scriptToExecute = "var performance = window.performance || window.mozPerformance || window.msPerformance || window.webkitPerformance || {}; var network = performance.getEntries() || {}; return network;"
	networkData = d.execute_script(scriptToExecute)
	graphicsSize=0.0
	for i in networkData:
		try:
			if i[u'initiatorType']== u'script' or i[u'initiatorType']==u'img' or i['initiatorType']== u'css':
				graphicsSize+=float(i[u'transferSize'])
		except:
			pass
	graphicsSize=float(graphicsSize)/1024.0

	if pageSize==0:
		graphicsPercent=0.0
	else:
		graphicsPercent=graphicsSize*100.0/pageSize
	#timeTaken(startTime,"Graphic Size",graphicsSize)
	return graphicsPercent

In [102]:
def get_graphics_count(d,bs):
	startTime=datetime.datetime.now()
	#print "Param9"
	styleSteets=bs.findAll("style")
	scripts=bs.findAll("script")
	images=d.execute_script("return document.images;")
	graphicsCount=len(styleSteets)+len(images)+len(scripts)
	#timeTaken(startTime,"Graphics Count",graphicsCount)
	return  graphicsCount

In [103]:
def get_color_count(image):
    startTime=datetime.datetime.now()
    use_colors = matplotlib.colors.cnames
    named_colors = {k: tuple(map(int, (v[1:3], v[3:5], v[5:7]), 3*(16,))) for k, v in use_colors.items()}
    ncol = len(named_colors)
    no_match = named_colors['purple']

    color_tuples = list(named_colors.values())
    color_tuples.append(no_match)
    color_tuples = np.array(color_tuples)

    color_names = list(named_colors)
    color_names.append('no match')

    tree = KDTree(color_tuples[:-1])

    tolerance = np.inf
    dist, idx = tree.query(image, distance_upper_bound=tolerance)

    colCounts = np.bincount(idx.ravel(), None, ncol+1).tolist()
    colNames  = color_names

    colors=[]
    for i in range(len(color_names)):
        colors.append([colCounts[i],color_names[i]])

    colors.sort(reverse=True)

    colorCount=0
    for color in colors:
        if color[0]>=7864: #1% of the pixels
            colorCount+=1
        else:
            break

    #timeTaken(startTime,"Color Count",colorCount)
    return colorCount

In [104]:
def get_font_count(d,bs):
    startTime=datetime.datetime.now()
    divCount=len(bs.findAll("div"))
    diffFont=set([])
    for i in range(divCount):
        fontStr=""
        script='return document.getElementsByTagName("div")['+str(i)+']["style"]'
        fontStr+=d.execute_script(script+'["font"];')+"font"
        fontStr+=d.execute_script(script+'["fontDisplay"];')+"fontDisplay"
        fontStr+=d.execute_script(script+'["fontFamily"];')+"fontFamily"
        fontStr+=d.execute_script(script+'["fontFeatureSettings"];')+"fontFeatureSettings"
        fontStr+=d.execute_script(script+'["fontKerning"];')+"fontKerning"
        fontStr+=d.execute_script(script+'["fontSize"];')+"fontSize"
        fontStr+=d.execute_script(script+'["fontStretch"];')+"fontStretch"
        fontStr+=d.execute_script(script+'["fontStyle"];')+"fontStyle"
        fontStr+=d.execute_script(script+'["fontVariant"];')+"fontVariant"
        fontStr+=d.execute_script(script+'["fontVariantCaps"];')+"fontVariantCaps"
        fontStr+=d.execute_script(script+'["fontVariantEastAsian"];')+"fontVariantEastAsian"
        fontStr+=d.execute_script(script+'["fontVariantLigatures"];')+"fontVariantLigatures"
        fontStr+=d.execute_script(script+'["fontVariantNumeric"];')+"fontVariantNumeric"
        fontStr+=d.execute_script(script+'["fontVariationSettings"];')+"fontVariationSettings"
        fontStr+=d.execute_script(script+'["fontWeight"];')+"fontWeight"

        diffFont.add(fontStr)
    #print(diffFont)
    fontCount=len(diffFont)-1 # -1 for empty font (default font)
    #timeTaken(startTime,"Font Count",fontCount)
    return fontCount

In [105]:
def getColorfullness(image):
    startTime=datetime.datetime.now()
    (B, G, R) = cv2.split(image.astype("float"))
    rg = np.absolute(R - G)
    yb = np.absolute(0.5 * (R + G) - B)
    (rbMean, rbStd) = (np.mean(rg), np.std(rg))
    (ybMean, ybStd) = (np.mean(yb), np.std(yb))
    stdRoot = np.sqrt((rbStd ** 2) + (ybStd ** 2))
    meanRoot = np.sqrt((rbMean ** 2) + (ybMean ** 2))
    colourFullness = stdRoot + (0.3 * meanRoot)
    #timeTaken(startTime,"Colourfullness",colourFullness)
    return colourFullness

In [106]:
def getVisualComplexity(image,num):
    startTime=datetime.datetime.now()
    year=sys.argv[-2]
    def splitImage(inImg):
        h,w = inImg.shape[0], inImg.shape[1]
        off1X=0
        off1Y=0
        off2X=0
        off2Y=0
        if w >= h:  #split X
            off1X=0
            off2X=int(w/2)
            img1 = inImg[0:h, 0:off2X]
            img2 = inImg[0:h, off2X:w]
        else:       #split Y
            off1Y=0
            off2Y=int(h/2)
            img1 = inImg[0:off2Y, 0:w]
            img2 = inImg[off2Y:h, 0:w]
        return off1X,off1Y,img1, off2X,off2Y,img2
    def qt(inImg, minStd, minSize, offX, offY):
        global gridCount
        global rois
        h,w = inImg.shape[0], inImg.shape[1]
        m,s = cv2.meanStdDev(inImg)
        if s>=minStd and max(h,w)>minSize:
            oX1,oY1,im1, oX2,oY2,im2 = splitImage(inImg)
            gridCount+=1
            qt(im1, minStd, minSize, offX+oX1, offY+oY1)
            qt(im2, minStd, minSize, offX+oX2, offY+oY2)
        else:
            rois.append([offX,offY,w,h,m,s])

    global gridCount
    global rois

    gridCount=1
    rois=[]
    offX, offY=0,0
    minDev        = 10.0
    minSz         = 20

    #cv2.imshow('Start Image',image)
    h,w = image.shape[0], image.shape[1]
    m,s = cv2.meanStdDev(image)
    qt(image,minDev,minSz,offX,offY)
    imgOut=image
    for e in rois:
        col=255
        if e[5]<minDev:
            col=0
        cv2.rectangle(imgOut, (e[0],e[1]), (e[0]+e[2],e[1]+e[3]), col, 1)
    cv2.imwrite(PATH+'webScreenshot/'+str(year)+'/screenshot'+str(num)+'_Quad.png',imgOut)
    #cv2.imshow('Quad Image',imgOut)
    #cv2.waitKey(0)
    #cv2.destroyAllWindows()
    visualComplexity=gridCount#((gridCount*1.0)/(1024.0*768.0))**-1
    #timeTaken(startTime,"Visual Complexity",visualComplexity)
    return visualComplexity

In [107]:
def setDriverOptions():
    options 				= Options()
    options.binary_location = "webEvPy/bin/chromium-browser"
    chrome_driver_binary	= "webEvPy/bin/chromedriver"
    #options.add_argument("--headless")
    return	webdriver.Chrome(options=options)

In [134]:
def getMetrics(urlFile):
    num=urlFile['id']
    url=urlFile['urls']
    #print(url,num)
    startTime 		= datetime.datetime.now()
    textFilename	= PATH+"yearMetrics/CorruptUrls"+str(year)+".txt"
    csvFilename		= PATH+"yearMetrics/tempMpUrlMetrics"+str(year)+".csv"
    try:
        driver			= setDriverOptions()
        driver.get(url)
        try:
            style=driver.find_element_by_xpath("//div[@id='wm-ipp-base']")
            style=driver.execute_script("arguments[0].style.display = 'none'; return arguments[0];", style)
        except:
            print("Hiding wb toolbar error")
            pass
        driver.implicitly_wait(10)
        time.sleep(5)
        driver.set_window_size(1024, 768)
        WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
        
        imagePath=PATH+'webScreenshot/'+str(year)+'/screenshot'+str(num)+'.png'
        #print(imagePath)
        driver.save_screenshot(imagePath)
        image = cv2.imread(imagePath)
        imageGrey = cv2.imread(imagePath,0)
        page_source=driver.page_source
        soup=BeautifulSoup(page_source,'html.parser')
        #---------------------------------------------------#
        #--------- Web Metric Calculation ------------------#
        #---------------------------------------------------#
        wordCount				= get_word_count(driver)#Parameter 1
        textBodyRatio			= get_text_body_ratio(soup,wordCount)#Parameter 2
        emphTextPercent			= get_emph_body_text_percentage(driver,soup,wordCount)#Parameter 3
        textPositionalChanges	= get_text_position_changes(soup)#Parameter 4
        textClusters			= get_text_clusters(driver,soup)#Parameter 5
        visibleLinks			= get_visible_links(driver,soup)#Parameter 6
        pageSize				= get_page_size(driver)#Parameter 7
        graphicsPercent			= get_graphics_percent(driver,pageSize)#Parameter 8
        graphicsCount 			= get_graphics_count(driver,soup)#Parameter 9
        colorCount				= get_color_count(image)#Parameter 10
        fontCount				= get_font_count(driver,soup)#Parameter 11
        colourFullness			= getColorfullness(image)#Parameter 12
        visualComplexity		= getVisualComplexity(imageGrey,num)


        tempMetrics=[
                    num,\
                    wordCount,\
                    textBodyRatio,\
                    emphTextPercent,\
                    textPositionalChanges,\
                    textClusters,\
                    visibleLinks,\
                    pageSize,\
                    graphicsPercent,\
                    graphicsCount,\
                    colorCount,\
                    fontCount,\
                    colourFullness,\
                    visualComplexity,\
                    url
            ]
        line=tempMetrics
        csvFile		= open(csvFilename,"a+")
        csvWriter	= csv.writer(csvFile)
        csvWriter.writerow(line)
        csvFile.close()
        driver.close()
    except:
        print(traceback.format_exc())
        try:
            driver.close()
        except:
            print("###Error : Couldn't close driver")
        print("Error scraping the Url")
        f2			= open(textFilename,"a+")
        f2.write(num+","+url+"\n")
        f2.close()
    print((datetime.datetime.now()-startTime).seconds,"\t",datetime.datetime.now().replace(microsecond=0),"\t",year,num,url)

In [133]:
def main(filename,year=""):
    
    fields			= ["slno","p1","p2","p3","p4","p5","p6","p7","p8","p9","p10","p11","p12","p13","url"]
    csvFilename		= "yearMetrics/tempMpUrlMetrics"+str(year)+".csv"
    csvFile			= open(csvFilename,"a+")
    csvWriter		= csv.writer(csvFile)
    csvWriter.writerow(fields)
    csvFile.close()
    
    fields			= ["id","urls"]
    csvFilename		= "yearMetrics/CorruptUrls"+str(year)+".csv"
    csvFile			= open(csvFilename,"a+")
    csvWriter		= csv.writer(csvFile)
    csvWriter.writerow(fields)
    csvFile.close()
    
    csvFile			= open(filename,"r")
    urlFile			= csv.DictReader(csvFile)
    driver			= setDriverOptions()
    """
    manager 		= mp.Manager()
    urls 			= manager.list()
    results 		= manager.list()
    pool 			= mp.Pool(1)
    results 		= pool.map_async(getMetrics, urlFile)
    while not results.ready():
        pass
    """
    for url in urlFile:
        getMetrics(url)
    csvFile.close()

In [136]:
for year in range(2002,2017):
    filename="yearUrlWb/Wb"+str(year)+".csv"
    year=str(year)
    main(filename,year)

16 	 2019-05-12 08:07:14 	 2002 1 https://web.archive.org/web/20020101000000/http://www.tolerance.org
20 	 2019-05-12 08:07:35 	 2002 2 https://web.archive.org/web/20020101000000/http://www.peta.org
19 	 2019-05-12 08:07:54 	 2002 3 https://web.archive.org/web/20020101000000/http://www.alternet.org
15 	 2019-05-12 08:08:09 	 2002 4 https://web.archive.org/web/20020101000000/http://www.corpwatch.org
13 	 2019-05-12 08:08:23 	 2002 5 https://web.archive.org/web/20020101000000/http://www.idealist.org
14 	 2019-05-12 08:08:38 	 2002 6 https://web.archive.org/web/20020101000000/http://www.guggenheim.com
20 	 2019-05-12 08:08:58 	 2002 7 https://web.archive.org/web/20020101000000/http://www.kazaa.com
19 	 2019-05-12 08:09:18 	 2002 8 https://web.archive.org/web/20020101000000/http://www.120seconds.com
11 	 2019-05-12 08:09:29 	 2002 9 https://web.archive.org/web/20020101000000/http://www.bthere.tv
15 	 2019-05-12 08:09:45 	 2002 10 https://web.archive.org/web/20020101000000/http://www.oddcas

20 	 2019-05-12 08:29:56 	 2002 57 https://web.archive.org/web/20020101000000/http://www.diynet.com
24 	 2019-05-12 08:30:20 	 2002 58 https://web.archive.org/web/20020101000000/http://www.greenhome.com
24 	 2019-05-12 08:30:44 	 2002 59 https://web.archive.org/web/20020101000000/http://www.nationalgardening.com
65 	 2019-05-12 08:31:50 	 2002 60 https://web.archive.org/web/20020101000000/http://www.timeoutny.com
11 	 2019-05-12 08:32:01 	 2002 61 https://web.archive.org/web/20020101000000/http://www.donniedarko.com
27 	 2019-05-12 08:32:29 	 2002 62 https://web.archive.org/web/20020101000000/http://www.ifilm.com
21 	 2019-05-12 08:32:51 	 2002 63 https://web.archive.org/web/20020101000000/http://www.metacritic.com
14 	 2019-05-12 08:33:05 	 2002 64 https://web.archive.org/web/20020101000000/http://www.wakinglifemovie.com
15 	 2019-05-12 08:33:21 	 2002 65 https://web.archive.org/web/20020101000000/http://www.withoutabox.com
18 	 2019-05-12 08:33:39 	 2002 66 https://web.archive.org/we

16 	 2019-05-12 08:55:15 	 2002 130 https://web.archive.org/web/20020101000000/http://www.getty.edu/art/exhibitions/devices
18 	 2019-05-12 08:55:34 	 2002 131 https://web.archive.org/web/20020101000000/http://www.planettribes.com/allyourbase
16 	 2019-05-12 08:55:50 	 2002 132 https://web.archive.org/web/20020101000000/http://www.cutoffmyfeet.com
16 	 2019-05-12 08:56:07 	 2002 133 https://web.archive.org/web/20020101000000/http://www.mindcontrolforums.com
14 	 2019-05-12 08:56:21 	 2002 134 https://web.archive.org/web/20020101000000/http://www.spamradio.com
24 	 2019-05-12 08:56:46 	 2002 135 https://web.archive.org/web/20020101000000/http://www.ology.amnh.org
20 	 2019-05-12 08:57:06 	 2002 136 https://web.archive.org/web/20020101000000/http://www.channelone.com
13 	 2019-05-12 08:57:20 	 2002 137 https://web.archive.org/web/20020101000000/http://www.pinholespy.com
13 	 2019-05-12 08:57:34 	 2002 138 https://web.archive.org/web/20020101000000/http://www.sfskids.com
Traceback (most r

18 	 2019-05-12 09:15:49 	 2003 38 https://web.archive.org/web/20030101000000/http://www.atitd.com
Hiding wb toolbar error
25 	 2019-05-12 09:16:14 	 2003 39 https://web.archive.org/web/20030101000000/http://www.toontown.com
17 	 2019-05-12 09:16:31 	 2003 40 https://web.archive.org/web/20030101000000/http://www.indiegamejam.com
15 	 2019-05-12 09:16:47 	 2003 41 https://web.archive.org/web/20030101000000/http://www.nasa.gov
18 	 2019-05-12 09:17:06 	 2003 42 https://web.archive.org/web/20030101000000/http://www.un.org
20 	 2019-05-12 09:17:26 	 2003 43 https://web.archive.org/web/20030101000000/http://www.weather.gov
23 	 2019-05-12 09:17:50 	 2003 44 https://web.archive.org/web/20030101000000/http://www.publius.org
25 	 2019-05-12 09:18:16 	 2003 45 https://web.archive.org/web/20030101000000/http://www.comune.bologna.it
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elem

23 	 2019-05-12 09:38:15 	 2003 88 https://web.archive.org/web/20030101000000/http://www.congress.org
15 	 2019-05-12 09:38:31 	 2003 89 https://web.archive.org/web/20030101000000/http://www.ewg.org
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 13, in get_font_count
    fontStr+=d.execute_script(script+'["fontSize"];')+"fontSize"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
    'args': converted_args})['value']
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktra

14 	 2019-05-12 10:01:20 	 2004 3 https://web.archive.org/web/20040101000000/http://www.aworldconnected.com
17 	 2019-05-12 10:01:38 	 2004 4 https://web.archive.org/web/20040101000000/http://www.earthisland.org
31 	 2019-05-12 10:02:09 	 2004 5 https://web.archive.org/web/20040101000000/http://www.indybay.org/
13 	 2019-05-12 10:02:22 	 2004 6 https://web.archive.org/web/20040101000000/http://www.pbs.org/pov/borders/index_flash.html
15 	 2019-05-12 10:02:38 	 2004 7 https://web.archive.org/web/20040101000000/http://www.noggin.com
13 	 2019-05-12 10:02:51 	 2004 8 https://web.archive.org/web/20040101000000/http://www.cbcradio3.com
15 	 2019-05-12 10:03:07 	 2004 9 https://web.archive.org/web/20040101000000/http://www.heavy.com
Hiding wb toolbar error
24 	 2019-05-12 10:03:31 	 2004 10 https://web.archive.org/web/20040101000000/http://zed.cbc.ca
17 	 2019-05-12 10:03:48 	 2004 11 https://web.archive.org/web/20040101000000/http://www.apple.com/itunes/store/shop.html
22 	 2019-05-12 10:04

13 	 2019-05-12 10:24:25 	 2004 74 https://web.archive.org/web/20040101000000/http://www.opensorcery.net/velvet-strike/
25 	 2019-05-12 10:24:51 	 2004 75 https://web.archive.org/web/20040101000000/http://blogforamerica.com
25 	 2019-05-12 10:25:16 	 2004 76 https://web.archive.org/web/20040101000000/http://www.opensecrets.org
23 	 2019-05-12 10:25:39 	 2004 77 https://web.archive.org/web/20040101000000/http://www.eff.org/
25 	 2019-05-12 10:26:04 	 2004 78 https://web.archive.org/web/20040101000000/http://www.meetup.com
Hiding wb toolbar error
40 	 2019-05-12 10:26:44 	 2004 79 https://web.archive.org/web/20040101000000/http://www.nytimes.com/campaigns
16 	 2019-05-12 10:27:01 	 2004 80 https://web.archive.org/web/20040101000000/http://www.fray.com
Hiding wb toolbar error
36 	 2019-05-12 10:27:37 	 2004 81 https://web.archive.org/web/20040101000000/http://www.nationalgeographic.com/magazine
19 	 2019-05-12 10:27:57 	 2004 82 https://web.archive.org/web/20040101000000/http://www.altern

Hiding wb toolbar error
22 	 2019-05-12 10:49:06 	 2004 139 https://web.archive.org/web/20040101000000/http://www.wikipedia.org
15 	 2019-05-12 10:49:23 	 2005 1 https://web.archive.org/web/20050101000000/http://www.worldcitizenguide.com/
Hiding wb toolbar error
20 	 2019-05-12 10:49:43 	 2005 2 https://web.archive.org/web/20050101000000/http://www.ifaw.org
17 	 2019-05-12 10:50:00 	 2005 3 https://web.archive.org/web/20050101000000/http://www.migrationinformation.org
24 	 2019-05-12 10:50:25 	 2005 4 https://web.archive.org/web/20050101000000/http://www.voicesofcivilrights.org
24 	 2019-05-12 10:50:49 	 2005 5 https://web.archive.org/web/20050101000000/http://www.retrovsmetro.org
Hiding wb toolbar error
22 	 2019-05-12 10:51:12 	 2005 6 https://web.archive.org/web/20050101000000/http://grafarc.org
16 	 2019-05-12 10:51:29 	 2005 7 https://web.archive.org/web/20050101000000/http://www.wordcount.org
18 	 2019-05-12 10:51:47 	 2005 8 https://web.archive.org/web/20050101000000/http://www.

16 	 2019-05-12 11:09:03 	 2005 54 https://web.archive.org/web/20050101000000/http://www.ngsednet.org
30 	 2019-05-12 11:09:34 	 2005 55 https://web.archive.org/web/20050101000000/http://www.suicidegirls.com
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 17, in get_font_count
    fontStr+=d.execute_script(script+'["fontVariantCaps"];')+"fontVariantCaps"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
    'args': converted_args})['value']
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(me

Hiding wb toolbar error
29 	 2019-05-12 11:24:28 	 2005 94 https://web.archive.org/web/20050101000000/http://www.etrade.com
17 	 2019-05-12 11:24:46 	 2005 95 https://web.archive.org/web/20050101000000/http://www.emeraldnuts.com
22 	 2019-05-12 11:25:08 	 2005 96 https://web.archive.org/web/20050101000000/http://www.sobebev.com
18 	 2019-05-12 11:25:26 	 2005 97 https://web.archive.org/web/20050101000000/http://www.2424milk.com
13 	 2019-05-12 11:25:40 	 2005 98 https://web.archive.org/web/20050101000000/http://www.campoviejo-usa.com
17 	 2019-05-12 11:25:58 	 2005 99 https://web.archive.org/web/20050101000000/http://www.bonnydoonvineyard.com
16 	 2019-05-12 11:26:14 	 2005 100 https://web.archive.org/web/20050101000000/http://www.metaphorical.net
21 	 2019-05-12 11:26:35 	 2005 101 https://web.archive.org/web/20050101000000/http://www.miniclip.com
Hiding wb toolbar error
24 	 2019-05-12 11:27:00 	 2005 102 https://web.archive.org/web/20050101000000/http://www.hotshotbusiness.com
14 	 

28 	 2019-05-12 11:51:41 	 2005 167 https://web.archive.org/web/20050101000000/http://www.live365.com
20 	 2019-05-12 11:52:01 	 2005 168 https://web.archive.org/web/20050101000000/http://www.keepingscore.org
25 	 2019-05-12 11:52:26 	 2005 169 https://web.archive.org/web/20050101000000/http://www.truesoundlounge.com
31 	 2019-05-12 11:52:57 	 2005 170 https://web.archive.org/web/20050101000000/http://news.bbc.co.uk/
Hiding wb toolbar error
30 	 2019-05-12 11:53:28 	 2005 171 https://web.archive.org/web/20050101000000/http://www.weather.com
20 	 2019-05-12 11:53:49 	 2005 172 https://web.archive.org/web/20050101000000/http://www.pbs.org/frontlineworld/
Hiding wb toolbar error
21 	 2019-05-12 11:54:10 	 2005 173 https://web.archive.org/web/20050101000000/http://www.americanradioworks.org
26 	 2019-05-12 11:54:37 	 2005 174 https://web.archive.org/web/20050101000000/http://www.wired.com
24 	 2019-05-12 11:55:02 	 2005 175 https://web.archive.org/web/20050101000000/http://www.guardian.co.

Hiding wb toolbar error
32 	 2019-05-12 12:12:54 	 2005 211 https://web.archive.org/web/20050101000000/http://www.SpiritualityHealth.com
23 	 2019-05-12 12:13:17 	 2005 212 https://web.archive.org/web/20050101000000/http://www.GraceCathedral.org
Hiding wb toolbar error
23 	 2019-05-12 12:13:41 	 2005 213 https://web.archive.org/web/20050101000000/http://www.faithstreams.com
Hiding wb toolbar error
23 	 2019-05-12 12:14:05 	 2005 214 https://web.archive.org/web/20050101000000/http://www.myjewishlearning.com
15 	 2019-05-12 12:14:20 	 2005 215 https://web.archive.org/web/20050101000000/http://www.chipotle.com
15 	 2019-05-12 12:14:35 	 2005 216 https://web.archive.org/web/20050101000000/http://www.kasbah.com.hk
16 	 2019-05-12 12:14:52 	 2005 217 https://web.archive.org/web/20050101000000/http://www.foxsportsgrill.com
13 	 2019-05-12 12:15:05 	 2005 218 https://web.archive.org/web/20050101000000/http://www.lowcarbburger.com
17 	 2019-05-12 12:15:22 	 2005 219 https://web.archive.org/web/

24 	 2019-05-12 12:42:07 	 2005 283 https://web.archive.org/web/20050101000000/http://www.startheregoplaces.com
13 	 2019-05-12 12:42:21 	 2005 284 https://web.archive.org/web/20050101000000/http://www.gogirlsonly.org/
15 	 2019-05-12 12:42:36 	 2005 285 https://web.archive.org/web/20050101000000/http://www.mcsweeneys.net
17 	 2019-05-12 12:42:53 	 2005 286 https://web.archive.org/web/20050101000000/http://www.fffbi.com
19 	 2019-05-12 12:43:12 	 2005 287 https://web.archive.org/web/20050101000000/http://www.televisionwithoutpity.com/
25 	 2019-05-12 12:43:37 	 2005 288 https://web.archive.org/web/20050101000000/http://www.discover.com
23 	 2019-05-12 12:44:01 	 2005 289 https://web.archive.org/web/20050101000000/http://www.motherjones.com
17 	 2019-05-12 12:44:18 	 2005 290 https://web.archive.org/web/20050101000000/http://www.wordsatplay.com
Hiding wb toolbar error
21 	 2019-05-12 12:44:39 	 2005 291 https://web.archive.org/web/20050101000000/http://www.villasanmichele.com
16 	 2019-

25 	 2019-05-12 13:00:55 	 2006 23 https://web.archive.org/web/20060101000000/http://www.barclays.co.uk/
Hiding wb toolbar error
23 	 2019-05-12 13:01:18 	 2006 24 https://web.archive.org/web/20060101000000/http://www.ingdirect.com/
Hiding wb toolbar error
27 	 2019-05-12 13:01:46 	 2006 25 https://web.archive.org/web/20060101000000/http://www.wamu.com
24 	 2019-05-12 13:02:10 	 2006 26 https://web.archive.org/web/20060101000000/http://www.redkencolor.com
15 	 2019-05-12 13:02:25 	 2006 27 https://web.archive.org/web/20060101000000/http://www.lorealusa.com
Hiding wb toolbar error
30 	 2019-05-12 13:02:56 	 2006 28 https://web.archive.org/web/20060101000000/http://www.h2oplus.com
14 	 2019-05-12 13:03:11 	 2006 29 https://web.archive.org/web/20060101000000/http://www.giorgioarmanibeauty.com
Hiding wb toolbar error
24 	 2019-05-12 13:03:36 	 2006 30 https://web.archive.org/web/20060101000000/http://www.kiehls.com
27 	 2019-05-12 13:04:03 	 2006 31 https://web.archive.org/web/200601010000

20 	 2019-05-12 13:30:54 	 2006 96 https://web.archive.org/web/20060101000000/http://www.kidsdata.org
16 	 2019-05-12 13:31:11 	 2006 97 https://web.archive.org/web/20060101000000/http://www.greatschools.net
15 	 2019-05-12 13:31:27 	 2006 98 https://web.archive.org/web/20060101000000/http://www.kidshealthgalaxy.com
32 	 2019-05-12 13:32:00 	 2006 99 https://web.archive.org/web/20060101000000/http://www.style.com
12 	 2019-05-12 13:32:13 	 2006 100 https://web.archive.org/web/20060101000000/http://www.anothermag.com
25 	 2019-05-12 13:32:38 	 2006 101 https://web.archive.org/web/20060101000000/http://www.marcjacobs.com
19 	 2019-05-12 13:32:57 	 2006 102 https://web.archive.org/web/20060101000000/http://www.nylonmag.com
Hiding wb toolbar error
22 	 2019-05-12 13:33:20 	 2006 103 https://web.archive.org/web/20060101000000/http://strip-dev.fuelindustries.com/
25 	 2019-05-12 13:33:45 	 2006 104 https://web.archive.org/web/20060101000000/http://www.paypal.com
21 	 2019-05-12 13:34:07 	 20

Error scraping the Url
40 	 2019-05-12 13:58:07 	 2006 151 https://web.archive.org/web/20060101000000/http://www.westfieldinsurance.com
40 	 2019-05-12 13:58:48 	 2006 152 https://web.archive.org/web/20060101000000/http://www.mylifepath.com
32 	 2019-05-12 13:59:20 	 2006 153 https://web.archive.org/web/20060101000000/http://www.bcbsri.com
19 	 2019-05-12 13:59:40 	 2006 154 https://web.archive.org/web/20060101000000/http://www.justicelearning.org
21 	 2019-05-12 14:00:01 	 2006 155 https://web.archive.org/web/20060101000000/http://jurist.law.pitt.edu
24 	 2019-05-12 14:00:25 	 2006 156 https://web.archive.org/web/20060101000000/http://www.courttv.com
26 	 2019-05-12 14:00:52 	 2006 157 https://web.archive.org/web/20060101000000/http://www.nolo.com
24 	 2019-05-12 14:01:16 	 2006 158 https://web.archive.org/web/20060101000000/http://www.findlaw.com
260 	 2019-05-12 14:05:37 	 2006 159 https://web.archive.org/web/20060101000000/http://www.epicurious.com
22 	 2019-05-12 14:06:00 	 2006 1

31 	 2019-05-12 14:32:31 	 2006 226 https://web.archive.org/web/20060101000000/http://www.homepages.com
15 	 2019-05-12 14:32:46 	 2006 227 https://web.archive.org/web/20060101000000/http://www.npr.org/awards/2005/webby10.html
32 	 2019-05-12 14:33:19 	 2006 228 https://web.archive.org/web/20060101000000/http://wie.org
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/support/wait.py", line 71, in until
    value = method(self._driver)
  File "<ipython-input-134-914ecd78e0ec>", line 20, in <lambda>
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME, 

14 	 2019-05-12 14:56:22 	 2006 283 https://web.archive.org/web/20060101000000/http://www.earthcam.com
19 	 2019-05-12 14:56:41 	 2006 284 https://web.archive.org/web/20060101000000/http://www.madeinmtl.com
Hiding wb toolbar error
24 	 2019-05-12 14:57:05 	 2006 285 https://web.archive.org/web/20060101000000/http://selectitaly.com
17 	 2019-05-12 14:57:22 	 2006 286 https://web.archive.org/web/20060101000000/http://www.space.australia.com/
Hiding wb toolbar error
27 	 2019-05-12 14:57:50 	 2006 287 https://web.archive.org/web/20060101000000/http://www.expedia.com
22 	 2019-05-12 14:58:13 	 2006 288 https://web.archive.org/web/20060101000000/http://www.tripadvisor.com
22 	 2019-05-12 14:58:35 	 2006 289 https://web.archive.org/web/20060101000000/http://www.lonelyplanet.com
14 	 2019-05-12 14:58:50 	 2006 290 https://web.archive.org/web/20060101000000/http://www.coolcapitals.com
41 	 2019-05-12 14:59:32 	 2006 291 https://web.archive.org/web/20060101000000/http://maps.google.com
Hiding w

19 	 2019-05-12 15:15:46 	 2007 2 https://web.archive.org/web/20070101000000/http://www.savetheinternet.com/
19 	 2019-05-12 15:16:06 	 2007 3 https://web.archive.org/web/20070101000000/http://MyWonderfulWorld.org
30 	 2019-05-12 15:16:36 	 2007 4 https://web.archive.org/web/20070101000000/http://www.theirc.org
17 	 2019-05-12 15:16:53 	 2007 5 https://web.archive.org/web/20070101000000/http://www.makeupmakeout.com
14 	 2019-05-12 15:17:07 	 2007 6 https://web.archive.org/web/20070101000000/http://www.electrolux.com/designlab/
16 	 2019-05-12 15:17:23 	 2007 7 https://web.archive.org/web/20070101000000/http://peoplesdesignaward.org
21 	 2019-05-12 15:17:45 	 2007 8 https://web.archive.org/web/20070101000000/http://www.interacttenways.com/usa/home.asp
21 	 2019-05-12 15:18:06 	 2007 9 https://web.archive.org/web/20070101000000/http://www.universalleonardo.org/
11 	 2019-05-12 15:18:17 	 2007 10 https://web.archive.org/web/20070101000000/http://www.neue-digitale.de/projects/summer-of-lov

20 	 2019-05-12 15:44:41 	 2007 75 https://web.archive.org/web/20070101000000/http://www.WouldYouLikeAWebsite.com
32 	 2019-05-12 15:45:13 	 2007 76 https://web.archive.org/web/20070101000000/http://www.photography.si.edu
22 	 2019-05-12 15:45:36 	 2007 77 https://web.archive.org/web/20070101000000/http://www.SmithsonianEducation.org
14 	 2019-05-12 15:45:51 	 2007 78 https://web.archive.org/web/20070101000000/http://www.grupowprojects.com/museo
24 	 2019-05-12 15:46:15 	 2007 79 https://web.archive.org/web/20070101000000/http://www.newseum.org
14 	 2019-05-12 15:46:30 	 2007 80 https://web.archive.org/web/20070101000000/http://www.momaonline.org/
31 	 2019-05-12 15:47:02 	 2007 81 https://web.archive.org/web/20070101000000/http://www.howstuffworks.com
14 	 2019-05-12 15:47:16 	 2007 82 https://web.archive.org/web/20070101000000/http://artsedge.kennedy-center.org/shadowpuppets/artsedge.html
20 	 2019-05-12 15:47:37 	 2007 83 https://web.archive.org/web/20070101000000/http://www.discove

24 	 2019-05-12 16:10:59 	 2007 138 https://web.archive.org/web/20070101000000/http://meetyourpotential.ballyfitness.com/
29 	 2019-05-12 16:11:29 	 2007 139 https://web.archive.org/web/20070101000000/http://www.eatbetteramerica.com
19 	 2019-05-12 16:11:48 	 2007 140 https://web.archive.org/web/20070101000000/http://www.teenwire.com
Hiding wb toolbar error
36 	 2019-05-12 16:12:25 	 2007 141 https://web.archive.org/web/20070101000000/http://www.theonion.com/
45 	 2019-05-12 16:13:11 	 2007 142 https://web.archive.org/web/20070101000000/http://www.CollegeHumor.com
28 	 2019-05-12 16:13:39 	 2007 143 https://web.archive.org/web/20070101000000/http://www.comedycentral.com/shows/the_colbert_report/index.jhtml
14 	 2019-05-12 16:13:53 	 2007 144 https://web.archive.org/web/20070101000000/http://www.shaveeverywhere.com
16 	 2019-05-12 16:14:10 	 2007 145 https://web.archive.org/web/20070101000000/http://www.veryfunnyads.com
13 	 2019-05-12 16:14:24 	 2007 146 https://web.archive.org/web/200

Hiding wb toolbar error
40 	 2019-05-12 16:29:53 	 2007 186 https://web.archive.org/web/20070101000000/http://www.bbc.co.uk/news
38 	 2019-05-12 16:30:32 	 2007 187 https://web.archive.org/web/20070101000000/http://www.truthdig.com/
Hiding wb toolbar error
39 	 2019-05-12 16:31:11 	 2007 188 https://web.archive.org/web/20070101000000/http://www.reuters.com
15 	 2019-05-12 16:31:27 	 2007 189 https://web.archive.org/web/20070101000000/http://www.npr.org/awards/2006/webby2.html
20 	 2019-05-12 16:31:47 	 2007 190 https://web.archive.org/web/20070101000000/http://www.salon.com
35 	 2019-05-12 16:32:23 	 2007 191 https://web.archive.org/web/20070101000000/http://www.guardian.co.uk
51 	 2019-05-12 16:33:14 	 2007 192 https://web.archive.org/web/20070101000000/http://www.nytimes.com/
41 	 2019-05-12 16:33:55 	 2007 193 https://web.archive.org/web/20070101000000/http://www.variety.com
Hiding wb toolbar error
54 	 2019-05-12 16:34:49 	 2007 194 https://web.archive.org/web/20070101000000/http:/

15 	 2019-05-12 16:57:00 	 2007 246 https://web.archive.org/web/20070101000000/http://redhot.iu.edu/
17 	 2019-05-12 16:57:18 	 2007 247 https://web.archive.org/web/20070101000000/http://virtualvisit.trinity.edu
16 	 2019-05-12 16:57:35 	 2007 248 https://web.archive.org/web/20070101000000/http://www.sva.edu
17 	 2019-05-12 16:57:53 	 2007 249 https://web.archive.org/web/20070101000000/http://www.expression.edu/
18 	 2019-05-12 16:58:12 	 2007 250 https://web.archive.org/web/20070101000000/http://www.fullsail.edu
23 	 2019-05-12 16:58:35 	 2007 251 https://web.archive.org/web/20070101000000/http://hubblesite.org
22 	 2019-05-12 16:58:58 	 2007 252 https://web.archive.org/web/20070101000000/http://www.pbs.org/wgbh/nova/sciencenow
21 	 2019-05-12 16:59:19 	 2007 253 https://web.archive.org/web/20070101000000/http://www.jpl.nasa.gov
14 	 2019-05-12 16:59:34 	 2007 254 https://web.archive.org/web/20070101000000/http://www.understandingrace.org
15 	 2019-05-12 16:59:50 	 2007 255 https://we

25 	 2019-05-12 17:21:43 	 2007 311 https://web.archive.org/web/20070101000000/http://www.nickelodeon.com.au
52 	 2019-05-12 17:22:35 	 2007 312 https://web.archive.org/web/20070101000000/http://www.dkny.com
16 	 2019-05-12 17:22:52 	 2007 313 https://web.archive.org/web/20070101000000/http://www.fullsail.edu
14 	 2019-05-12 17:23:06 	 2007 314 https://web.archive.org/web/20070101000000/http://demo.fb.se/e/ikea/dreamkitchen/site/default.html
14 	 2019-05-12 17:23:20 	 2007 315 https://web.archive.org/web/20070101000000/http://www.wefeelfine.org
28 	 2019-05-12 17:23:49 	 2007 316 https://web.archive.org/web/20070101000000/http://www.bestuff.com/
Hiding wb toolbar error
33 	 2019-05-12 17:24:22 	 2007 317 https://web.archive.org/web/20070101000000/http://www.adobe.com/go/flashtimeline
15 	 2019-05-12 17:24:38 	 2007 318 https://web.archive.org/web/20070101000000/http://www.peroniitaly.com/gb/flash.html
17 	 2019-05-12 17:24:56 	 2007 319 https://web.archive.org/web/20070101000000/http:/

21 	 2019-05-12 17:46:40 	 2008 36 https://web.archive.org/web/20080101000000/http://dynamic.abc.go.com/streaming/landing
17 	 2019-05-12 17:46:57 	 2008 37 https://web.archive.org/web/20080101000000/http://www.stagework.org/mckellen
18 	 2019-05-12 17:47:15 	 2008 38 https://web.archive.org/web/20080101000000/http://www.mediastorm.org
14 	 2019-05-12 17:47:30 	 2008 39 https://web.archive.org/web/20080101000000/http://www.nyc.gov/html/nycmg/nyctvod/html/home/home.html?bcpid=988092805&bclid=992358455
18 	 2019-05-12 17:47:49 	 2008 40 https://web.archive.org/web/20080101000000/http://vbs.tv
20 	 2019-05-12 17:48:09 	 2008 41 https://web.archive.org/web/20080101000000/http://www.annielennox.com
42 	 2019-05-12 17:48:52 	 2008 42 https://web.archive.org/web/20080101000000/http://www.bestweekever.tv/
66 	 2019-05-12 17:49:59 	 2008 43 https://web.archive.org/web/20080101000000/http://www.cnn.com/CNN/Programs/anderson.cooper.360/
Hiding wb toolbar error
34 	 2019-05-12 17:50:34 	 2008 44 h

Error scraping the Url
21 	 2019-05-12 18:06:03 	 2008 88 https://web.archive.org/web/20080101000000/http://SproutOnline.com
16 	 2019-05-12 18:06:19 	 2008 89 https://web.archive.org/web/20080101000000/http://journeys.louisvuitton.com
41 	 2019-05-12 18:07:00 	 2008 90 https://web.archive.org/web/20080101000000/http://style.com
Hiding wb toolbar error
75 	 2019-05-12 18:08:15 	 2008 91 https://web.archive.org/web/20080101000000/http://www.GQ.com
19 	 2019-05-12 18:08:35 	 2008 92 https://web.archive.org/web/20080101000000/http://www.postvisual.com/awards/2008/webby/beanpole.asp
28 	 2019-05-12 18:09:04 	 2008 93 https://web.archive.org/web/20080101000000/http://www.refinery29.com
Hiding wb toolbar error
28 	 2019-05-12 18:09:32 	 2008 94 https://web.archive.org/web/20080101000000/http://zopa.com
22 	 2019-05-12 18:09:54 	 2008 95 https://web.archive.org/web/20080101000000/http://mint.com/
19 	 2019-05-12 18:10:13 	 2008 96 https://web.archive.org/web/20080101000000/http://www.micropla

14 	 2019-05-12 18:29:31 	 2008 138 https://web.archive.org/web/20080101000000/http://misc.starring.se/competitions/trygghansa/
16 	 2019-05-12 18:29:48 	 2008 139 https://web.archive.org/web/20080101000000/http://dontbebob.com
24 	 2019-05-12 18:30:13 	 2008 140 https://web.archive.org/web/20080101000000/http://blueshieldca.com
19 	 2019-05-12 18:30:32 	 2008 141 https://web.archive.org/web/20080101000000/http://www.hartfordinvestor.com/straighttalk
19 	 2019-05-12 18:30:51 	 2008 142 https://web.archive.org/web/20080101000000/http://Youcovered.com
20 	 2019-05-12 18:31:11 	 2008 143 https://web.archive.org/web/20080101000000/http://www.out-law.com
33 	 2019-05-12 18:31:44 	 2008 144 https://web.archive.org/web/20080101000000/http://www.abajournal.com
19 	 2019-05-12 18:32:04 	 2008 145 https://web.archive.org/web/20080101000000/http://jurist.law.pitt.edu
18 	 2019-05-12 18:32:23 	 2008 146 https://web.archive.org/web/20080101000000/http://www.lawhelp.org
Hiding wb toolbar error
27 	 

20 	 2019-05-12 18:58:30 	 2008 197 https://web.archive.org/web/20080101000000/http://www.FactCheck.org
36 	 2019-05-12 18:59:07 	 2008 198 https://web.archive.org/web/20080101000000/http://www.truthdig.com
26 	 2019-05-12 18:59:33 	 2008 199 https://web.archive.org/web/20080101000000/http://www.npr.org/awards/2007/webby12.html
22 	 2019-05-12 18:59:56 	 2008 200 https://web.archive.org/web/20080101000000/http://pewforum.org/religion08/
18 	 2019-05-12 19:00:14 	 2008 201 https://web.archive.org/web/20080101000000/http://www.maplight.org
14 	 2019-05-12 19:00:28 	 2008 202 https://web.archive.org/web/20080101000000/http://www.action-marketing-group.com
13 	 2019-05-12 19:00:42 	 2008 203 https://web.archive.org/web/20080101000000/http://www.cake-factory.com
15 	 2019-05-12 19:00:57 	 2008 204 https://web.archive.org/web/20080101000000/http://www.checklandkindleysides.com
16 	 2019-05-12 19:01:13 	 2008 205 https://web.archive.org/web/20080101000000/http://www.radicalmedia.com
14 	 2019

16 	 2019-05-12 19:24:29 	 2008 270 https://web.archive.org/web/20080101000000/http://www.thesedays.com/awards2007/nokia_beeldigewensen
21 	 2019-05-12 19:24:51 	 2008 271 https://web.archive.org/web/20080101000000/http://www.discoverychannel.co.uk/ivideogame/
23 	 2019-05-12 19:25:14 	 2008 272 https://web.archive.org/web/20080101000000/http://www.noggin.com
19 	 2019-05-12 19:25:34 	 2008 273 https://web.archive.org/web/20080101000000/http://www.sundancechannel.com/iconoclasts
16 	 2019-05-12 19:25:50 	 2008 274 https://web.archive.org/web/20080101000000/http://www.scifi.com/tinman/oz/
Hiding wb toolbar error
22 	 2019-05-12 19:26:12 	 2008 275 https://web.archive.org/web/20080101000000/http://www.furuvik.se
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdrive

21 	 2019-05-12 19:45:34 	 2008 308 https://web.archive.org/web/20080101000000/http://www.flickr.com/
32 	 2019-05-12 19:46:07 	 2008 309 https://web.archive.org/web/20080101000000/http://www.digg.com
32 	 2019-05-12 19:46:40 	 2008 310 https://web.archive.org/web/20080101000000/http://www.facebook.com
53 	 2019-05-12 19:47:33 	 2008 311 https://web.archive.org/web/20080101000000/http://nytimes.com/
Hiding wb toolbar error
46 	 2019-05-12 19:48:19 	 2008 312 https://web.archive.org/web/20080101000000/http://www.yelp.com
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/support/wait.py", line 80, in until
    raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: 


Error scraping the Url
37 	 2019-05-12 19:4

14 	 2019-05-12 20:06:06 	 2009 22 https://web.archive.org/web/20090101000000/http://www.ourentry.nl/#
17 	 2019-05-12 20:06:24 	 2009 23 https://web.archive.org/web/20090101000000/http://www.rudder.com
21 	 2019-05-12 20:06:45 	 2009 24 https://web.archive.org/web/20090101000000/http://xero.com
14 	 2019-05-12 20:07:00 	 2009 25 https://web.archive.org/web/20090101000000/http://www.bbhgraze.com/awards/webbys_09/axehair_website/
34 	 2019-05-12 20:07:35 	 2009 26 https://web.archive.org/web/20090101000000/http://www.maccosmetics.com/
Hiding wb toolbar error
23 	 2019-05-12 20:07:58 	 2009 27 https://web.archive.org/web/20090101000000/http://www.8x4.de
Hiding wb toolbar error
31 	 2019-05-12 20:08:29 	 2009 28 https://web.archive.org/web/20090101000000/http://66.245.160.26/philips/manalogues/index.html
Hiding wb toolbar error
18 	 2019-05-12 20:08:48 	 2009 29 https://web.archive.org/web/20090101000000/http://www.whitewashhouse.co.uk
Hiding wb toolbar error
74 	 2019-05-12 20:10:02 	 20

36 	 2019-05-12 20:38:27 	 2009 82 https://web.archive.org/web/20090101000000/http://www.monster.com
27 	 2019-05-12 20:38:55 	 2009 83 https://web.archive.org/web/20090101000000/http://nypdrecruit.com
20 	 2019-05-12 20:39:15 	 2009 84 https://web.archive.org/web/20090101000000/http://www.visualcv.com
29 	 2019-05-12 20:39:45 	 2009 85 https://web.archive.org/web/20090101000000/http://www.tes.co.uk/
60 	 2019-05-12 20:40:46 	 2009 86 https://web.archive.org/web/20090101000000/http://youtube.com/live
34 	 2019-05-12 20:41:20 	 2009 87 https://web.archive.org/web/20090101000000/http://upcoming.yahoo.com/
Hiding wb toolbar error
61 	 2019-05-12 20:42:22 	 2009 88 https://web.archive.org/web/20090101000000/http://2008.usopen.org
16 	 2019-05-12 20:42:38 	 2009 89 https://web.archive.org/web/20090101000000/http://dev.basikgroup.com/vs_fashion_show_2008_V6/flash.htm
Hiding wb toolbar error
46 	 2019-05-12 20:43:25 	 2009 90 https://web.archive.org/web/20090101000000/http://www.sony.com/ces


Error scraping the Url
13 	 2019-05-12 21:05:53 	 2009 122 https://web.archive.org/web/20090101000000/http://misionecologica.com.mx/
25 	 2019-05-12 21:06:19 	 2009 123 https://web.archive.org/web/20090101000000/http://blogs.ushmm.org/worldiswitness
27 	 2019-05-12 21:06:47 	 2009 124 https://web.archive.org/web/20090101000000/http://www.governmentdocs.org/
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
    'args': converted_args})['value']
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local

17 	 2019-05-12 21:29:50 	 2009 172 https://web.archive.org/web/20090101000000/http://www.protect77.com/
26 	 2019-05-12 21:30:16 	 2009 173 https://web.archive.org/web/20090101000000/http://timespentalone.com/
Hiding wb toolbar error
53 	 2019-05-12 21:31:10 	 2009 174 https://web.archive.org/web/20090101000000/http://www.bbc.com/news
Hiding wb toolbar error
20 	 2019-05-12 21:31:31 	 2009 175 https://web.archive.org/web/20090101000000/http://www.spectramsnbc.com
54 	 2019-05-12 21:32:25 	 2009 176 https://web.archive.org/web/20090101000000/http://huffingtonpost.com
108 	 2019-05-12 21:34:13 	 2009 177 https://web.archive.org/web/20090101000000/http://thedailybeast.com
135 	 2019-05-12 21:36:29 	 2009 178 https://web.archive.org/web/20090101000000/http://www.guardian.co.uk/
74 	 2019-05-12 21:37:43 	 2009 179 https://web.archive.org/web/20090101000000/http://NYTimes.com
52 	 2019-05-12 21:38:36 	 2009 180 https://web.archive.org/web/20090101000000/http://www.independent.co.uk
58 	 201

19 	 2019-05-12 22:01:12 	 2009 211 https://web.archive.org/web/20090101000000/http://www.americaabroadmedia.org/
25 	 2019-05-12 22:01:37 	 2009 212 https://web.archive.org/web/20090101000000/http://www.trulia.com
25 	 2019-05-12 22:02:03 	 2009 213 https://web.archive.org/web/20090101000000/http://hotpads.com
36 	 2019-05-12 22:02:40 	 2009 214 https://web.archive.org/web/20090101000000/http://www.bhgrealestate.com/
Hiding wb toolbar error
63 	 2019-05-12 22:03:44 	 2009 215 https://web.archive.org/web/20090101000000/http://NYTimes.com/RealEstate
15 	 2019-05-12 22:03:59 	 2009 216 https://web.archive.org/web/20090101000000/http://www.riverfrontpark.com
119 	 2019-05-12 22:05:59 	 2009 217 https://web.archive.org/web/20090101000000/http://www.guardian.co.uk/commentisfree/belief
Hiding wb toolbar error
39 	 2019-05-12 22:06:39 	 2009 218 https://web.archive.org/web/20090101000000/http://www.pbs.org/religion
23 	 2019-05-12 22:07:03 	 2009 219 https://web.archive.org/web/20090101000000

Error scraping the Url
11 	 2019-05-12 22:30:15 	 2009 261 https://web.archive.org/web/20090101000000/http://demo.perfectfools.com/bbc/case/
17 	 2019-05-12 22:30:32 	 2009 262 https://web.archive.org/web/20090101000000/http://www.hbo.com/films/johnadams/
Hiding wb toolbar error
26 	 2019-05-12 22:30:59 	 2009 263 https://web.archive.org/web/20090101000000/http://www.sundancechannel.com
23 	 2019-05-12 22:31:23 	 2009 264 https://web.archive.org/web/20090101000000/http://www.yogabbagabba.com
28 	 2019-05-12 22:31:51 	 2009 265 https://web.archive.org/web/20090101000000/http://www.tourisme-montreal.org
24 	 2019-05-12 22:32:16 	 2009 266 https://web.archive.org/web/20090101000000/http://www.australia.com
19 	 2019-05-12 22:32:35 	 2009 267 https://web.archive.org/web/20090101000000/http://www.austintexas.org
38 	 2019-05-12 22:33:14 	 2009 268 https://web.archive.org/web/20090101000000/http://www.visitsweden.com
16 	 2019-05-12 22:33:30 	 2009 269 https://web.archive.org/web/20090101000

38 	 2019-05-12 22:58:40 	 2009 312 https://web.archive.org/web/20090101000000/http://www.boston.com/bigpicture
15 	 2019-05-12 22:58:55 	 2009 313 https://web.archive.org/web/20090101000000/http://www.pilobolus.org
16 	 2019-05-12 22:59:11 	 2009 314 https://web.archive.org/web/20090101000000/http://www.lyndonwade.com
16 	 2019-05-12 22:59:28 	 2009 315 https://web.archive.org/web/20090101000000/http://www.livehopelove.com
17 	 2019-05-12 22:59:45 	 2009 316 https://web.archive.org/web/20090101000000/http://www.protect77.com/
15 	 2019-05-12 23:00:00 	 2009 317 https://web.archive.org/web/20090101000000/http://www.wordle.net/
15 	 2019-05-12 23:00:16 	 2009 318 https://web.archive.org/web/20090101000000/http://www.lyndonwade.com
13 	 2019-05-12 23:00:30 	 2009 319 https://web.archive.org/web/20090101000000/http://wkilab.com/awards/work/nikefoundation/index.html
17 	 2019-05-12 23:00:47 	 2009 320 https://web.archive.org/web/20090101000000/http://www.bhf.org.uk/annualreview2008
28 	 20

24 	 2019-05-12 23:14:14 	 2010 23 https://web.archive.org/web/20100101000000/http://promotions.bankofamerica.com/oncampus/themorriscode/
54 	 2019-05-12 23:15:09 	 2010 24 https://web.archive.org/web/20100101000000/http://www.realbeauty.com
19 	 2019-05-12 23:15:28 	 2010 25 https://web.archive.org/web/20100101000000/http://www.getmagnetic.com
17 	 2019-05-12 23:15:46 	 2010 26 https://web.archive.org/web/20100101000000/http://hd-generation.com/
16 	 2019-05-12 23:16:03 	 2010 27 https://web.archive.org/web/20100101000000/http://sephoraclaus.com
51 	 2019-05-12 23:16:55 	 2010 28 https://web.archive.org/web/20100101000000/http://mashable.com
26 	 2019-05-12 23:17:21 	 2010 29 https://web.archive.org/web/20100101000000/http://scholarlykitchen.sspnet.org/
42 	 2019-05-12 23:18:03 	 2010 30 https://web.archive.org/web/20100101000000/http://gigaom.com
40 	 2019-05-12 23:18:43 	 2010 31 https://web.archive.org/web/20100101000000/http://kara.allthingsd.com
40 	 2019-05-12 23:19:24 	 2010 32

56 	 2019-05-12 23:53:59 	 2010 91 https://web.archive.org/web/20100101000000/http://www.babycenter.com
24 	 2019-05-12 23:54:24 	 2010 92 https://web.archive.org/web/20100101000000/http://www.drinkwise.com.au/
28 	 2019-05-12 23:54:53 	 2010 93 https://web.archive.org/web/20100101000000/http://www.parenting.com/
19 	 2019-05-12 23:55:12 	 2010 94 https://web.archive.org/web/20100101000000/http://www.bluebelljeans.com
Hiding wb toolbar error
67 	 2019-05-12 23:56:20 	 2010 95 https://web.archive.org/web/20100101000000/http://www.nytimes.com/tmagazine
25 	 2019-05-12 23:56:45 	 2010 96 https://web.archive.org/web/20100101000000/http://www.hoodieremix.com
36 	 2019-05-12 23:57:22 	 2010 97 https://web.archive.org/web/20100101000000/http://www.refinery29.com
15 	 2019-05-12 23:57:37 	 2010 98 https://web.archive.org/web/20100101000000/http://lift.puma.com/
34 	 2019-05-12 23:58:11 	 2010 99 https://web.archive.org/web/20100101000000/http://www.mint.com/
57 	 2019-05-12 23:59:09 	 2010 100

46 	 2019-05-13 00:27:52 	 2010 156 https://web.archive.org/web/20100101000000/http://www.mnn.com
56 	 2019-05-13 00:28:48 	 2010 157 https://web.archive.org/web/20100101000000/http://www.newyorker.com
Hiding wb toolbar error
35 	 2019-05-13 00:29:24 	 2010 158 https://web.archive.org/web/20100101000000/http://ngm.com
33 	 2019-05-13 00:29:57 	 2010 159 https://web.archive.org/web/20100101000000/http://www.wired.com/magazine
61 	 2019-05-13 00:30:58 	 2010 160 https://web.archive.org/web/20100101000000/http://www.FLYPmedia.com
17 	 2019-05-13 00:31:15 	 2010 161 https://web.archive.org/web/20100101000000/http://thinkingspace.economist.com
19 	 2019-05-13 00:31:35 	 2010 162 https://web.archive.org/web/20100101000000/http://mubi.com
15 	 2019-05-13 00:31:50 	 2010 163 https://web.archive.org/web/20100101000000/http://www.d-9.com
33 	 2019-05-13 00:32:23 	 2010 164 https://web.archive.org/web/20100101000000/http://www.ifc.com/
17 	 2019-05-13 00:32:41 	 2010 165 https://web.archive.org/w

16 	 2019-05-13 01:01:04 	 2010 217 https://web.archive.org/web/20100101000000/http://awards.lbi.co.uk/2010/webby/electrolux/
29 	 2019-05-13 01:01:34 	 2010 218 https://web.archive.org/web/20100101000000/http://www.giraffe.net
Hiding wb toolbar error
20 	 2019-05-13 01:01:54 	 2010 219 https://web.archive.org/web/20100101000000/http://www.imakemycase.com
Hiding wb toolbar error
32 	 2019-05-13 01:02:26 	 2010 220 https://web.archive.org/web/20100101000000/http://www.moo.com
Hiding wb toolbar error
29 	 2019-05-13 01:02:56 	 2010 221 https://web.archive.org/web/20100101000000/https://www.crumplerbags.com
17 	 2019-05-13 01:03:14 	 2010 222 https://web.archive.org/web/20100101000000/http://www.bugaboo.com
24 	 2019-05-13 01:03:38 	 2010 223 https://web.archive.org/web/20100101000000/http://focus.firstbornmultimedia.com/?puma_lift
15 	 2019-05-13 01:03:53 	 2010 224 https://web.archive.org/web/20100101000000/http://community.bucknell.edu/
27 	 2019-05-13 01:04:20 	 2010 225 https://web.a

35 	 2019-05-13 01:28:35 	 2010 279 https://web.archive.org/web/20100101000000/http://www.cuteoverload.com
23 	 2019-05-13 01:28:58 	 2010 280 https://web.archive.org/web/20100101000000/http://kids.nationalgeographic.com/
26 	 2019-05-13 01:29:24 	 2010 281 https://web.archive.org/web/20100101000000/http://pbskids.org
30 	 2019-05-13 01:29:55 	 2010 282 https://web.archive.org/web/20100101000000/http://www.dosomething.org
16 	 2019-05-13 01:30:11 	 2010 283 https://web.archive.org/web/20100101000000/http://www.mathmovesu.com/sum-of-all-thrills.aspx?AuthTicket=debug
16 	 2019-05-13 01:30:28 	 2010 284 https://web.archive.org/web/20100101000000/http://www.blastgetcreative.co.uk
52 	 2019-05-13 01:31:21 	 2010 285 https://web.archive.org/web/20100101000000/http://www.newyorker.com
52 	 2019-05-13 01:32:13 	 2010 286 https://web.archive.org/web/20100101000000/http://www.nytimes.com
47 	 2019-05-13 01:33:00 	 2010 287 https://web.archive.org/web/20100101000000/http://www.wired.com
27 	 2019

Error scraping the Url
34 	 2019-05-13 01:58:34 	 2011 13 https://web.archive.org/web/20110101000000/http://www.downtownstjohns.com
37 	 2019-05-13 01:59:12 	 2011 14 https://web.archive.org/web/20110101000000/http://www.asla.org/greenroof
27 	 2019-05-13 01:59:39 	 2011 15 https://web.archive.org/web/20110101000000/http://www.2011Mediocrity.com
18 	 2019-05-13 01:59:58 	 2011 16 https://web.archive.org/web/20110101000000/http://www.it.vespa.com
Hiding wb toolbar error
54 	 2019-05-13 02:00:53 	 2011 17 https://web.archive.org/web/20110101000000/http://www.nissanusa.com/leaf-electric-car
22 	 2019-05-13 02:01:16 	 2011 18 https://web.archive.org/web/20110101000000/http://www.mbusa.com/whatdrivesus/
15 	 2019-05-13 02:01:31 	 2011 19 https://web.archive.org/web/20110101000000/http://www.mercedes-amg.com/#/m157
Hiding wb toolbar error
23 	 2019-05-13 02:01:55 	 2011 20 https://web.archive.org/web/20110101000000/http://www.lendingclub.com
Hiding wb toolbar error
Traceback (most recent cal

Error scraping the Url
303 	 2019-05-13 02:33:02 	 2011 42 https://web.archive.org/web/20110101000000/http://huffingtonpost.com/politics
35 	 2019-05-13 02:33:38 	 2011 43 https://web.archive.org/web/20110101000000/http://www.theatlantic.com/politics
48 	 2019-05-13 02:34:26 	 2011 44 https://web.archive.org/web/20110101000000/http://www.teamcoco.com
Hiding wb toolbar error
28 	 2019-05-13 02:34:55 	 2011 45 https://web.archive.org/web/20110101000000/http://www.wonderwall.com
19 	 2019-05-13 02:35:14 	 2011 46 https://web.archive.org/web/20110101000000/http://www.jayzhitscollection.com/
22 	 2019-05-13 02:35:37 	 2011 47 https://web.archive.org/web/20110101000000/http://U2.com
16 	 2019-05-13 02:35:53 	 2011 48 https://web.archive.org/web/20110101000000/http://TomPetty.com
21 	 2019-05-13 02:36:15 	 2011 49 https://web.archive.org/web/20110101000000/http://www.historypin.com/
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 10, in getMetrics
    driver

Error scraping the Url
313 	 2019-05-13 03:06:05 	 2011 81 https://web.archive.org/web/20110101000000/http://www.glassdoor.com
45 	 2019-05-13 03:06:51 	 2011 82 https://web.archive.org/web/20110101000000/http://www.tribecafilm.com
50 	 2019-05-13 03:07:41 	 2011 83 https://web.archive.org/web/20110101000000/http://www.youtube.com/play
33 	 2019-05-13 03:08:14 	 2011 84 https://web.archive.org/web/20110101000000/http://ocointeractive.com/awardshows/webbyAwards10/entries/tgtspec.html
Hiding wb toolbar error
36 	 2019-05-13 03:08:51 	 2011 85 https://web.archive.org/web/20110101000000/http://vimeoawards.com
23 	 2019-05-13 03:09:14 	 2011 86 https://web.archive.org/web/20110101000000/http://www.singaporeairshow.com.sg/
40 	 2019-05-13 03:09:55 	 2011 87 https://web.archive.org/web/20110101000000/http://www.babycenter.com
36 	 2019-05-13 03:10:31 	 2011 88 https://web.archive.org/web/20110101000000/http://www.drugfree.org
Traceback (most recent call last):
  File "<ipython-input-134-914ec

Error scraping the Url
302 	 2019-05-13 03:41:28 	 2011 110 https://web.archive.org/web/20110101000000/http://www.escapistmagazine.com/
19 	 2019-05-13 03:41:48 	 2011 111 https://web.archive.org/web/20110101000000/http://www.gamespot.com
57 	 2019-05-13 03:42:45 	 2011 112 https://web.archive.org/web/20110101000000/http://www.joystiq.com/
26 	 2019-05-13 03:43:12 	 2011 113 https://web.archive.org/web/20110101000000/http://www.gamefly.com
16 	 2019-05-13 03:43:29 	 2011 114 https://web.archive.org/web/20110101000000/http://www.blitzagency.com/awardsub/halowaypoint/
16 	 2019-05-13 03:43:45 	 2011 115 https://web.archive.org/web/20110101000000/http://www.capitol.gov
53 	 2019-05-13 03:44:39 	 2011 116 https://web.archive.org/web/20110101000000/http://www.nasa.gov
39 	 2019-05-13 03:45:19 	 2011 117 https://web.archive.org/web/20110101000000/http://solarsystem.nasa.gov/
62 	 2019-05-13 03:46:22 	 2011 118 https://web.archive.org/web/20110101000000/http://projects.washingtonpost.com/top-

Error scraping the Url
302 	 2019-05-13 04:20:27 	 2011 146 https://web.archive.org/web/20110101000000/http://www.escapistmagazine.com/
47 	 2019-05-13 04:21:14 	 2011 147 https://web.archive.org/web/20110101000000/http://www.readymade.com/
35 	 2019-05-13 04:21:50 	 2011 148 https://web.archive.org/web/20110101000000/http://www.nowness.com/
33 	 2019-05-13 04:22:23 	 2011 149 https://web.archive.org/web/20110101000000/http://www.thesweetbeet.com
Hiding wb toolbar error
66 	 2019-05-13 04:23:29 	 2011 150 https://web.archive.org/web/20110101000000/http://ngm.com
Hiding wb toolbar error
55 	 2019-05-13 04:24:24 	 2011 151 https://web.archive.org/web/20110101000000/http://www.nytimes.com/tmagazine
31 	 2019-05-13 04:24:55 	 2011 152 https://web.archive.org/web/20110101000000/http://www.rollingstone.com/
46 	 2019-05-13 04:25:42 	 2011 153 https://web.archive.org/web/20110101000000/http://www.vogue.com
16 	 2019-05-13 04:25:58 	 2011 154 https://web.archive.org/web/20110101000000/http://t

Error scraping the Url
321 	 2019-05-13 04:53:07 	 2011 171 https://web.archive.org/web/20110101000000/http://www.wsj.com
77 	 2019-05-13 04:54:24 	 2011 172 https://web.archive.org/web/20110101000000/http://huffingtonpost.com
33 	 2019-05-13 04:54:58 	 2011 173 https://web.archive.org/web/20110101000000/http://www.whatimade.com/
Hiding wb toolbar error
51 	 2019-05-13 04:55:50 	 2011 174 https://web.archive.org/web/20110101000000/http://the3six5.com
56 	 2019-05-13 04:56:46 	 2011 175 https://web.archive.org/web/20110101000000/http://www.variationsonnormal.com
Hiding wb toolbar error
29 	 2019-05-13 04:57:16 	 2011 176 https://web.archive.org/web/20110101000000/http://www.clouds365.com
22 	 2019-05-13 04:57:38 	 2011 177 https://web.archive.org/web/20110101000000/http://SpeakFromTheHeart.com
20 	 2019-05-13 04:57:59 	 2011 178 https://web.archive.org/web/20110101000000/http://voicesofmeningitis.org
17 	 2019-05-13 04:58:17 	 2011 179 https://web.archive.org/web/20110101000000/http://w

32 	 2019-05-13 05:21:37 	 2011 222 https://web.archive.org/web/20110101000000/http://www.scientificamerican.com
35 	 2019-05-13 05:22:12 	 2011 223 https://web.archive.org/web/20110101000000/http://www.exploratorium.edu/explore
27 	 2019-05-13 05:22:39 	 2011 224 https://web.archive.org/web/20110101000000/http://www.summitprojects.com
74 	 2019-05-13 05:23:54 	 2011 225 https://web.archive.org/web/20110101000000/http://www.twitterknitter.co.uk
19 	 2019-05-13 05:24:13 	 2011 226 https://web.archive.org/web/20110101000000/http://www.immersive-garden.com
18 	 2019-05-13 05:24:32 	 2011 227 https://web.archive.org/web/20110101000000/http://www.erwinpenland.com/work/details/shed
38 	 2019-05-13 05:25:10 	 2011 228 https://web.archive.org/web/20110101000000/http://www.behance.net
30 	 2019-05-13 05:25:41 	 2011 229 https://web.archive.org/web/20110101000000/http://www.vimeo.com
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 10, in getMetrics
    driver.g

Error scraping the Url
302 	 2019-05-13 05:56:44 	 2011 260 https://web.archive.org/web/20110101000000/http://www.eventbrite.com
44 	 2019-05-13 05:57:28 	 2011 261 https://web.archive.org/web/20110101000000/http://AwkwardFamilyPhotos.com
15 	 2019-05-13 05:57:43 	 2011 262 https://web.archive.org/web/20110101000000/http://www.whatthefuckshouldimakefordinner.com
25 	 2019-05-13 05:58:09 	 2011 263 https://web.archive.org/web/20110101000000/http://mostawesomestthingever.com/
23 	 2019-05-13 05:58:32 	 2011 264 https://web.archive.org/web/20110101000000/http://www.rathergood.com/
37 	 2019-05-13 05:59:10 	 2011 265 https://web.archive.org/web/20110101000000/http://www.thedailywh.at
Hiding wb toolbar error
28 	 2019-05-13 05:59:38 	 2011 266 https://web.archive.org/web/20110101000000/http://kids.nationalgeographic.com
17 	 2019-05-13 05:59:56 	 2011 267 https://web.archive.org/web/20110101000000/http://labs.youthventure.org/
37 	 2019-05-13 06:00:34 	 2011 268 https://web.archive.org/web/

Error scraping the Url
47 	 2019-05-13 06:25:54 	 2011 299 https://web.archive.org/web/20110101000000/http://www.thewildernessdowntown.com
31 	 2019-05-13 06:26:26 	 2011 300 https://web.archive.org/web/20110101000000/http://www.ted.com
17 	 2019-05-13 06:26:43 	 2011 301 https://web.archive.org/web/20110101000000/http://www.thejohnnycashproject.com/
15 	 2019-05-13 06:26:59 	 2011 302 https://web.archive.org/web/20110101000000/http://carlos.canalplus.fr/index_en.html
14 	 2019-05-13 06:27:13 	 2011 303 https://web.archive.org/web/20110101000000/http://www.monet2010.com
39 	 2019-05-13 06:27:53 	 2011 304 https://web.archive.org/web/20110101000000/http://life.com
15 	 2019-05-13 06:28:08 	 2011 305 https://web.archive.org/web/20110101000000/http://www.awards-showcase.net/louisvuitton/annie
20 	 2019-05-13 06:28:28 	 2011 306 https://web.archive.org/web/20110101000000/http://www.jayzhitscollection.com/
24 	 2019-05-13 06:28:52 	 2011 307 https://web.archive.org/web/20110101000000/http:/

Error scraping the Url
302 	 2019-05-13 06:56:10 	 2012 28 https://web.archive.org/web/20120101000000/http://www.newyorker.com/online/blogs/johncassidy/
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 10, in getMetrics
    driver.get(url)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
    self.execute(Command.GET, {'url': url})
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: timeout
  (Session info: chrome=73.0.3683.86)
  (Driver info: chromedriver=2.41.578700 (2f1ed5f9343c13f73144538f15c00b370eda6706),platform=Linux 4.18.0-10-generic x86_64)


Error scraping the 

Hiding wb toolbar error
37 	 2019-05-13 07:38:07 	 2012 76 https://web.archive.org/web/20120101000000/http://www.glassdoor.com
36 	 2019-05-13 07:38:44 	 2012 77 https://web.archive.org/web/20120101000000/http://www.freelancer.com
29 	 2019-05-13 07:39:13 	 2012 78 https://web.archive.org/web/20120101000000/http://vitamintalent.com/
23 	 2019-05-13 07:39:37 	 2012 79 https://web.archive.org/web/20120101000000/http://www.jibe.com
44 	 2019-05-13 07:40:22 	 2012 80 https://web.archive.org/web/20120101000000/http://www.tribecafilm.com
38 	 2019-05-13 07:41:00 	 2012 81 https://web.archive.org/web/20120101000000/http://www.sundancechannel.com/festival
18 	 2019-05-13 07:41:18 	 2012 82 https://web.archive.org/web/20120101000000/http://ten.propaganda3.com
20 	 2019-05-13 07:41:38 	 2012 83 https://web.archive.org/web/20120101000000/http://partytodiefor.com
42 	 2019-05-13 07:42:21 	 2012 84 https://web.archive.org/web/20120101000000/http://www.focusforwardfilms.com/
50 	 2019-05-13 07:43:12

22 	 2019-05-13 08:20:14 	 2012 128 https://web.archive.org/web/20120101000000/http://www.medify.com
31 	 2019-05-13 08:20:46 	 2012 129 https://web.archive.org/web/20120101000000/http://www.LIVESTRONG.com
39 	 2019-05-13 08:21:25 	 2012 130 https://web.archive.org/web/20120101000000/http://www.mskcc.org
31 	 2019-05-13 08:21:57 	 2012 131 https://web.archive.org/web/20120101000000/http://www.bhf.org.uk/about-us/50-years-of-heart-health.aspx
22 	 2019-05-13 08:22:19 	 2012 132 https://web.archive.org/web/20120101000000/http://www.avvo.com
138 	 2019-05-13 08:24:38 	 2012 133 https://web.archive.org/web/20120101000000/http://TheOnion.com
42 	 2019-05-13 08:25:21 	 2012 134 https://web.archive.org/web/20120101000000/http://www.CollegeHumor.com
88 	 2019-05-13 08:26:50 	 2012 135 https://web.archive.org/web/20120101000000/http://www.funnyordie.com
58 	 2019-05-13 08:27:48 	 2012 136 https://web.archive.org/web/20120101000000/http://www.teamcoco.com
22 	 2019-05-13 08:28:11 	 2012 137 http

74 	 2019-05-13 09:06:54 	 2012 164 https://web.archive.org/web/20120101000000/http://www.pitchfork.com
45 	 2019-05-13 09:07:39 	 2012 165 https://web.archive.org/web/20120101000000/http://www.pandora.com
Hiding wb toolbar error
32 	 2019-05-13 09:08:12 	 2012 166 https://web.archive.org/web/20120101000000/http://www.spotify.com
33 	 2019-05-13 09:08:45 	 2012 167 https://web.archive.org/web/20120101000000/http://www.npr.org/music/
14 	 2019-05-13 09:08:59 	 2012 168 https://web.archive.org/web/20120101000000/http://www.sfsymphony.org/timeline
Hiding wb toolbar error
34 	 2019-05-13 09:09:34 	 2012 169 https://web.archive.org/web/20120101000000/http://nfb.ca/blabla
26 	 2019-05-13 09:10:00 	 2012 170 https://web.archive.org/web/20120101000000/http://www.postsecret.com/
33 	 2019-05-13 09:10:33 	 2012 171 https://web.archive.org/web/20120101000000/http://snailmailmyemail.org
25 	 2019-05-13 09:10:59 	 2012 172 https://web.archive.org/web/20120101000000/http://cvdazzle.com
28 	 2019-05-

50 	 2019-05-13 09:54:43 	 2012 225 https://web.archive.org/web/20120101000000/http://www.wired.com/wiredscience/
35 	 2019-05-13 09:55:19 	 2012 226 https://web.archive.org/web/20120101000000/http://www.scientificamerican.com/
28 	 2019-05-13 09:55:47 	 2012 227 https://web.archive.org/web/20120101000000/http://planetquest.jpl.nasa.gov/
21 	 2019-05-13 09:56:08 	 2012 228 https://web.archive.org/web/20120101000000/http://robertjaso.com/
29 	 2019-05-13 09:56:38 	 2012 229 https://web.archive.org/web/20120101000000/http://portfolios.sva.edu
17 	 2019-05-13 09:56:55 	 2012 230 https://web.archive.org/web/20120101000000/http://www.thehumanmixtape.com
21 	 2019-05-13 09:57:17 	 2012 231 https://web.archive.org/web/20120101000000/http://www.WretchedBeauty.com
110 	 2019-05-13 09:59:07 	 2012 232 https://web.archive.org/web/20120101000000/http://www.kinetic.com.sg
81 	 2019-05-13 10:00:29 	 2012 233 https://web.archive.org/web/20120101000000/http://www.pinterest.com
Hiding wb toolbar error


Error scraping the Url
23 	 2019-05-13 10:31:59 	 2012 261 https://web.archive.org/web/20120101000000/http://www.hipmunk.com
20 	 2019-05-13 10:32:20 	 2012 262 https://web.archive.org/web/20120101000000/http://www.dropbox.com
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 10, in getMetrics
    driver.get(url)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
    self.execute(Command.GET, {'url': url})
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: timeout
  (Session info: chrome=73.0.3683.86)
  (Driver info: chromedriver=2.41.578700 (2f1ed5f9343c13f73144538f15c00b

17 	 2019-05-13 10:57:50 	 2012 290 https://web.archive.org/web/20120101000000/http://prty.jp/fastweb/award/en/
18 	 2019-05-13 10:58:08 	 2012 291 https://web.archive.org/web/20120101000000/https://www.manilla.com
22 	 2019-05-13 10:58:31 	 2012 292 https://web.archive.org/web/20120101000000/http://www.simplee.com
27 	 2019-05-13 10:58:58 	 2012 293 https://web.archive.org/web/20120101000000/http://www.wepay.com
21 	 2019-05-13 10:59:19 	 2012 294 https://web.archive.org/web/20120101000000/http://www.xero.com
26 	 2019-05-13 10:59:46 	 2012 295 https://web.archive.org/web/20120101000000/http://www.doxo.com
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/w

47 	 2019-05-13 11:36:54 	 2012 316 https://web.archive.org/web/20120101000000/http://www.vh1.com/shows/pop_up_video/create.jhtml
33 	 2019-05-13 11:37:27 	 2012 317 https://web.archive.org/web/20120101000000/http://omg.yahoo.com/
14 	 2019-05-13 11:37:42 	 2012 318 https://web.archive.org/web/20120101000000/http://www.pineridgesioux.com
36 	 2019-05-13 11:38:19 	 2012 319 https://web.archive.org/web/20120101000000/http://www.amnestyusa.org/
27 	 2019-05-13 11:38:46 	 2012 320 https://web.archive.org/web/20120101000000/http://toadworkscreative.com/JL/awards/messageforjapan/
16 	 2019-05-13 11:39:02 	 2012 321 https://web.archive.org/web/20120101000000/http://bethedirector.girlscouts.org
32 	 2019-05-13 11:39:35 	 2012 322 https://web.archive.org/web/20120101000000/http://www.sesameworkshop.org
24 	 2019-05-13 11:39:59 	 2012 323 https://web.archive.org/web/20120101000000/http://www.ihadcancer.com
38 	 2019-05-13 11:40:38 	 2012 324 https://web.archive.org/web/20120101000000/http://Tumb

Hiding wb toolbar error
25 	 2019-05-13 12:14:38 	 2012 372 https://web.archive.org/web/20120101000000/http://www.wikinvest.com
25 	 2019-05-13 12:15:03 	 2012 373 https://web.archive.org/web/20120101000000/http://www.foodily.com
Hiding wb toolbar error
29 	 2019-05-13 12:15:33 	 2012 374 https://web.archive.org/web/20120101000000/http://doritos-the-end.sn77.net/
Hiding wb toolbar error
57 	 2019-05-13 12:16:30 	 2012 375 https://web.archive.org/web/20120101000000/http://www.nytimes.com/dining
83 	 2019-05-13 12:17:53 	 2012 376 https://web.archive.org/web/20120101000000/http://www.activatedrinks.com
26 	 2019-05-13 12:18:19 	 2012 377 https://web.archive.org/web/20120101000000/http://www.fivethirtybrew.com
24 	 2019-05-13 12:18:44 	 2012 378 https://web.archive.org/web/20120101000000/http://award.aid-dcc.com/androp_bell/en/
Hiding wb toolbar error
29 	 2019-05-13 12:19:14 	 2012 379 https://web.archive.org/web/20120101000000/http://www.Disney.com/Games
22 	 2019-05-13 12:19:36 	 2012 

Error scraping the Url
20 	 2019-05-13 12:53:14 	 2012 417 https://web.archive.org/web/20120101000000/http://chaosinyourtown.com/
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 10, in getMetrics
    driver.get(url)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
    self.execute(Command.GET, {'url': url})
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: timeout
  (Session info: chrome=73.0.3683.86)
  (Driver info: chromedriver=2.41.578700 (2f1ed5f9343c13f73144538f15c00b370eda6706),platform=Linux 4.18.0-10-generic x86_64)


Error scraping the Url
302 	 2019-05-13 12

Hiding wb toolbar error
74 	 2019-05-13 13:43:21 	 2012 477 https://web.archive.org/web/20120101000000/http://www.CNN.com/podcasts
32 	 2019-05-13 13:43:53 	 2012 478 https://web.archive.org/web/20120101000000/http://www.padmapper.com
17 	 2019-05-13 13:44:11 	 2012 479 https://web.archive.org/web/20120101000000/http://www.apartmentlist.com
22 	 2019-05-13 13:44:33 	 2012 480 https://web.archive.org/web/20120101000000/http://www.zillow.com
14 	 2019-05-13 13:44:48 	 2012 481 https://web.archive.org/web/20120101000000/http://awards.legworkstudio.com/union-station/
25 	 2019-05-13 13:45:13 	 2012 482 https://web.archive.org/web/20120101000000/http://www.stribling.com/
43 	 2019-05-13 13:45:56 	 2012 483 https://web.archive.org/web/20120101000000/http://fervr.net
Hiding wb toolbar error
40 	 2019-05-13 13:46:37 	 2012 484 https://web.archive.org/web/20120101000000/http://www.pbs.org/religion
Hiding wb toolbar error
108 	 2019-05-13 13:48:26 	 2012 485 https://web.archive.org/web/201201010

Error scraping the Url
315 	 2019-05-13 14:33:58 	 2012 525 https://web.archive.org/web/20120101000000/http://pbs.tv
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 10, in getMetrics
    driver.get(url)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
    self.execute(Command.GET, {'url': url})
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: timeout
  (Session info: chrome=73.0.3683.86)
  (Driver info: chromedriver=2.41.578700 (2f1ed5f9343c13f73144538f15c00b370eda6706),platform=Linux 4.18.0-10-generic x86_64)


Error scraping the Url
301 	 2019-05-13 14:38:59 	 2012

Error scraping the Url
25 	 2019-05-13 14:57:18 	 2013 3 https://web.archive.org/web/20130101000000/http://www.storiesfrom.us
46 	 2019-05-13 14:58:05 	 2013 4 https://web.archive.org/web/20130101000000/http://www.facesofdrunkdriving.com/
Hiding wb toolbar error
33 	 2019-05-13 14:58:38 	 2013 5 https://web.archive.org/web/20130101000000/http://www.thecreatorsproject.com/reel-2013/film-and-video/
17 	 2019-05-13 14:58:56 	 2013 6 https://web.archive.org/web/20130101000000/http://kukijar.com/2013/moma/
114 	 2019-05-13 15:00:50 	 2013 7 https://web.archive.org/web/20130101000000/http://www.moma.org/cindysherman
32 	 2019-05-13 15:01:22 	 2013 8 https://web.archive.org/web/20130101000000/http://blogs.guggenheim.org/
30 	 2019-05-13 15:01:53 	 2013 9 https://web.archive.org/web/20130101000000/http://www.newmuseum.org/
48 	 2019-05-13 15:02:41 	 2013 10 https://web.archive.org/web/20130101000000/http://withart.visitphilly.com/
46 	 2019-05-13 15:03:27 	 2013 11 https://web.archive.org/web/

21 	 2019-05-13 15:41:38 	 2013 35 https://web.archive.org/web/20130101000000/http://www.teamcoco.com
32 	 2019-05-13 15:42:11 	 2013 36 https://web.archive.org/web/20130101000000/http://hbo-awards.squarespace.com/hbo-connect
Hiding wb toolbar error
38 	 2019-05-13 15:42:49 	 2013 37 https://web.archive.org/web/20130101000000/http://bitly.com/rosemcgowanwebby
40 	 2019-05-13 15:43:29 	 2013 38 https://web.archive.org/web/20130101000000/http://omg.yahoo.com
82 	 2019-05-13 15:44:52 	 2013 39 https://web.archive.org/web/20130101000000/http://www.eonline.com
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
    'args':

29 	 2019-05-13 16:17:08 	 2013 71 https://web.archive.org/web/20130101000000/http://www.girleffect.org
28 	 2019-05-13 16:17:36 	 2013 72 https://web.archive.org/web/20130101000000/http://www.chromeweblab.com
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
    'args': converted_args})['value']
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stackt

Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
    'args': converted_args})['value']
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: Cannot read property 'font' of undefined
  (Session info: chrome=73.0.3683.86)
  (Driver info: chromedriver=2.41.578700 (2f1ed5f9343c

28 	 2019-05-13 16:49:51 	 2013 123 https://web.archive.org/web/20130101000000/http://bear71.nfb.ca
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
    'args': converted_args})['value']
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: Cannot read property 'font' of un

Error scraping the Url
19 	 2019-05-13 17:24:05 	 2013 173 https://web.archive.org/web/20130101000000/http://tllabs.io/asciistreetview/
28 	 2019-05-13 17:24:34 	 2013 174 https://web.archive.org/web/20130101000000/http://www.chromeweblab.com
60 	 2019-05-13 17:25:34 	 2013 175 https://web.archive.org/web/20130101000000/http://www.thedailybeast.com
94 	 2019-05-13 17:27:09 	 2013 176 https://web.archive.org/web/20130101000000/http://www.huffingtonpost.com/
Hiding wb toolbar error
49 	 2019-05-13 17:27:58 	 2013 177 https://web.archive.org/web/20130101000000/http://www.nytimes.com
40 	 2019-05-13 17:28:39 	 2013 178 https://web.archive.org/web/20130101000000/http://www.npr.org/
Hiding wb toolbar error
46 	 2019-05-13 17:29:25 	 2013 179 https://web.archive.org/web/20130101000000/http://www.bbc.co.uk/news/
38 	 2019-05-13 17:30:03 	 2013 180 https://web.archive.org/web/20130101000000/http://www.thibaultjorge.com
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>"

Hiding wb toolbar error
123 	 2019-05-13 17:53:05 	 2013 203 https://web.archive.org/web/20130101000000/http://CNN.com/Belief
79 	 2019-05-13 17:54:25 	 2013 204 https://web.archive.org/web/20130101000000/http://www.huffingtonpost.com/religion/
34 	 2019-05-13 17:54:59 	 2013 205 https://web.archive.org/web/20130101000000/http://us-en.superbook.cbn.com/
Hiding wb toolbar error
29 	 2019-05-13 17:55:28 	 2013 206 https://web.archive.org/web/20130101000000/http://www.oxfordislamicstudies.com/
Hiding wb toolbar error
28 	 2019-05-13 17:55:57 	 2013 207 https://web.archive.org/web/20130101000000/http://www.rcinet.ca/religions/
27 	 2019-05-13 17:56:24 	 2013 208 https://web.archive.org/web/20130101000000/http://www.uchicago.edu
20 	 2019-05-13 17:56:44 	 2013 209 https://web.archive.org/web/20130101000000/http://www.salt.edu/
51 	 2019-05-13 17:57:35 	 2013 210 https://web.archive.org/web/20130101000000/http://www.newschool.edu
30 	 2019-05-13 17:58:06 	 2013 211 https://web.archive.org/we

75 	 2019-05-13 18:25:52 	 2013 246 https://web.archive.org/web/20130101000000/http://travel.nationalgeographic.com/travel/
37 	 2019-05-13 18:26:30 	 2013 247 https://web.archive.org/web/20130101000000/http://www.kayak.com
41 	 2019-05-13 18:27:11 	 2013 248 https://web.archive.org/web/20130101000000/https://www.dropbox.com/
31 	 2019-05-13 18:27:42 	 2013 249 https://web.archive.org/web/20130101000000/http://www.squarespace.com
37 	 2019-05-13 18:28:19 	 2013 250 https://web.archive.org/web/20130101000000/http://www.seatgeek.com
40 	 2019-05-13 18:29:00 	 2013 251 https://web.archive.org/web/20130101000000/http://www.grammarly.com
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/python3.6/site-packages/s

22 	 2019-05-13 18:44:24 	 2013 272 https://web.archive.org/web/20130101000000/http://www.rei1440project.com
37 	 2019-05-13 18:45:02 	 2013 273 https://web.archive.org/web/20130101000000/http://ed.ted.com/
26 	 2019-05-13 18:45:28 	 2013 274 https://web.archive.org/web/20130101000000/https://www.dropbox.com/
43 	 2019-05-13 18:46:12 	 2013 275 https://web.archive.org/web/20130101000000/http://www.vogue.co.uk
Hiding wb toolbar error
38 	 2019-05-13 18:46:51 	 2013 276 https://web.archive.org/web/20130101000000/http://cargocollective.com/awardsubmissions/uniqlo-ux
51 	 2019-05-13 18:47:42 	 2013 277 https://web.archive.org/web/20130101000000/http://www.monocle.com
21 	 2019-05-13 18:48:04 	 2013 278 https://web.archive.org/web/20130101000000/http://youareblind.com
19 	 2019-05-13 18:48:24 	 2013 279 https://web.archive.org/web/20130101000000/http://www.kreativitetstesten.no/en
28 	 2019-05-13 18:48:53 	 2013 280 https://web.archive.org/web/20130101000000/http://soundecology.nfb.ca/
42 	

32 	 2019-05-13 19:35:45 	 2013 323 https://web.archive.org/web/20130101000000/http://www.newmuseum.org/
47 	 2019-05-13 19:36:33 	 2013 324 https://web.archive.org/web/20130101000000/http://withart.visitphilly.com/
46 	 2019-05-13 19:37:19 	 2013 325 https://web.archive.org/web/20130101000000/http://www.aia.org
36 	 2019-05-13 19:37:56 	 2013 326 https://web.archive.org/web/20130101000000/http://adcyoungguns.org/
20 	 2019-05-13 19:38:16 	 2013 327 https://web.archive.org/web/20130101000000/http://entriesforyourconsideration.com/awards/webby/wcua/index.html
42 	 2019-05-13 19:38:59 	 2013 328 https://web.archive.org/web/20130101000000/http://www.mashable.com
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 10, in getMetrics
    driver.get(url)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
    self.execute(Command.GET, {'url': url})
  File "/usr/local/lib/python3.6/site-packages/selenium/webdri

25 	 2019-05-13 20:16:35 	 2013 348 https://web.archive.org/web/20130101000000/http://www.teamcoco.com
30 	 2019-05-13 20:17:05 	 2013 349 https://web.archive.org/web/20130101000000/http://hbo-awards.squarespace.com/hbo-connect
Hiding wb toolbar error
46 	 2019-05-13 20:17:52 	 2013 350 https://web.archive.org/web/20130101000000/http://bitly.com/rosemcgowanwebby
43 	 2019-05-13 20:18:35 	 2013 351 https://web.archive.org/web/20130101000000/http://omg.yahoo.com
80 	 2019-05-13 20:19:55 	 2013 352 https://web.archive.org/web/20130101000000/http://www.eonline.com
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
    'a

39 	 2019-05-13 20:54:13 	 2013 384 https://web.archive.org/web/20130101000000/http://www.girleffect.org
51 	 2019-05-13 20:55:05 	 2013 385 https://web.archive.org/web/20130101000000/http://www.chromeweblab.com
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
    'args': converted_args})['value']
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stac

Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
    'args': converted_args})['value']
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: Cannot read property 'font' of undefined
  (Session info: chrome=73.0.3683.86)
  (Driver info: chromedriver=2.41.578700 (2f1ed5f9343c

69 	 2019-05-13 21:40:13 	 2013 431 https://web.archive.org/web/20130101000000/http://www.milwaukeepolicenews.com/
43 	 2019-05-13 21:40:57 	 2013 432 https://web.archive.org/web/20130101000000/http://solarsystem.nasa.gov/index.cfm
21 	 2019-05-13 21:41:18 	 2013 433 https://web.archive.org/web/20130101000000/http://landinghost.com/award_shows/PedalMN_Interactive/
Hiding wb toolbar error
44 	 2019-05-13 21:42:02 	 2013 434 https://web.archive.org/web/20130101000000/http://nyc.gov/service
50 	 2019-05-13 21:42:52 	 2013 435 https://web.archive.org/web/20130101000000/http://www.jpl.nasa.gov/spaceimages
32 	 2019-05-13 21:43:25 	 2013 436 https://web.archive.org/web/20130101000000/http://www.lifeonterra.com
28 	 2019-05-13 21:43:54 	 2013 437 https://web.archive.org/web/20130101000000/http://bear71.nfb.ca
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
 

Error scraping the Url
21 	 2019-05-13 22:19:08 	 2013 487 https://web.archive.org/web/20130101000000/http://tllabs.io/asciistreetview/
33 	 2019-05-13 22:19:42 	 2013 488 https://web.archive.org/web/20130101000000/http://www.chromeweblab.com
68 	 2019-05-13 22:20:50 	 2013 489 https://web.archive.org/web/20130101000000/http://www.thedailybeast.com
92 	 2019-05-13 22:22:23 	 2013 490 https://web.archive.org/web/20130101000000/http://www.huffingtonpost.com/
Hiding wb toolbar error
51 	 2019-05-13 22:23:15 	 2013 491 https://web.archive.org/web/20130101000000/http://www.nytimes.com
42 	 2019-05-13 22:23:57 	 2013 492 https://web.archive.org/web/20130101000000/http://www.npr.org/
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/support/wait.py", line 71, in un

44 	 2019-05-13 22:54:44 	 2013 511 https://web.archive.org/web/20130101000000/http://thepenguinpodcast.blogs.com/
88 	 2019-05-13 22:56:13 	 2013 512 https://web.archive.org/web/20130101000000/http://www.tabletmag.com/?cat=13
Hiding wb toolbar error
88 	 2019-05-13 22:57:42 	 2013 513 https://web.archive.org/web/20130101000000/http://www.CNN.com/soundwaves
20 	 2019-05-13 22:58:02 	 2013 514 https://web.archive.org/web/20130101000000/http://www.zillow.com
45 	 2019-05-13 22:58:48 	 2013 515 https://web.archive.org/web/20130101000000/http://www.TheRedPin.com
19 	 2019-05-13 22:59:07 	 2013 516 https://web.archive.org/web/20130101000000/http://www.apartmentlist.com
24 	 2019-05-13 22:59:32 	 2013 517 https://web.archive.org/web/20130101000000/http://sparefoot.com
Hiding wb toolbar error
107 	 2019-05-13 23:01:19 	 2013 518 https://web.archive.org/web/20130101000000/http://CNN.com/Belief
84 	 2019-05-13 23:02:43 	 2013 519 https://web.archive.org/web/20130101000000/http://www.huffingtonp

Error scraping the Url
42 	 2019-05-13 23:30:13 	 2013 557 https://web.archive.org/web/20130101000000/http://www.ifc.com/back-to-portlandia/
191 	 2019-05-13 23:33:25 	 2013 558 https://web.archive.org/web/20130101000000/http://www.hipmunk.com
39 	 2019-05-13 23:34:04 	 2013 559 https://web.archive.org/web/20130101000000/http://www.tripadvisor.com
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/support/wait.py", line 71, in until
    value = method(self._driver)
  File "<ipython-input-134-914ecd78e0ec>", line 20, in <lambda>
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.f

52 	 2019-05-13 23:49:49 	 2013 582 https://web.archive.org/web/20130101000000/http://www.condenast.com/
28 	 2019-05-13 23:50:18 	 2013 583 https://web.archive.org/web/20130101000000/http://cloudsovercuba.com/
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/support/wait.py", line 71, in until
    value = method(self._driver)
  File "<ipython-input-134-914ecd78e0ec>", line 20, in <lambda>
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME, value=name)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'v

Error scraping the Url
301 	 2019-05-14 00:33:54 	 2014 8 https://web.archive.org/web/20140101000000/http://illusion.scene360.com
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/support/wait.py", line 71, in until
    value = method(self._driver)
  File "<ipython-input-134-914ecd78e0ec>", line 20, in <lambda>
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME, value=name)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'value': value})['value'] or []
  File "/usr/local/lib/python3.6/site-packages/sele

28 	 2019-05-14 01:02:37 	 2014 34 https://web.archive.org/web/20140101000000/http://thecarcrush.com/
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/support/wait.py", line 71, in until
    value = method(self._driver)
  File "<ipython-input-134-914ecd78e0ec>", line 20, in <lambda>
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME, value=name)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 1007, in find_elements
    'value': value})['value'] or []
  File "/usr/local/lib/python3.6/site-packages/selenium

19 	 2019-05-14 01:40:11 	 2014 75 https://web.archive.org/web/20140101000000/http://www.coursera.org
13 	 2019-05-14 01:40:25 	 2014 76 https://web.archive.org/web/20140101000000/http://www.otheradvertising.nl/2013/100objects/
152 	 2019-05-14 01:42:57 	 2014 77 https://web.archive.org/web/20140101000000/http://www.hhmi.org/biointeractive/
54 	 2019-05-14 01:43:51 	 2014 78 https://web.archive.org/web/20140101000000/http://readysetgrad.org/
64 	 2019-05-14 01:44:56 	 2014 79 https://web.archive.org/web/20140101000000/http://ed.ted.com
62 	 2019-05-14 01:45:58 	 2014 80 https://web.archive.org/web/20140101000000/http://www.refinery29.com
24 	 2019-05-14 01:46:22 	 2014 81 https://web.archive.org/web/20140101000000/http://nowiknow.com/
48 	 2019-05-14 01:47:10 	 2014 82 https://web.archive.org/web/20140101000000/http://muckrack.com/daily/email
28 	 2019-05-14 01:47:39 	 2014 83 https://web.archive.org/web/20140101000000/http://www.qz.com/re/daily-brief/
36 	 2019-05-14 01:48:15 	 2014 8

22 	 2019-05-14 02:06:01 	 2014 109 https://web.archive.org/web/20140101000000/http://www.wepay.com
30 	 2019-05-14 02:06:32 	 2014 110 https://web.archive.org/web/20140101000000/http://maxbrenner.com/
30 	 2019-05-14 02:07:03 	 2014 111 https://web.archive.org/web/20140101000000/http://yeahburger.com
41 	 2019-05-14 02:07:45 	 2014 112 https://web.archive.org/web/20140101000000/http://www.blueapron.com
45 	 2019-05-14 02:08:30 	 2014 113 https://web.archive.org/web/20140101000000/http://andrewzimmern.com
31 	 2019-05-14 02:09:01 	 2014 114 https://web.archive.org/web/20140101000000/http://www.friendlys.com
28 	 2019-05-14 02:09:29 	 2014 115 https://web.archive.org/web/20140101000000/http://hellorun.helloenjoy.com
20 	 2019-05-14 02:09:50 	 2014 116 https://web.archive.org/web/20140101000000/http://www.atari.com/arcade
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until

Error scraping the Url
35 	 2019-05-14 02:27:34 	 2014 135 https://web.archive.org/web/20140101000000/http://www.chefsfeed.com
52 	 2019-05-14 02:28:27 	 2014 136 https://web.archive.org/web/20140101000000/http://www.metacritic.com/
20 	 2019-05-14 02:28:48 	 2014 137 https://web.archive.org/web/20140101000000/http://drinkdistiller.com
32 	 2019-05-14 02:29:21 	 2014 138 https://web.archive.org/web/20140101000000/http://agencyawards.com.au/webbys2014/remotecontroltourist/
31 	 2019-05-14 02:29:52 	 2014 139 https://web.archive.org/web/20140101000000/http://labdoor.com
50 	 2019-05-14 02:30:42 	 2014 140 https://web.archive.org/web/20140101000000/http://www.webmd.com
Hiding wb toolbar error
20 	 2019-05-14 02:31:03 	 2014 141 https://web.archive.org/web/20140101000000/http://upwave.com
38 	 2019-05-14 02:31:41 	 2014 142 https://web.archive.org/web/20140101000000/http://healthline.com
61 	 2019-05-14 02:32:43 	 2014 143 https://web.archive.org/web/20140101000000/http://www.strong4life.c

Hiding wb toolbar error
74 	 2019-05-14 03:07:58 	 2014 179 https://web.archive.org/web/20140101000000/http://www.nprmusic.org
48 	 2019-05-14 03:08:47 	 2014 180 https://web.archive.org/web/20140101000000/https://www.justareflektor.com/
74 	 2019-05-14 03:10:01 	 2014 181 https://web.archive.org/web/20140101000000/http://futurecoast.org
13 	 2019-05-14 03:10:15 	 2014 182 https://web.archive.org/web/20140101000000/http://selflessportraits.com/webbys-submission/
48 	 2019-05-14 03:11:03 	 2014 183 https://web.archive.org/web/20140101000000/http://humantide.co.uk/
47 	 2019-05-14 03:11:51 	 2014 184 https://web.archive.org/web/20140101000000/http://www.watchyulelog.com
Hiding wb toolbar error
32 	 2019-05-14 03:12:23 	 2014 185 https://web.archive.org/web/20140101000000/http://nytimes.com
45 	 2019-05-14 03:13:08 	 2014 186 https://web.archive.org/web/20140101000000/http://www.variety.com
47 	 2019-05-14 03:13:56 	 2014 187 https://web.archive.org/web/20140101000000/http://www.rollingst

Error scraping the Url
29 	 2019-05-14 03:44:37 	 2014 212 https://web.archive.org/web/20140101000000/http://www.renthop.com
Hiding wb toolbar error
48 	 2019-05-14 03:45:26 	 2014 213 https://web.archive.org/web/20140101000000/http://www.pbs.org/religion
33 	 2019-05-14 03:46:00 	 2014 214 https://web.archive.org/web/20140101000000/http://onbeing.org
31 	 2019-05-14 03:46:31 	 2014 215 https://web.archive.org/web/20140101000000/http://www.religiondispatches.org
59 	 2019-05-14 03:47:31 	 2014 216 https://web.archive.org/web/20140101000000/http://religionandpolitics.org
27 	 2019-05-14 03:47:58 	 2014 217 https://web.archive.org/web/20140101000000/http://forums.ssrc.org/ndsp/
29 	 2019-05-14 03:48:27 	 2014 218 https://web.archive.org/web/20140101000000/http://virtualtour.gwu.edu
52 	 2019-05-14 03:49:19 	 2014 219 https://web.archive.org/web/20140101000000/http://music.yale.edu/
72 	 2019-05-14 03:50:32 	 2014 220 https://web.archive.org/web/20140101000000/http://www.scad.edu
70 	 201

61 	 2019-05-14 04:26:51 	 2014 257 https://web.archive.org/web/20140101000000/https://roadtrippers.com
Hiding wb toolbar error
28 	 2019-05-14 04:27:19 	 2014 258 https://web.archive.org/web/20140101000000/https://www.squarespace.com/
54 	 2019-05-14 04:28:14 	 2014 259 https://web.archive.org/web/20140101000000/http://www.dudamobile.com
39 	 2019-05-14 04:28:53 	 2014 260 https://web.archive.org/web/20140101000000/http://www.seatgeek.com
48 	 2019-05-14 04:29:41 	 2014 261 https://web.archive.org/web/20140101000000/http://canva.com
38 	 2019-05-14 04:30:19 	 2014 262 https://web.archive.org/web/20140101000000/http://www.mailchimp.com
28 	 2019-05-14 04:30:48 	 2014 263 https://web.archive.org/web/20140101000000/http://black.thegifys.com/
59 	 2019-05-14 04:31:47 	 2014 264 https://web.archive.org/web/20140101000000/http://www.stufftoblowyourmind.com
47 	 2019-05-14 04:32:35 	 2014 265 https://web.archive.org/web/20140101000000/http://textastrophe.com/
22 	 2019-05-14 04:32:57 	 2014 

Error scraping the Url
24 	 2019-05-14 04:55:44 	 2014 297 https://web.archive.org/web/20140101000000/http://wondereur.com
Hiding wb toolbar error
92 	 2019-05-14 04:57:17 	 2014 298 https://web.archive.org/web/20140101000000/http://ngm.nationalgeographic.com/serengeti-lion
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/support/wait.py", line 71, in until
    value = method(self._driver)
  File "<ipython-input-134-914ecd78e0ec>", line 20, in <lambda>
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME, value=name)
  File "/usr/local/lib/python3.6/site-packages/selenium/we

55 	 2019-05-14 05:37:51 	 2015 25 https://web.archive.org/web/20150101000000/http://thedissolve.com
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 10, in getMetrics
    driver.get(url)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
    self.execute(Command.GET, {'url': url})
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: timeout
  (Session info: chrome=73.0.3683.86)
  (Driver info: chromedriver=2.41.578700 (2f1ed5f9343c13f73144538f15c00b370eda6706),platform=Linux 4.18.0-10-generic x86_64)


Error scraping the Url
302 	 2019-05-14 05:42:53 	 2015 26 https://web.

23 	 2019-05-14 06:19:06 	 2015 63 https://web.archive.org/web/20150101000000/http://eeyouwayoflife.ca/
48 	 2019-05-14 06:19:55 	 2015 64 https://web.archive.org/web/20150101000000/http://www.recyclons-nos-papiers.fr/
65 	 2019-05-14 06:21:00 	 2015 65 https://web.archive.org/web/20150101000000/http://warsawrising.eu
34 	 2019-05-14 06:21:35 	 2015 66 https://web.archive.org/web/20150101000000/http://www.guggenheim.org/new-york/collections/collection-online
57 	 2019-05-14 06:22:32 	 2015 67 https://web.archive.org/web/20150101000000/http://www.warink.org
39 	 2019-05-14 06:23:12 	 2015 68 https://web.archive.org/web/20150101000000/http://www.thehighline.org
19 	 2019-05-14 06:23:32 	 2015 69 https://web.archive.org/web/20150101000000/http://www.rgm.lv/
Hiding wb toolbar error
81 	 2019-05-14 06:24:54 	 2015 70 https://web.archive.org/web/20150101000000/http://www.cfr.org/sunnishia
31 	 2019-05-14 06:25:26 	 2015 71 https://web.archive.org/web/20150101000000/http://www.ted.com/watch/t

Error scraping the Url
32 	 2019-05-14 06:48:54 	 2015 105 https://web.archive.org/web/20150101000000/http://www.oola.com
64 	 2019-05-14 06:49:59 	 2015 106 https://web.archive.org/web/20150101000000/http://www.epicurious.com
30 	 2019-05-14 06:50:30 	 2015 107 https://web.archive.org/web/20150101000000/http://www.dishoom.com
22 	 2019-05-14 06:50:52 	 2015 108 https://web.archive.org/web/20150101000000/http://food.nationalgeographic.com
104 	 2019-05-14 06:52:37 	 2015 109 https://web.archive.org/web/20150101000000/http://cooking.nytimes.com
61 	 2019-05-14 06:53:38 	 2015 110 https://web.archive.org/web/20150101000000/http://sortieenmer.com/
30 	 2019-05-14 06:54:09 	 2015 111 https://web.archive.org/web/20150101000000/http://www.tamponrun.com
44 	 2019-05-14 06:54:54 	 2015 112 https://web.archive.org/web/20150101000000/http://race.assassinscreedpirates.com/
32 	 2019-05-14 06:55:27 	 2015 113 https://web.archive.org/web/20150101000000/http://smartypins.withgoogle.com/
63 	 2019-05

Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 10, in getMetrics
    driver.get(url)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
    self.execute(Command.GET, {'url': url})
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: timeout
  (Session info: chrome=73.0.3683.86)
  (Driver info: chromedriver=2.41.578700 (2f1ed5f9343c13f73144538f15c00b370eda6706),platform=Linux 4.18.0-10-generic x86_64)


Error scraping the Url
302 	 2019-05-14 07:35:22 	 2015 146 https://web.archive.org/web/20150101000000/http://jurist.org
23 	 2019-05-14 07:35:46 	 2015 147 https://web.arc

109 	 2019-05-14 08:07:51 	 2015 179 https://web.archive.org/web/20150101000000/http://nytimes.com
Hiding wb toolbar error
109 	 2019-05-14 08:09:40 	 2015 180 https://web.archive.org/web/20150101000000/http://www.theguardian.com
56 	 2019-05-14 08:10:36 	 2015 181 https://web.archive.org/web/20150101000000/https://news.vice.com/
84 	 2019-05-14 08:12:01 	 2015 182 https://web.archive.org/web/20150101000000/http://www.theweek.com
38 	 2019-05-14 08:12:40 	 2015 183 https://web.archive.org/web/20150101000000/http://qz.com
37 	 2019-05-14 08:13:17 	 2015 184 https://web.archive.org/web/20150101000000/http://thingsorganizedneatly.tumblr.com
46 	 2019-05-14 08:14:04 	 2015 185 https://web.archive.org/web/20150101000000/http://www.cardboardboxoffice.com
33 	 2019-05-14 08:14:37 	 2015 186 https://web.archive.org/web/20150101000000/http://stream.pleated-jeans.com/
36 	 2019-05-14 08:15:13 	 2015 187 https://web.archive.org/web/20150101000000/http://Littlebigdetails.com
33 	 2019-05-14 08:15:

Error scraping the Url
27 	 2019-05-14 08:30:11 	 2015 214 https://web.archive.org/web/20150101000000/http://feirstein.brooklyn.cuny.edu
26 	 2019-05-14 08:30:38 	 2015 215 https://web.archive.org/web/20150101000000/http://tech.cornell.edu/
45 	 2019-05-14 08:31:23 	 2015 216 https://web.archive.org/web/20150101000000/http://www.unr.edu
39 	 2019-05-14 08:32:03 	 2015 217 https://web.archive.org/web/20150101000000/https://minerva.kgi.edu/
27 	 2019-05-14 08:32:30 	 2015 218 https://web.archive.org/web/20150101000000/https://www.pafa.org/
Hiding wb toolbar error
34 	 2019-05-14 08:33:05 	 2015 219 https://web.archive.org/web/20150101000000/http://www.1pixelmoon.com
Hiding wb toolbar error
58 	 2019-05-14 08:34:03 	 2015 220 https://web.archive.org/web/20150101000000/http://www.bbc.com/earth
79 	 2019-05-14 08:35:22 	 2015 221 https://web.archive.org/web/20150101000000/http://climate.nasa.gov
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0

42 	 2019-05-14 09:15:55 	 2015 254 https://web.archive.org/web/20150101000000/https://education.skype.com/
43 	 2019-05-14 09:16:38 	 2015 255 https://web.archive.org/web/20150101000000/http://www.seatgeek.com
33 	 2019-05-14 09:17:12 	 2015 256 https://web.archive.org/web/20150101000000/http://www.squarespace.com/seven
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/support/wait.py", line 71, in until
    value = method(self._driver)
  File "<ipython-input-134-914ecd78e0ec>", line 20, in <lambda>
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by_tag_name
    return self.find_elements(by=By.TAG_NAME

35 	 2019-05-14 09:35:10 	 2015 288 https://web.archive.org/web/20150101000000/http://mashable.com/2014/10/24/ebola-liberia-hell-photographer/
37 	 2019-05-14 09:35:48 	 2015 289 https://web.archive.org/web/20150101000000/http://proof.nationalgeographic.com
114 	 2019-05-14 09:37:43 	 2015 290 https://web.archive.org/web/20150101000000/http://www.cnn.com/interactive/2014/07/travel/50-states-natural-wonders/index.html
41 	 2019-05-14 09:38:24 	 2015 291 https://web.archive.org/web/20150101000000/http://www.returntohope.com
Hiding wb toolbar error
60 	 2019-05-14 09:39:24 	 2015 292 https://web.archive.org/web/20150101000000/http://lens.blogs.nytimes.com/
32 	 2019-05-14 09:39:56 	 2015 293 https://web.archive.org/web/20150101000000/https://danslapeaudelours.canalplus.fr/en
42 	 2019-05-14 09:40:39 	 2015 294 https://web.archive.org/web/20150101000000/http://www.theguardian.com/world/ng-interactive/2014/jul/23/a-global-guide-to-the-first-world-war-interactive-documentary
52 	 2019-05-14 

41 	 2019-05-14 10:13:59 	 2015 327 https://web.archive.org/web/20150101000000/http://www.usopensessions.com/
Hiding wb toolbar error
31 	 2019-05-14 10:14:30 	 2015 328 https://web.archive.org/web/20150101000000/http://www.netflix.com
Hiding wb toolbar error
45 	 2019-05-14 10:15:15 	 2015 329 https://web.archive.org/web/20150101000000/http://www.alsa.org/icebucketchallenge
23 	 2019-05-14 10:15:39 	 2015 330 https://web.archive.org/web/20150101000000/http://www.snapchat.com
45 	 2019-05-14 10:16:24 	 2015 331 https://web.archive.org/web/20150101000000/http://serialpodcast.org/
36 	 2019-05-14 10:17:01 	 2015 332 https://web.archive.org/web/20150101000000/http://www.uber.com
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-104-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/pytho

Error scraping the Url
25 	 2019-05-14 10:43:07 	 2016 26 https://web.archive.org/web/20160101000000/http://www.madeintheam.com/
42 	 2019-05-14 10:43:50 	 2016 27 https://web.archive.org/web/20160101000000/http://teamcoco.com/
122 	 2019-05-14 10:45:52 	 2016 28 https://web.archive.org/web/20160101000000/http://www.KendallJ.com
Hiding wb toolbar error
59 	 2019-05-14 10:46:52 	 2016 29 https://web.archive.org/web/20160101000000/http://www.u2.com
46 	 2019-05-14 10:47:38 	 2016 30 https://web.archive.org/web/20160101000000/http://www.thatsnotcool.com
47 	 2019-05-14 10:48:26 	 2016 31 https://web.archive.org/web/20160101000000/http://humansforhumans.ca
Hiding wb toolbar error
36 	 2019-05-14 10:49:03 	 2016 32 https://web.archive.org/web/20160101000000/http://www.havealittleheart.co.nz
Hiding wb toolbar error
45 	 2019-05-14 10:49:48 	 2016 33 https://web.archive.org/web/20160101000000/http://www.onedrop.org
30 	 2019-05-14 10:50:18 	 2016 34 https://web.archive.org/web/20160101000000/

Error scraping the Url
22 	 2019-05-14 11:13:28 	 2016 55 https://web.archive.org/web/20160101000000/http://jazz.org/blog
24 	 2019-05-14 11:13:53 	 2016 56 https://web.archive.org/web/20160101000000/http://concreteplayground.com
29 	 2019-05-14 11:14:23 	 2016 57 https://web.archive.org/web/20160101000000/http://www.nowness.com
33 	 2019-05-14 11:14:56 	 2016 58 https://web.archive.org/web/20160101000000/http://poly-graph.co
36 	 2019-05-14 11:15:32 	 2016 59 https://web.archive.org/web/20160101000000/http://www.museodelprado.es
40 	 2019-05-14 11:16:13 	 2016 60 https://web.archive.org/web/20160101000000/http://archeologie.culture.fr/chauvet/en/
35 	 2019-05-14 11:16:48 	 2016 61 https://web.archive.org/web/20160101000000/http://bluecadet.com/work/hmt-hoover-mason-trestle/
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 24, in getMetrics
    driver.save_screenshot(imagePath)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/we

36 	 2019-05-14 14:38:50 	 2016 100 https://web.archive.org/web/20160101000000/http://www.yummly.com
66 	 2019-05-14 14:39:56 	 2016 101 https://web.archive.org/web/20160101000000/http://www.oola.com
46 	 2019-05-14 14:40:42 	 2016 102 https://web.archive.org/web/20160101000000/http://www.epicurious.com
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 24, in getMetrics
    driver.save_screenshot(imagePath)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 1055, in save_screenshot
    return self.get_screenshot_as_file(filename)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 1032, in get_screenshot_as_file
    png = self.get_screenshot_as_png()
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 1064, in get_screenshot_as_png
    return base64.b64decode(self.get_screenshot_as_base64().encode('ascii'))
  File "/usr/local/lib/p

Error scraping the Url
19 	 2019-05-14 15:12:04 	 2016 137 https://web.archive.org/web/20160101000000/http://www.citizenshipworks.org
27 	 2019-05-14 15:12:32 	 2016 138 https://web.archive.org/web/20160101000000/http://maps.nulawlab.org/
50 	 2019-05-14 15:13:23 	 2016 139 https://web.archive.org/web/20160101000000/https://broadly.vice.com/en_us/
55 	 2019-05-14 15:14:18 	 2016 140 https://web.archive.org/web/20160101000000/http://www.wellandgood.com
45 	 2019-05-14 15:15:04 	 2016 141 https://web.archive.org/web/20160101000000/http://www.sonima.com/
39 	 2019-05-14 15:15:44 	 2016 142 https://web.archive.org/web/20160101000000/http://www.thrillist.com
48 	 2019-05-14 15:16:32 	 2016 143 https://web.archive.org/web/20160101000000/http://gearpatrol.com
48 	 2019-05-14 15:17:21 	 2016 144 https://web.archive.org/web/20160101000000/http://www.newyorker.com
58 	 2019-05-14 15:18:20 	 2016 145 https://web.archive.org/web/20160101000000/http://www.smithsonianmag.com
66 	 2019-05-14 15:19:26

Error scraping the Url
36 	 2019-05-14 15:38:57 	 2016 176 https://web.archive.org/web/20160101000000/http://www.tinytimes.com
20 	 2019-05-14 15:39:18 	 2016 177 https://web.archive.org/web/20160101000000/http://silesoleil.com/
44 	 2019-05-14 15:40:03 	 2016 178 https://web.archive.org/web/20160101000000/http://Www.drakedis.co
41 	 2019-05-14 15:40:44 	 2016 179 https://web.archive.org/web/20160101000000/http://www.factcheck.org/
Hiding wb toolbar error
Traceback (most recent call last):
  File "<ipython-input-134-914ecd78e0ec>", line 20, in getMetrics
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/support/wait.py", line 71, in until
    value = method(self._driver)
  File "<ipython-input-134-914ecd78e0ec>", line 20, in <lambda>
    WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/we

Hiding wb toolbar error
36 	 2019-05-14 16:00:28 	 2016 208 https://web.archive.org/web/20160101000000/http://Nixon.com
Hiding wb toolbar error
51 	 2019-05-14 16:01:20 	 2016 209 https://web.archive.org/web/20160101000000/http://barneys.com
56 	 2019-05-14 16:02:16 	 2016 210 https://web.archive.org/web/20160101000000/https://www.helmade.com/
43 	 2019-05-14 16:03:00 	 2016 211 https://web.archive.org/web/20160101000000/http://www.tenslife.com/
54 	 2019-05-14 16:03:55 	 2016 212 https://web.archive.org/web/20160101000000/http://ditto.com
57 	 2019-05-14 16:04:52 	 2016 213 https://web.archive.org/web/20160101000000/http://tumblr.com
21 	 2019-05-14 16:05:14 	 2016 214 https://web.archive.org/web/20160101000000/https://rabb.it
Hiding wb toolbar error
41 	 2019-05-14 16:05:56 	 2016 215 https://web.archive.org/web/20160101000000/http://www.wikihow.com
35 	 2019-05-14 16:06:32 	 2016 216 https://web.archive.org/web/20160101000000/http://ihadcancer.com
Hiding wb toolbar error
Traceback (

27 	 2019-05-14 16:47:09 	 2016 265 https://web.archive.org/web/20160101000000/http://www.theguardian.com/us-news/ng-interactive/2015/jun/01/the-counted-police-killings-us-database
Hiding wb toolbar error
39 	 2019-05-14 16:47:49 	 2016 266 https://web.archive.org/web/20160101000000/http://Nixon.com
20 	 2019-05-14 16:48:10 	 2016 267 https://web.archive.org/web/20160101000000/https://www.hbonow.com/sign-in
39 	 2019-05-14 16:48:49 	 2016 268 https://web.archive.org/web/20160101000000/http://falter.madebywild.com/#en
70 	 2019-05-14 16:49:59 	 2016 269 https://web.archive.org/web/20160101000000/http://www.giphy.com
29 	 2019-05-14 16:50:29 	 2016 270 https://web.archive.org/web/20160101000000/http://cancerfilms.org/classroom/
23 	 2019-05-14 16:50:52 	 2016 271 https://web.archive.org/web/20160101000000/http://suissemania.ch/
35 	 2019-05-14 16:51:28 	 2016 272 https://web.archive.org/web/20160101000000/http://because-recollection.com/
75 	 2019-05-14 16:52:43 	 2016 273 https://web.ar

55 	 2019-05-14 19:36:54 	 2016 304 https://web.archive.org/web/20160101000000/http://www.theguardian.com/news/series/the-long-read
126 	 2019-05-14 19:39:00 	 2016 305 https://web.archive.org/web/20160101000000/http://www.bloomberg.com/graphics/2015-paul-ford-what-is-code/
22 	 2019-05-14 19:39:23 	 2016 306 https://web.archive.org/web/20160101000000/https://performingarts.withgoogle.com/en_us
Hiding wb toolbar error
16 	 2019-05-14 19:39:40 	 2016 307 https://web.archive.org/web/20160101000000/http://w3w.co/webbys
76 	 2019-05-14 19:40:56 	 2016 308 https://web.archive.org/web/20160101000000/http://www.mtvbump.com/watch
13 	 2019-05-14 19:41:09 	 2016 309 https://web.archive.org/web/20160101000000/http://awardsfacility.com/google/entry
Hiding wb toolbar error
17 	 2019-05-14 19:41:26 	 2016 310 https://web.archive.org/web/20160101000000/http:// http://typetodesign.com
Hiding wb toolbar error
30 	 2019-05-14 19:41:57 	 2016 311 https://web.archive.org/web/20160101000000/https://www.ne