In [21]:
from bs4 import BeautifulSoup
from scipy.spatial import cKDTree as KDTree
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from pyvirtualdisplay import Display

import csv
import cv2
import datetime
import matplotlib
import multiprocessing as mp
import numpy as np
import re
import string
import sys
import unidecode
import traceback
import time

In [22]:
# Module-level scratch state for getVisualComplexity, which resets both names
# at the start of every call and leaves its results in them afterwards.
gridCount=1   # starts at 1 and is incremented once for every region that gets split
rois=[]       # leaf regions, each stored as [offX, offY, w, h, mean, stdDev]
# Absolute prefix for the screenshot and metrics files written by this notebook.
# NOTE(review): machine-specific path; must be edited before running elsewhere.
PATH='/home/abhiavk/git/Website-Evolution/'

In [23]:
def timeTaken(startTime, Metric, MetricValue=""):
    """Print one timing line for a metric.

    The line holds the metric name left-justified to 25 characters, the time
    elapsed since ``startTime`` (a ``datetime.datetime``), two tabs, and the
    metric value.
    """
    label = Metric.ljust(25, " ")
    elapsed = datetime.datetime.now() - startTime
    print(label, elapsed, "\t\t", MetricValue)

In [24]:
def string_to_words(txt):
    """Split text into words, dropping punctuation, digits and very short words.

    Every non-word character (anything that is not a letter, digit or
    underscore) and every run of ASCII digits is treated as a separator.
    Tokens consisting of only one or two ASCII letters are discarded.

    Parameters
    ----------
    txt : str
        Text to tokenise.

    Returns
    -------
    list of str
        The remaining tokens, in their original order.
    """
    # Raw strings: "\w" inside a normal string literal is an invalid escape.
    txt = re.sub(r"[^\w]", " ", txt)
    txt = re.sub(r"[0-9]+", " ", txt)
    # Filter per token instead of substituting " x " patterns: those patterns
    # need a space on both sides, so they miss short words at the start or end
    # of the text and every second one in a run such as "a b c".
    return [word for word in txt.split()
            if not re.fullmatch(r"[a-zA-Z]{1,2}", word)]

In [25]:
def get_words(d):
    """Return the words of the page currently loaded in WebDriver ``d``.

    The text is gathered from ``document.body.innerText`` followed by
    ``document.innerText``, transliterated to ASCII with ``unidecode`` and
    tokenised with ``string_to_words``. A script that raises, or that returns
    anything other than a string, simply contributes no text.

    Parameters
    ----------
    d : object
        Anything exposing ``execute_script(script)``.

    Returns
    -------
    list of str
    """
    txt = ""
    for script in ("return document.body.innerText;",
                   "return document.innerText;"):
        try:
            chunk = d.execute_script(script)
        except Exception:
            # Best effort: a failing script must not abort the whole page.
            continue
        # A missing property comes back as None; skip it explicitly instead of
        # relying on a swallowed TypeError from `txt += None`.
        if isinstance(chunk, str):
            txt += chunk
    return string_to_words(str(unidecode.unidecode(txt)))

In [26]:
def get_word_count(d):
    """Return the number of words on the page loaded in driver ``d``, as a float.

    The words are those returned by ``get_words(d)``.
    """
    return float(len(get_words(d)))

In [27]:
def get_text_body_ratio(soup,wordCount):
    """Return the ratio of heading words to all words on the page.

    Heading text is the text of every ``<h1>``..``<h6>`` element plus every
    ``<font>`` element whose ``size`` attribute is "3", "4" or "5".

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed page source.
    wordCount : float
        Total word count of the page (see ``get_word_count``).

    Returns
    -------
    float
        Heading word count divided by ``wordCount``; 0.0 when ``wordCount``
        is zero.
    """
    elements = []
    for level in range(1, 7):
        elements += soup.findAll("h" + str(level))
    for size in ("3", "4", "5"):
        elements += soup.findAll("font", {"size": size})

    txt = "".join(" " + element.text for element in elements)
    words = []
    if len(txt) != 0:
        words = string_to_words(str(unidecode.unidecode(txt)))
    # len() of a list cannot fail, so the former try/except around it (whose
    # handler assigned a misspelled name) was dead code and has been removed.
    headTextCount = float(len(words))

    if not wordCount:
        return 0.0
    return headTextCount / wordCount

In [28]:
def get_emph_body_text_percentage(d,bs,wordCount):
    """Return emphasised text as a percentage of the page's word count.

    Three counts are added together: words inside ``<b>`` elements, runs of
    one or more exclamation marks in the body text, and words (from
    ``get_words``) that equal their own upper-cased form. A word can fall in
    more than one category, so the result may exceed 100.

    Parameters
    ----------
    d : WebDriver
        Driver with the page loaded.
    bs : BeautifulSoup
        Parsed page source.
    wordCount : float
        Total word count of the page.

    Returns
    -------
    float
        ``(bold + exclamations + capitals) / wordCount * 100``; 0.0 when
        ``wordCount`` is zero.
    """
    boldWordCount = 0
    for tag in bs.findAll("b"):
        try:
            boldWordCount += len(string_to_words(str(unidecode.unidecode(tag.text))))
        except Exception:
            # Best effort: skip a tag whose text cannot be processed.
            continue

    try:
        txt = str(unidecode.unidecode(d.execute_script("return document.body.innerText")))
    except Exception:
        # Fall back to textContent when innerText is unavailable.
        txt = str(unidecode.unidecode(d.execute_script("return document.body.textContent")))
    exclWordCount = len(re.findall("!+", txt))

    capWordCount = sum(1 for word in get_words(d) if word == word.upper())

    emphTextCount = float(boldWordCount + exclWordCount + capWordCount)
    if not wordCount:
        return 0.0
    return (emphTextCount / wordCount) * 100.0

In [29]:
def get_text_position_changes(s):
    """Count how often the inline ``text-align`` value changes across elements.

    Elements are visited in the order returned by ``s.findAll()``. Only
    elements whose ``style`` attribute contains the literal ``text-align:``
    are considered; each time the value differs from the previous one seen
    (starting from the empty string), the counter goes up by one.

    Parameters
    ----------
    s : BeautifulSoup
        Parsed page source.

    Returns
    -------
    int
        Number of alignment changes.
    """
    marker = "text-align:"
    prev = ""
    textPositionChanges = 0
    for element in s.findAll():
        # .get() yields None for elements without an inline style, so no
        # exception has to be raised and swallowed for the common case.
        style = element.get("style")
        if style is None:
            continue
        style = str(style)
        if marker not in style:
            continue
        position = style.split(marker)[1].split(";")[0].strip()
        if position != prev:
            textPositionChanges += 1
            prev = position
    return textPositionChanges

In [30]:
def get_text_clusters(d,bs):
    """Return the number of text clusters on the page.

    A cluster is any ``<td>``, ``<table>`` or ``<p>`` element found in ``bs``.
    ``d`` is accepted for signature parity with the other metric functions
    and is not used.
    """
    return sum(len(bs.findAll(tag)) for tag in ("td", "table", "p"))

In [31]:
def get_visible_links(d,bs):
    """Return the number of ``<a>`` elements in ``bs`` whose text is non-empty.

    Text is compared against the empty string only, so whitespace-only link
    text still counts. ``d`` is accepted for signature parity with the other
    metric functions and is not used.
    """
    return sum(1 for link in bs.findAll("a") if link.text != "")

In [32]:
def get_page_size(d):
    """Return the total transfer size of the page's resources in KiB.

    The browser's performance entries are fetched through ``d.execute_script``
    and the ``transferSize`` of every entry that has one is summed, then
    divided by 1024.

    Parameters
    ----------
    d : WebDriver
        Driver with the page loaded.

    Returns
    -------
    float
        Summed transfer size in KiB; 0.0 when there are no usable entries.
    """
    scriptToExecute = "	var performance = 	window.performance ||\
											window.mozPerformance ||\
											window.msPerformance ||\
									 		window.webkitPerformance || {};\
						var network 	= 	performance.getEntries() || {};\
						return network;"
    networkData = d.execute_script(scriptToExecute)
    totalBytes = 0.0
    for entry in networkData or []:
        try:
            totalBytes += float(entry[u'transferSize'])
        except (KeyError, TypeError, ValueError):
            # Entries without a numeric transferSize carry no size information.
            continue
    return totalBytes / 1024.0

In [33]:
def get_graphics_percent(d,pageSize):
    """Return the share of the page size taken by script, image and CSS resources.

    Performance entries whose ``initiatorType`` is ``script``, ``img`` or
    ``css`` have their ``transferSize`` summed (converted to KiB) and
    expressed as a percentage of ``pageSize``.

    Parameters
    ----------
    d : WebDriver
        Driver with the page loaded.
    pageSize : float
        Total page size in KiB (see ``get_page_size``).

    Returns
    -------
    float
        Percentage of ``pageSize``; 0.0 when ``pageSize`` is zero.
    """
    scriptToExecute = "var performance = window.performance || window.mozPerformance || window.msPerformance || window.webkitPerformance || {}; var network = performance.getEntries() || {}; return network;"
    networkData = d.execute_script(scriptToExecute)
    graphicsBytes = 0.0
    for entry in networkData or []:
        try:
            if entry[u'initiatorType'] in (u'script', u'img', u'css'):
                graphicsBytes += float(entry[u'transferSize'])
        except (KeyError, TypeError, ValueError):
            # Entry lacks one of the two fields or has a non-numeric size.
            continue
    graphicsSize = graphicsBytes / 1024.0

    if pageSize == 0:
        return 0.0
    return graphicsSize * 100.0 / pageSize

In [34]:
def get_graphics_count(d,bs):
    """Return the number of ``<style>`` and ``<script>`` elements plus images.

    Style and script elements are counted in ``bs``; images are the entries
    of ``document.images`` as reported by the live page in driver ``d``.
    """
    styleSheets = bs.findAll("style")
    scripts = bs.findAll("script")
    images = d.execute_script("return document.images;")
    return len(styleSheets) + len(images) + len(scripts)

In [35]:
def get_color_count(image, bgr=True, minFraction=0.01):
    """Count the named colours that each cover a minimum share of the image.

    Every pixel is assigned to its nearest colour (Euclidean distance in RGB)
    among matplotlib's named colours; a colour is counted when at least
    ``minFraction`` of all pixels were assigned to it.

    Parameters
    ----------
    image : numpy.ndarray
        Pixel array whose last axis holds the three colour channels.
    bgr : bool, optional
        True (default) when the channels are in OpenCV's B, G, R order, as
        for the ``cv2.imread`` result passed in by ``getMetrics``. The
        palette is in R, G, B order, so the channels are reversed before
        matching; pass False for an image that is already RGB.
    minFraction : float, optional
        Minimum fraction of pixels a colour must cover. The default 0.01
        gives the former fixed threshold of 7864 pixels for a 1024x768 image.

    Returns
    -------
    int
        Number of named colours reaching the threshold.
    """
    # `import matplotlib` alone does not promise that the colors submodule is loaded.
    import matplotlib.colors

    palette = np.array([
        [int(hexCode[start:start + 2], 16) for start in (1, 3, 5)]
        for hexCode in matplotlib.colors.cnames.values()
    ])

    pixels = np.asarray(image)
    if bgr:
        pixels = pixels[..., ::-1]
    if pixels.size == 0:
        return 0

    # No distance bound is used, so every pixel receives a valid palette index.
    _, idx = KDTree(palette).query(pixels)
    counts = np.bincount(idx.ravel(), minlength=len(palette))

    # Scale the threshold with the image instead of assuming 1024x768 pixels.
    threshold = max(1, int(idx.size * minFraction))
    return int(np.count_nonzero(counts >= threshold))

In [36]:
def get_font_count(d,bs):
    """Return the number of distinct inline font settings used by ``<div>`` elements.

    For every ``<div>`` in the live page, the font-related properties of its
    inline ``style`` are read. Divs with identical values share one setting,
    and the setting in which every property is empty (no inline font) is not
    counted.

    Parameters
    ----------
    d : WebDriver
        Driver with the page loaded.
    bs : BeautifulSoup
        Unused; kept so existing callers do not have to change. The divs are
        enumerated in the browser rather than counted in the parsed source,
        so a page that changed after parsing cannot cause an invalid index.

    Returns
    -------
    int
        Number of distinct non-default font settings (0 when there are no divs).
    """
    fontProperties = [
        "font", "fontDisplay", "fontFamily", "fontFeatureSettings",
        "fontKerning", "fontSize", "fontStretch", "fontStyle", "fontVariant",
        "fontVariantCaps", "fontVariantEastAsian", "fontVariantLigatures",
        "fontVariantNumeric", "fontVariationSettings", "fontWeight",
    ]
    # One script call gathers everything; the previous version issued fifteen
    # WebDriver round trips per div.
    script = (
        "var props = arguments[0];"
        "var divs = document.getElementsByTagName('div');"
        "var out = [];"
        "for (var i = 0; i < divs.length; i++) {"
        "  var row = [];"
        "  for (var j = 0; j < props.length; j++) {"
        "    row.push(divs[i].style[props[j]] || '');"
        "  }"
        "  out.push(row);"
        "}"
        "return out;"
    )
    rows = d.execute_script(script, fontProperties) or []
    signatures = {tuple(value or "" for value in row) for row in rows}
    # Exclude the all-empty signature explicitly; subtracting one from the set
    # size gave -1 for pages without divs and undercounted pages where every
    # div carried an inline font.
    return sum(1 for signature in signatures if any(signature))

In [37]:
def getColorfullness(image):
    """Return a colourfulness score for a three-channel image.

    With the channels split as (B, G, R), the score is
    ``sqrt(std(rg)**2 + std(yb)**2) + 0.3 * sqrt(mean(rg)**2 + mean(yb)**2)``
    where ``rg = |R - G|`` and ``yb = |0.5 * (R + G) - B|``.

    Parameters
    ----------
    image : numpy.ndarray
        Image whose channels ``cv2.split`` yields in B, G, R order.

    Returns
    -------
    float
    """
    (B, G, R) = cv2.split(image.astype("float"))
    rg = np.absolute(R - G)
    yb = np.absolute(0.5 * (R + G) - B)
    (rgMean, rgStd) = (np.mean(rg), np.std(rg))
    (ybMean, ybStd) = (np.mean(yb), np.std(yb))
    stdRoot = np.sqrt((rgStd ** 2) + (ybStd ** 2))
    meanRoot = np.sqrt((rgMean ** 2) + (ybMean ** 2))
    return stdRoot + (0.3 * meanRoot)

In [38]:
def getVisualComplexity(image,num):
    """Measure visual complexity by recursively halving the image.

    A region is split in two along its longer side while its standard
    deviation is at least 10.0 and its longer side exceeds 20 pixels. The
    score is one plus the number of splits performed. An annotated copy of
    the image, with a rectangle drawn around each final region, is written to
    ``PATH + 'webScreenshot/Usa/screenshot<num>_Quad.png'``.

    Parameters
    ----------
    image : numpy.ndarray
        Image to analyse; ``getMetrics`` passes a greyscale screenshot.
        The array itself is not modified.
    num : object
        Identifier inserted into the output file name via ``str(num)``.

    Returns
    -------
    int
        One plus the number of splits.

    Notes
    -----
    The module-level names ``gridCount`` and ``rois`` are reset and then
    filled by each call, so calls must not overlap.
    """
    global gridCount
    global rois

    minDev = 10.0
    minSz = 20

    def splitImage(inImg):
        """Halve inImg along its longer side; return (offX1, offY1, img1, offX2, offY2, img2)."""
        h, w = inImg.shape[0], inImg.shape[1]
        if w >= h:  # split X
            half = int(w/2)
            return 0, 0, inImg[0:h, 0:half], half, 0, inImg[0:h, half:w]
        half = int(h/2)  # split Y
        return 0, 0, inImg[0:half, 0:w], 0, half, inImg[half:h, 0:w]

    def qt(inImg, offX, offY):
        """Split inImg recursively, recording each final region in rois."""
        global gridCount
        global rois
        h, w = inImg.shape[0], inImg.shape[1]
        m, s = cv2.meanStdDev(inImg)
        if s >= minDev and max(h, w) > minSz:
            oX1, oY1, im1, oX2, oY2, im2 = splitImage(inImg)
            gridCount += 1
            qt(im1, offX+oX1, offY+oY1)
            qt(im2, offX+oX2, offY+oY2)
        else:
            rois.append([offX, offY, w, h, m, s])

    gridCount = 1
    rois = []
    qt(image, 0, 0)

    # Draw on a copy so the caller's array is left untouched.
    imgOut = image.copy()
    for offX, offY, w, h, _, s in rois:
        # Regions below the deviation threshold are outlined with 0, all others with 255.
        col = 0 if s < minDev else 255
        cv2.rectangle(imgOut, (offX, offY), (offX+w, offY+h), col, 1)
    cv2.imwrite(PATH+'webScreenshot/Usa/screenshot'+str(num)+'_Quad.png', imgOut)

    return gridCount

In [39]:
def setDriverOptions():
    """Create and return a Chrome WebDriver for the project's Chromium binary.

    The browser binary is taken from the relative path
    ``webEvPy/bin/chromium-browser``. Headless mode is not enabled.
    """
    options = Options()
    options.binary_location = "webEvPy/bin/chromium-browser"
    # NOTE(review): the previous code also stored "webEvPy/bin/chromedriver" in
    # a local that was never used, so no driver path is passed to
    # webdriver.Chrome. Confirm whether that path was meant to be supplied.
    return webdriver.Chrome(options=options)

In [40]:
def isErrorPage(image1,image2,threshold=7*10**6):
    """Return True when two images are nearly identical.

    The images are compared by the sum of absolute per-element differences.

    Parameters
    ----------
    image1, image2 : numpy.ndarray
        Images of the same shape.
    threshold : int, optional
        The images count as matching when the summed difference is strictly
        below this value (default 7 * 10**6).

    Returns
    -------
    bool

    Raises
    ------
    ValueError
        If the shapes cannot be combined element-wise.
    TypeError
        If either argument is not array-like, e.g. ``None`` from a failed
        image read.
    """
    # Widen before subtracting: uint8 arithmetic wraps around (0 - 255 == 1),
    # which made the result depend on argument order and hid large differences.
    difference = np.asarray(image1, dtype=np.int32) - np.asarray(image2, dtype=np.int32)
    return bool(np.abs(difference).sum(dtype=np.int64) < threshold)

In [41]:
def getMetrics(urlFile):
    """Load one URL in Chrome, compute its page metrics and append them to the metrics CSV.

    Parameters
    ----------
    urlFile : mapping
        One input row with the keys ``'id'`` and ``'urls'``.

    Side effects
    ------------
    * Saves a screenshot to ``PATH + 'webScreenshot/Usa/screenshot<id>.png'``.
    * Appends ``[id, 13 metrics..., url]`` to
      ``PATH + 'yearMetrics/tempUrlMetricsUsa.csv'``.
    * On any error, prints the traceback and appends ``id,url`` to
      ``PATH + 'yearMetrics/CorruptUrlsUsa.csv'``.
    * Prints elapsed seconds, the current time, the id and the URL, except
      when the page is skipped as an error page.

    A page whose screenshot matches ``errorSample.png`` (see ``isErrorPage``)
    is skipped without writing anything.
    """
    num = urlFile['id']
    url = urlFile['urls']
    startTime = datetime.datetime.now()
    textFilename = PATH+"yearMetrics/CorruptUrlsUsa.csv"
    csvFilename = PATH+"yearMetrics/tempUrlMetricsUsa.csv"
    driver = None
    try:
        driver = setDriverOptions()
        driver.get(url)
        # Wayback Machine captures only (disabled): hide the toolbar first, e.g.
        #   style = driver.find_element_by_xpath("//div[@id='wm-ipp-base']")
        #   driver.execute_script("arguments[0].style.display = 'none'; return arguments[0];", style)
        driver.implicitly_wait(3)
        time.sleep(6)
        driver.set_window_size(1024, 768)
        WebDriverWait(driver, timeout=3).until(lambda x: x.find_elements_by_tag_name('body'))

        imagePath = PATH+'webScreenshot/Usa/screenshot'+str(num)+'.png'
        driver.save_screenshot(imagePath)
        image = cv2.imread(imagePath)
        errorImage = cv2.imread('errorSample.png')
        try:
            if isErrorPage(errorImage, image):
                # The finally clause below shuts the browser down.
                return
        except Exception:
            print("images unequal and not compared!!!")
        imageGrey = cv2.imread(imagePath, 0)
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        #---------------------------------------------------#
        #--------- Web Metric Calculation ------------------#
        #---------------------------------------------------#
        wordCount = get_word_count(driver)                                        # Parameter 1
        textBodyRatio = get_text_body_ratio(soup, wordCount)                      # Parameter 2
        emphTextPercent = get_emph_body_text_percentage(driver, soup, wordCount)  # Parameter 3
        textPositionalChanges = get_text_position_changes(soup)                   # Parameter 4
        textClusters = get_text_clusters(driver, soup)                            # Parameter 5
        visibleLinks = get_visible_links(driver, soup)                            # Parameter 6
        pageSize = get_page_size(driver)                                          # Parameter 7
        graphicsPercent = get_graphics_percent(driver, pageSize)                  # Parameter 8
        graphicsCount = get_graphics_count(driver, soup)                          # Parameter 9
        colorCount = get_color_count(image)                                       # Parameter 10
        fontCount = get_font_count(driver, soup)                                  # Parameter 11
        colourFullness = getColorfullness(image)                                  # Parameter 12
        visualComplexity = getVisualComplexity(imageGrey, num)                    # Parameter 13

        tempMetrics = [
            num,
            wordCount,
            textBodyRatio,
            emphTextPercent,
            textPositionalChanges,
            textClusters,
            visibleLinks,
            pageSize,
            graphicsPercent,
            graphicsCount,
            colorCount,
            fontCount,
            colourFullness,
            visualComplexity,
            url,
        ]
        with open(csvFilename, "a+", newline="") as csvFile:
            csv.writer(csvFile).writerow(tempMetrics)
    except Exception:
        # `except Exception` lets Ctrl-C stop the run instead of being logged
        # as a failed URL.
        print(traceback.format_exc())
        print("Error scraping the Url")
        with open(textFilename, "a+") as f2:
            f2.write(str(num)+","+url+"\n")
    finally:
        if driver is not None:
            try:
                # quit() ends the whole session; close() only closed the
                # window and left the driver process running for every URL.
                driver.quit()
            except Exception:
                print("###Error : Couldn't close driver")
    print((datetime.datetime.now()-startTime).seconds,"\t",datetime.datetime.now().replace(microsecond=0),"\t",num,url)

In [42]:
def _writeHeaderIfEmpty(path, fields):
    """Append ``fields`` as a CSV header row to ``path`` only if the file is new or empty."""
    with open(path, "a+", newline="") as csvFile:
        csvFile.seek(0, 2)  # position at the end so tell() reports the file size
        if csvFile.tell() == 0:
            csv.writer(csvFile).writerow(fields)


def main(filename,year=""):
    """Compute metrics for every URL listed in a CSV file.

    Parameters
    ----------
    filename : str
        Path of a CSV file with the columns ``id`` and ``urls``; each row is
        passed to ``getMetrics``.
    year : str, optional
        Unused; kept for backward compatibility.

    Side effects
    ------------
    Ensures that the metrics file and the failed-URL file under
    ``PATH + 'yearMetrics/'`` begin with a header row. These are the files
    ``getMetrics`` appends to; a header is written only when the file is
    empty, so repeated runs no longer insert header rows between data rows.
    """
    _writeHeaderIfEmpty(
        PATH+"yearMetrics/tempUrlMetricsUsa.csv",
        ["slno","p1","p2","p3","p4","p5","p6","p7","p8","p9","p10","p11","p12","p13","url"],
    )
    _writeHeaderIfEmpty(PATH+"yearMetrics/CorruptUrlsUsa.csv", ["id","urls"])

    # URLs are processed one at a time; an earlier multiprocessing variant
    # (mp.Pool(1).map_async over the rows) was disabled.
    with open(filename, "r", newline="") as csvFile:
        for row in csv.DictReader(csvFile):
            getMetrics(row)

In [43]:
# Run the scraper over every URL listed in tempUsa.csv (a relative path).
# The file needs the columns "id" and "urls", which getMetrics reads from each row.
filename="tempUsa.csv"
main(filename)

images unequal and not compared!!!
26 	 2019-06-14 13:21:32 	 1 http://acus.gov
17 	 2019-06-14 13:21:50 	 2 http://clerk.house.gov
images unequal and not compared!!!
16 	 2019-06-14 13:22:06 	 3 http://dra.gov
18 	 2019-06-14 13:22:25 	 4 http://endangered.fws.gov
images unequal and not compared!!!
22 	 2019-06-14 13:22:57 	 6 http://icollege.ndu.edu
47 	 2019-06-14 13:23:44 	 7 http://jfsc.ndu.edu
16 	 2019-06-14 13:24:00 	 8 http://lcweb.loc.gov/flicc
31 	 2019-06-14 13:24:31 	 9 http://lsc.gov
113 	 2019-06-14 13:26:24 	 10 http://nicic.gov
24 	 2019-06-14 13:26:49 	 11 http://npic.orst.edu
21 	 2019-06-14 13:27:11 	 12 http://oceanservice.noaa.gov
19 	 2019-06-14 13:27:30 	 13 http://ofr.gov
15 	 2019-06-14 13:27:46 	 14 http://otexa.trade.gov/cita_otexa.htm
37 	 2019-06-14 13:28:23 	 15 http://science.energy.gov
17 	 2019-06-14 13:28:40 	 16 http://stats.bls.gov
18 	 2019-06-14 13:28:59 	 17 http://trade.gov/index.asp
48 	 2019-06-14 13:29:47 	 18 http://travel.state.gov
21 	 201

28 	 2019-06-14 13:53:25 	 64 http://www.centcom.mil
25 	 2019-06-14 13:53:50 	 65 http://www.cfa.gov
15 	 2019-06-14 13:54:06 	 66 http://www.cffc.navy.mil
95 	 2019-06-14 13:55:41 	 67 http://www.cfo.gov
18 	 2019-06-14 13:56:00 	 68 http://www.cftc.gov
20 	 2019-06-14 13:56:21 	 69 http://www.chcoc.gov
21 	 2019-06-14 13:56:42 	 70 http://www.cia.gov
16 	 2019-06-14 13:56:58 	 71 http://www.cio.gov
Traceback (most recent call last):
  File "<ipython-input-41-5feea5ccba46>", line 22, in getMetrics
    WebDriverWait(driver, timeout=3).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/support/wait.py", line 71, in until
    value = method(self._driver)
  File "<ipython-input-41-5feea5ccba46>", line 22, in <lambda>
    WebDriverWait(driver, timeout=3).until(lambda x: x.find_elements_by_tag_name('body'))
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 546, in find_elements_by

16 	 2019-06-14 14:46:32 	 138 http://www.fdic.gov
14 	 2019-06-14 14:46:47 	 139 http://www.feb.gov
31 	 2019-06-14 14:47:19 	 140 http://www.fec.gov
167 	 2019-06-14 14:50:06 	 141 http://www.federallabs.org
37 	 2019-06-14 14:50:43 	 142 http://www.federalreserve.gov
23 	 2019-06-14 14:51:07 	 143 http://www.fema.gov
19 	 2019-06-14 14:51:26 	 144 http://www.ferc.gov
16 	 2019-06-14 14:51:43 	 145 http://www.ffiec.gov
40 	 2019-06-14 14:52:41 	 147 http://www.fhfa.gov
25 	 2019-06-14 14:53:07 	 148 http://www.fhwa.dot.gov
29 	 2019-06-14 14:53:36 	 149 http://www.fiscal.treasury.gov
25 	 2019-06-14 14:54:01 	 150 http://www.fitness.gov
25 	 2019-06-14 14:54:26 	 151 http://www.fjc.gov
27 	 2019-06-14 14:54:53 	 152 http://www.fletc.gov
18 	 2019-06-14 14:55:12 	 153 http://www.floodsmart.gov
24 	 2019-06-14 14:55:37 	 154 http://www.flra.gov
149 	 2019-06-14 14:58:06 	 155 http://www.fmc.gov
49 	 2019-06-14 14:58:56 	 156 http://www.fmcs.gov
28 	 2019-06-14 14:59:25 	 157 http://www

28 	 2019-06-14 15:27:00 	 220 http://www.ncd.gov
33 	 2019-06-14 15:27:33 	 221 http://www.ncpc.gov
34 	 2019-06-14 15:28:08 	 222 http://www.ncua.gov
34 	 2019-06-14 15:28:42 	 223 http://www.ndu.edu
18 	 2019-06-14 15:29:00 	 224 http://www.ndu.edu/Academics/CollegesCenters/NationalWarCollege.aspx
18 	 2019-06-14 15:29:19 	 225 http://www.ndu.edu/Academics/CollegesCenters/TheEisenhowerSchool.aspx
148 	 2019-06-14 15:31:48 	 226 http://www.neh.gov
29 	 2019-06-14 15:32:17 	 227 http://www.nga.gov
25 	 2019-06-14 15:32:42 	 228 http://www.nga.mil
27 	 2019-06-14 15:33:10 	 229 http://www.nhtsa.dot.gov
34 	 2019-06-14 15:33:44 	 230 http://www.ni-u.edu
30 	 2019-06-14 15:34:15 	 231 http://www.nifa.usda.gov
18 	 2019-06-14 15:34:33 	 232 http://www.nifc.gov
28 	 2019-06-14 15:35:02 	 233 http://www.nih.gov
28 	 2019-06-14 15:35:31 	 234 http://www.nij.gov/Pages/welcome.aspx
26 	 2019-06-14 15:35:57 	 235 http://www.nimh.nih.gov
29 	 2019-06-14 15:36:26 	 236 http://www.nist.gov
32 	 20

21 	 2019-06-14 16:37:36 	 294 http://www.stb.dot.gov
26 	 2019-06-14 16:38:03 	 295 http://www.stennis.gov
37 	 2019-06-14 16:38:40 	 296 http://www.stratcom.mil
23 	 2019-06-14 16:39:03 	 297 http://www.supremecourtus.gov
17 	 2019-06-14 16:40:51 	 299 http://www.treas.gov/ffb
45 	 2019-06-14 16:41:36 	 300 http://www.treasury.gov/Pages/default.aspx
37 	 2019-06-14 16:42:14 	 301 http://www.treasury.gov/about/organizational-structure/offices/International-Affairs/Pages/cfius-index.aspx
24 	 2019-06-14 16:42:38 	 302 http://www.truman.gov
31 	 2019-06-14 16:44:05 	 304 http://www.tva.gov
29 	 2019-06-14 16:44:35 	 305 http://www.udall.gov
25 	 2019-06-14 16:45:00 	 306 http://www.us-cert.gov
46 	 2019-06-14 16:45:47 	 307 http://www.usace.army.mil
34 	 2019-06-14 16:46:21 	 308 http://www.usadf.gov/welcome
26 	 2019-06-14 16:46:48 	 309 http://www.usaid.gov
31 	 2019-06-14 16:47:19 	 310 http://www.usbg.gov
16 	 2019-06-14 16:47:36 	 311 http://www.usccr.gov
21 	 2019-06-14 16:47:57 	

35 	 2019-06-14 17:55:13 	 425 https://www.va.gov/opa/vetsday/vdnc.asp
32 	 2019-06-14 17:55:45 	 426 https://www.wapa.gov
17 	 2019-06-14 17:56:03 	 427 https://www.whitehouse.gov/cea
17 	 2019-06-14 17:56:21 	 428 https://www.whitehouse.gov/ceq
17 	 2019-06-14 17:56:38 	 429 https://www.whitehouse.gov/omb
17 	 2019-06-14 17:56:56 	 430 https://www.whitehouse.gov/ostp
Traceback (most recent call last):
  File "<ipython-input-41-5feea5ccba46>", line 10, in getMetrics
    driver.get(url)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
    self.execute(Command.GET, {'url': url})
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.T