In [1]:
from bs4 import BeautifulSoup
from scipy.spatial import cKDTree as KDTree
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from pyvirtualdisplay import Display

import csv
import cv2
import datetime
import matplotlib
import multiprocessing as mp
import numpy as np
import re
import string
import sys
import unidecode
import traceback
import time

In [2]:
gridCount=1
rois=[]
PATH='/home/abhiavk/git/Website-Evolution/'

In [3]:
def timeTaken(startTime, Metric, MetricValue=""):
	print(Metric.ljust(25," "),datetime.datetime.now()-startTime,"\t\t",MetricValue)

In [4]:
def string_to_words(txt):
    txt=re.sub("[^\w]"," ",  txt)
    txt=re.sub("[0-9]+"," ",  txt)
    txt=re.sub(" [a-zA-Z]{1} "," ",  txt)
    txt=re.sub(" [a-zA-Z]{2} "," ",  txt)
    txt=re.sub("[ ]+"," ",txt)
    return txt.split()

In [5]:
def get_words(d):
    txt=""
    try:
        txt+=d.execute_script("return document.body.innerText;")
    except:
        pass
    try:
        txt+=d.execute_script("return document.innerText;")
    except:
        pass
    words = string_to_words(str(unidecode.unidecode(txt)))
    return words

In [6]:
def get_word_count(d):
    startTime=datetime.datetime.now()
    words=get_words(d)
    wordCount=float(len(words))
    #timeTaken(startTime,"Word Count",wordCount)
    return wordCount

In [7]:
def get_text_body_ratio(soup,wordCount):

    startTime=datetime.datetime.now()
    headers=[]
    for i in range(1,7):
        headers+=soup.findAll("h"+str(i))
    sizeHeaders=[]
    sizeHeaders+=soup.findAll("font",{"size":"3"})
    sizeHeaders+=soup.findAll("font",{"size":"4"})
    sizeHeaders+=soup.findAll("font",{"size":"5"})
    txt=""
    for i in headers:
        txt+=" "+i.text
    for i in sizeHeaders:
        txt+=" "+i.text
    words=[]
    if len(txt)!=0:
        words=string_to_words(str(unidecode.unidecode(txt)))
    #print words
    try:
        headTextCount=float(len(words))
    except:
        headtextCount=0.0
    if wordCount:
        textBodyRatio=headTextCount/wordCount
    else:
        textBodyRatio=0.0
    #timeTaken(startTime,"Text Body Ratio",textBodyRatio)
    return textBodyRatio

In [8]:
def get_emph_body_text_percentage(d,bs,wordCount):

    #print "Param3"
    startTime=datetime.datetime.now()
    boldText = bs.findAll("b")
    words=[]
    for i in boldText:
        try:
            words+= string_to_words(str(unidecode.unidecode(i.text)))
        except:
            pass
    boldWordCount=len(words)
    try:
        txt=str(unidecode.unidecode(d.execute_script("return document.body.innerText")))
    except:
        txt=str(unidecode.unidecode(d.execute_script("return document.body.textContent")))
    pattern = re.compile("!+")
    exclWordCount=len(re.findall(pattern,txt))
    words=get_words(d)
    capWordCount=0
    for i in words:
        if i==i.upper():
            capWordCount+=1

    #print boldWordCount, exclWordCount, capWordCount

    emphTextCount=float(boldWordCount + exclWordCount + capWordCount)

    if wordCount:
        emphTextPercent=(emphTextCount/wordCount)*100.0
    else:
        emphTextPercent=0.0
    #timeTaken(startTime,"Emph text Percent",emphTextPercent)
    return emphTextPercent

In [9]:
def get_text_position_changes(s):
    startTime=datetime.datetime.now()
    #print "Param
    elem=s.findAll()
    prev=""
    textPositionChanges=0
    for i in elem:
        try:
            string=str(i["style"])
            if "text-align:"in string:
                align=string.split("text-align:")[1]
                position=align.split(";")[0].strip()
                if position!=prev:
                    textPositionChanges+=1
                    prev=position
        except:
            pass
    #timeTaken(startTime,"Text Positional Changes",textPositionChanges)
    return textPositionChanges

In [10]:
def get_text_clusters(d,bs):

	#print "Param5"
	startTime=datetime.datetime.now()
	tableText= bs.findAll("td")+bs.findAll("table")
	paraText = bs.findAll("p")
	textClusters=len(tableText)+len(paraText)
	#timeTaken(startTime,"Text Clusters",textClusters)
	return textClusters

In [11]:
def get_visible_links(d,bs):

	#print "Param6"
	startTime=datetime.datetime.now()
	links=bs.findAll("a")
	visibleLinkCount=0
	for i in links:
		if i.text != "":
			visibleLinkCount+=1
	#timeTaken(startTime,"Visible Links",visibleLinkCount)
	return visibleLinkCount

In [12]:
def get_page_size(d):

	#print "Param7"
	startTime=datetime.datetime.now()
	scriptToExecute = "	var performance = 	window.performance ||\
											window.mozPerformance ||\
											window.msPerformance ||\
									 		window.webkitPerformance || {};\
						var network 	= 	performance.getEntries() || {};\
						return network;"
	networkData = d.execute_script(scriptToExecute)
	pageSize=0
	for i in networkData:
		try:
			pageSize+=float(i[u'transferSize'])
		except:
			pass
	pageSize=float(pageSize)/1024.0
	#timeTaken(startTime,"Page Size",pageSize)
	return pageSize

In [13]:
def get_graphics_percent(d,pageSize):

	#print "Param8"
	startTime=datetime.datetime.now()
	scriptToExecute = "var performance = window.performance || window.mozPerformance || window.msPerformance || window.webkitPerformance || {}; var network = performance.getEntries() || {}; return network;"
	networkData = d.execute_script(scriptToExecute)
	graphicsSize=0.0
	for i in networkData:
		try:
			if i[u'initiatorType']== u'script' or i[u'initiatorType']==u'img' or i['initiatorType']== u'css':
				graphicsSize+=float(i[u'transferSize'])
		except:
			pass
	graphicsSize=float(graphicsSize)/1024.0

	if pageSize==0:
		graphicsPercent=0.0
	else:
		graphicsPercent=graphicsSize*100.0/pageSize
	#timeTaken(startTime,"Graphic Size",graphicsSize)
	return graphicsPercent

In [14]:
def get_graphics_count(d,bs):
	startTime=datetime.datetime.now()
	#print "Param9"
	styleSteets=bs.findAll("style")
	scripts=bs.findAll("script")
	images=d.execute_script("return document.images;")
	graphicsCount=len(styleSteets)+len(images)+len(scripts)
	#timeTaken(startTime,"Graphics Count",graphicsCount)
	return  graphicsCount

In [15]:
def get_color_count(image):
    startTime=datetime.datetime.now()
    use_colors = matplotlib.colors.cnames
    named_colors = {k: tuple(map(int, (v[1:3], v[3:5], v[5:7]), 3*(16,))) for k, v in use_colors.items()}
    ncol = len(named_colors)
    no_match = named_colors['purple']

    color_tuples = list(named_colors.values())
    color_tuples.append(no_match)
    color_tuples = np.array(color_tuples)

    color_names = list(named_colors)
    color_names.append('no match')

    tree = KDTree(color_tuples[:-1])

    tolerance = np.inf
    dist, idx = tree.query(image, distance_upper_bound=tolerance)

    colCounts = np.bincount(idx.ravel(), None, ncol+1).tolist()
    colNames  = color_names

    colors=[]
    for i in range(len(color_names)):
        colors.append([colCounts[i],color_names[i]])

    colors.sort(reverse=True)

    colorCount=0
    for color in colors:
        if color[0]>=7864: #1% of the pixels
            colorCount+=1
        else:
            break

    #timeTaken(startTime,"Color Count",colorCount)
    return colorCount

In [16]:
def get_font_count(d,bs):
    startTime=datetime.datetime.now()
    divCount=len(bs.findAll("div"))
    diffFont=set([])
    for i in range(divCount):
        fontStr=""
        script='return document.getElementsByTagName("div")['+str(i)+']["style"]'
        fontStr+=d.execute_script(script+'["font"];')+"font"
        fontStr+=d.execute_script(script+'["fontDisplay"];')+"fontDisplay"
        fontStr+=d.execute_script(script+'["fontFamily"];')+"fontFamily"
        fontStr+=d.execute_script(script+'["fontFeatureSettings"];')+"fontFeatureSettings"
        fontStr+=d.execute_script(script+'["fontKerning"];')+"fontKerning"
        fontStr+=d.execute_script(script+'["fontSize"];')+"fontSize"
        fontStr+=d.execute_script(script+'["fontStretch"];')+"fontStretch"
        fontStr+=d.execute_script(script+'["fontStyle"];')+"fontStyle"
        fontStr+=d.execute_script(script+'["fontVariant"];')+"fontVariant"
        fontStr+=d.execute_script(script+'["fontVariantCaps"];')+"fontVariantCaps"
        fontStr+=d.execute_script(script+'["fontVariantEastAsian"];')+"fontVariantEastAsian"
        fontStr+=d.execute_script(script+'["fontVariantLigatures"];')+"fontVariantLigatures"
        fontStr+=d.execute_script(script+'["fontVariantNumeric"];')+"fontVariantNumeric"
        fontStr+=d.execute_script(script+'["fontVariationSettings"];')+"fontVariationSettings"
        fontStr+=d.execute_script(script+'["fontWeight"];')+"fontWeight"

        diffFont.add(fontStr)
    #print(diffFont)
    fontCount=len(diffFont)-1 # -1 for empty font (default font)
    #timeTaken(startTime,"Font Count",fontCount)
    return fontCount

In [17]:
def getColorfullness(image):
    startTime=datetime.datetime.now()
    (B, G, R) = cv2.split(image.astype("float"))
    rg = np.absolute(R - G)
    yb = np.absolute(0.5 * (R + G) - B)
    (rbMean, rbStd) = (np.mean(rg), np.std(rg))
    (ybMean, ybStd) = (np.mean(yb), np.std(yb))
    stdRoot = np.sqrt((rbStd ** 2) + (ybStd ** 2))
    meanRoot = np.sqrt((rbMean ** 2) + (ybMean ** 2))
    colourFullness = stdRoot + (0.3 * meanRoot)
    #timeTaken(startTime,"Colourfullness",colourFullness)
    return colourFullness

In [18]:
def getVisualComplexity(image,num):
    startTime=datetime.datetime.now()
    year=sys.argv[-2]
    def splitImage(inImg):
        h,w = inImg.shape[0], inImg.shape[1]
        off1X=0
        off1Y=0
        off2X=0
        off2Y=0
        if w >= h:  #split X
            off1X=0
            off2X=int(w/2)
            img1 = inImg[0:h, 0:off2X]
            img2 = inImg[0:h, off2X:w]
        else:       #split Y
            off1Y=0
            off2Y=int(h/2)
            img1 = inImg[0:off2Y, 0:w]
            img2 = inImg[off2Y:h, 0:w]
        return off1X,off1Y,img1, off2X,off2Y,img2
    def qt(inImg, minStd, minSize, offX, offY):
        global gridCount
        global rois
        h,w = inImg.shape[0], inImg.shape[1]
        m,s = cv2.meanStdDev(inImg)
        if s>=minStd and max(h,w)>minSize:
            oX1,oY1,im1, oX2,oY2,im2 = splitImage(inImg)
            gridCount+=1
            qt(im1, minStd, minSize, offX+oX1, offY+oY1)
            qt(im2, minStd, minSize, offX+oX2, offY+oY2)
        else:
            rois.append([offX,offY,w,h,m,s])

    global gridCount
    global rois

    gridCount=1
    rois=[]
    offX, offY=0,0
    minDev        = 10.0
    minSz         = 20

    #cv2.imshow('Start Image',image)
    h,w = image.shape[0], image.shape[1]
    m,s = cv2.meanStdDev(image)
    qt(image,minDev,minSz,offX,offY)
    imgOut=image
    for e in rois:
        col=255
        if e[5]<minDev:
            col=0
        cv2.rectangle(imgOut, (e[0],e[1]), (e[0]+e[2],e[1]+e[3]), col, 1)
    cv2.imwrite(PATH+'webScreenshot/'+str(year)+'/screenshot'+str(num)+'_Quad.png',imgOut)
    #cv2.imshow('Quad Image',imgOut)
    #cv2.waitKey(0)
    #cv2.destroyAllWindows()
    visualComplexity=gridCount#((gridCount*1.0)/(1024.0*768.0))**-1
    #timeTaken(startTime,"Visual Complexity",visualComplexity)
    return visualComplexity

In [19]:
def setDriverOptions():
    options 				= Options()
    options.binary_location = "webEvPy/bin/chromium-browser"
    chrome_driver_binary	= "webEvPy/bin/chromedriver"
    #options.add_argument("--headless")
    return	webdriver.Chrome(options=options)

In [20]:
def getMetrics(urlFile):
    num=urlFile['id']
    url=urlFile['urls']
    #print(url,num)
    startTime 		= datetime.datetime.now()
    textFilename	= PATH+"yearMetrics/CorruptUrls"+str(year)+".csv"
    csvFilename		= PATH+"yearMetrics/tempMpUrlMetrics"+str(year)+".csv"
    try:
        driver			= setDriverOptions()
        driver.get(url)
        try:
            style=driver.find_element_by_xpath("//div[@id='wm-ipp-base']")
            style=driver.execute_script("arguments[0].style.display = 'none'; return arguments[0];", style)
        except:
            print("Hiding wb toolbar error")
            pass
        driver.implicitly_wait(10)
        time.sleep(5)
        driver.set_window_size(1024, 768)
        WebDriverWait(driver, timeout=15).until(lambda x: x.find_elements_by_tag_name('body'))
        
        imagePath=PATH+'webScreenshot/'+str(year)+'/screenshot'+str(num)+'.png'
        #print(imagePath)
        driver.save_screenshot(imagePath)
        image = cv2.imread(imagePath)
        imageGrey = cv2.imread(imagePath,0)
        page_source=driver.page_source
        soup=BeautifulSoup(page_source,'html.parser')
        #---------------------------------------------------#
        #--------- Web Metric Calculation ------------------#
        #---------------------------------------------------#
        wordCount				= get_word_count(driver)#Parameter 1
        textBodyRatio			= get_text_body_ratio(soup,wordCount)#Parameter 2
        emphTextPercent			= get_emph_body_text_percentage(driver,soup,wordCount)#Parameter 3
        textPositionalChanges	= get_text_position_changes(soup)#Parameter 4
        textClusters			= get_text_clusters(driver,soup)#Parameter 5
        visibleLinks			= get_visible_links(driver,soup)#Parameter 6
        pageSize				= get_page_size(driver)#Parameter 7
        graphicsPercent			= get_graphics_percent(driver,pageSize)#Parameter 8
        graphicsCount 			= get_graphics_count(driver,soup)#Parameter 9
        colorCount				= get_color_count(image)#Parameter 10
        fontCount				= get_font_count(driver,soup)#Parameter 11
        colourFullness			= getColorfullness(image)#Parameter 12
        visualComplexity		= getVisualComplexity(imageGrey,num)


        tempMetrics=[
                    num,\
                    wordCount,\
                    textBodyRatio,\
                    emphTextPercent,\
                    textPositionalChanges,\
                    textClusters,\
                    visibleLinks,\
                    pageSize,\
                    graphicsPercent,\
                    graphicsCount,\
                    colorCount,\
                    fontCount,\
                    colourFullness,\
                    visualComplexity,\
                    url
            ]
        line=tempMetrics
        csvFile		= open(csvFilename,"a+")
        csvWriter	= csv.writer(csvFile)
        csvWriter.writerow(line)
        csvFile.close()
        driver.close()
    except:
        print(traceback.format_exc())
        try:
            driver.close()
        except:
            print("###Error : Couldn't close driver")
        print("Error scraping the Url")
        f2			= open(textFilename,"a+")
        f2.write(num+","+url+"\n")
        f2.close()
    print((datetime.datetime.now()-startTime).seconds,"\t",datetime.datetime.now().replace(microsecond=0),"\t",year,num,url)

In [21]:
def main(filename,year=""):
    
    fields			= ["slno","p1","p2","p3","p4","p5","p6","p7","p8","p9","p10","p11","p12","p13","url"]
    csvFilename		= "yearMetrics/tempMpUrlMetrics"+str(year)+".csv"
    csvFile			= open(csvFilename,"a+")
    csvWriter		= csv.writer(csvFile)
    csvWriter.writerow(fields)
    csvFile.close()
    
    fields			= ["id","urls"]
    csvFilename		= "yearMetrics/CorruptUrls"+str(year)+".csv"
    csvFile			= open(csvFilename,"a+")
    csvWriter		= csv.writer(csvFile)
    csvWriter.writerow(fields)
    csvFile.close()
    
    csvFile			= open(filename,"r")
    urlFile			= csv.DictReader(csvFile)
    """
    driver			= setDriverOptions()
    manager 		= mp.Manager()
    urls 			= manager.list()
    results 		= manager.list()
    pool 			= mp.Pool(1)
    results 		= pool.map_async(getMetrics, urlFile)
    while not results.ready():
        pass
    """
    for url in urlFile:
        getMetrics(url)
    csvFile.close()

In [22]:
for year in range(2018,2020):
    filename="yearUrlWb/Wb"+str(year)+".csv"
    year=str(year)
    main(filename,year)

38 	 2019-05-15 01:06:20 	 2018 1 https://web.archive.org/web/20180101000000/http://www.onesharedhouse2030.com
46 	 2019-05-15 01:07:06 	 2018 2 https://web.archive.org/web/20180101000000/https://www.architecturaldigest.com/
30 	 2019-05-15 01:07:37 	 2018 3 https://web.archive.org/web/20180101000000/http://pioneeringwomen.bwaf.org/
48 	 2019-05-15 01:08:25 	 2018 4 https://web.archive.org/web/20180101000000/https://www.studiomunge.com/
34 	 2019-05-15 01:09:00 	 2018 5 https://web.archive.org/web/20180101000000/http://www.whitearkitekter.com
25 	 2019-05-15 01:09:25 	 2018 6 https://web.archive.org/web/20180101000000/http://history.laumeiersculpturepark.org
48 	 2019-05-15 01:10:13 	 2018 7 https://web.archive.org/web/20180101000000/http://www.mondriaan.nl/en
Hiding wb toolbar error
38 	 2019-05-15 01:10:52 	 2018 8 https://web.archive.org/web/20180101000000/http://viennesemodernism2018.info
21 	 2019-05-15 01:11:13 	 2018 9 https://web.archive.org/web/20180101000000/https://www.futur

35 	 2019-05-15 01:54:35 	 2018 75 https://web.archive.org/web/20180101000000/https://manncenter.org/
70 	 2019-05-15 01:55:45 	 2018 76 https://web.archive.org/web/20180101000000/http://complexcon.com
55 	 2019-05-15 01:56:41 	 2018 77 https://web.archive.org/web/20180101000000/https://i-d.vice.com/en_us
50 	 2019-05-15 01:57:31 	 2018 78 https://web.archive.org/web/20180101000000/http://www.thecut.com/
31 	 2019-05-15 01:58:02 	 2018 79 https://web.archive.org/web/20180101000000/https://www.them.us/
Traceback (most recent call last):
  File "<ipython-input-20-db80ad8daad8>", line 10, in getMetrics
    driver.get(url)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
    self.execute(Command.GET, {'url': url})
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/r

Hiding wb toolbar error
37 	 2019-05-15 02:43:50 	 2018 132 https://web.archive.org/web/20180101000000/http://www.fandom.com
27 	 2019-05-15 02:44:18 	 2018 133 https://web.archive.org/web/20180101000000/http://www.adultswim.com/music/singles-2017/
42 	 2019-05-15 02:45:00 	 2018 134 https://web.archive.org/web/20180101000000/http://www.complex.com
78 	 2019-05-15 02:46:18 	 2018 135 https://web.archive.org/web/20180101000000/https://experiments.withgoogle.com/webvr/inside-music/view/
39 	 2019-05-15 02:46:58 	 2018 136 https://web.archive.org/web/20180101000000/https://lnwy.co
24 	 2019-05-15 02:47:23 	 2018 137 https://web.archive.org/web/20180101000000/http://www.bigsound.buzz
17 	 2019-05-15 02:47:40 	 2018 138 https://web.archive.org/web/20180101000000/http://nytrainproject.com/
Hiding wb toolbar error
24 	 2019-05-15 02:48:05 	 2018 139 https://web.archive.org/web/20180101000000/http://www.adultswim.com/etcetera/elastic-man/
27 	 2019-05-15 02:48:33 	 2018 140 https://web.archive

Error scraping the Url
302 	 2019-05-15 03:37:47 	 2018 192 https://web.archive.org/web/20180101000000/http://www.travelandleisure.com/
57 	 2019-05-15 03:38:44 	 2018 193 https://web.archive.org/web/20180101000000/http://squarespace.com
Hiding wb toolbar error
32 	 2019-05-15 03:39:17 	 2018 194 https://web.archive.org/web/20180101000000/https://dae.sk
66 	 2019-05-15 03:40:23 	 2018 195 https://web.archive.org/web/20180101000000/https://www.youtube.com/watch?v=j5dHMB1eCmY
Hiding wb toolbar error
51 	 2019-05-15 03:41:15 	 2018 196 https://web.archive.org/web/20180101000000/https://marketfinder.thinkwithgoogle.com
21 	 2019-05-15 03:41:36 	 2018 197 https://web.archive.org/web/20180101000000/http://myportfolio.com
25 	 2019-05-15 03:42:01 	 2018 198 https://web.archive.org/web/20180101000000/https://therottenappl.es/
110 	 2019-05-15 03:43:52 	 2018 199 https://web.archive.org/web/20180101000000/https://exoplanets.nasa.gov
56 	 2019-05-15 03:44:48 	 2018 200 https://web.archive.org/we

31 	 2019-05-15 04:25:56 	 2018 241 https://web.archive.org/web/20180101000000/http://www.espn.com/espn/feature/story/_/page/body/espn-body-issue-2017
59 	 2019-05-15 04:26:56 	 2018 242 https://web.archive.org/web/20180101000000/https://www.nationalgeographic.org/projects/photo-ark/
35 	 2019-05-15 04:27:31 	 2018 243 https://web.archive.org/web/20180101000000/http://www.nationalgeographic.com/photography/proof/2017/06/hummingbirds-slow-motion-flight-videos/
32 	 2019-05-15 04:28:03 	 2018 244 https://web.archive.org/web/20180101000000/http://bearsears.patagonia.com/
43 	 2019-05-15 04:28:47 	 2018 245 https://web.archive.org/web/20180101000000/http://spacewehold.nfb.ca/
34 	 2019-05-15 04:29:22 	 2018 246 https://web.archive.org/web/20180101000000/http://www.aisforalbert.com/
30 	 2019-05-15 04:29:52 	 2018 247 https://web.archive.org/web/20180101000000/http://ueno.co
24 	 2019-05-15 04:30:17 	 2018 248 https://web.archive.org/web/20180101000000/https://play.lso.co.uk/
68 	 2019-05-1

69 	 2019-05-15 05:16:56 	 2019 32 https://web.archive.org/web/20190101000000/http://tumblr.com
35 	 2019-05-15 05:17:32 	 2019 33 https://web.archive.org/web/20190101000000/https://yourshot.nationalgeographic.com/
37 	 2019-05-15 05:18:10 	 2019 34 https://web.archive.org/web/20190101000000/http://www.kaleidoscopesociety.com
32 	 2019-05-15 05:18:42 	 2019 35 https://web.archive.org/web/20190101000000/https://prince2me.com/
Traceback (most recent call last):
  File "<ipython-input-20-db80ad8daad8>", line 42, in getMetrics
    fontCount				= get_font_count(driver,soup)#Parameter 11
  File "<ipython-input-16-4b51463ec027>", line 8, in get_font_count
    fontStr+=d.execute_script(script+'["font"];')+"font"
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 636, in execute_script
    'args': converted_args})['value']
  File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.

48 	 2019-05-15 05:59:27 	 2019 94 https://web.archive.org/web/20190101000000/https://overthedosevt.com
53 	 2019-05-15 06:00:20 	 2019 95 https://web.archive.org/web/20190101000000/https://www.nationalgeographic.com/environment/plasticpledge/
61 	 2019-05-15 06:01:22 	 2019 96 https://web.archive.org/web/20190101000000/https://climate.nasa.gov
Hiding wb toolbar error
25 	 2019-05-15 06:01:47 	 2019 97 https://web.archive.org/web/20190101000000/https://www.watercalculator.org/
Hiding wb toolbar error
43 	 2019-05-15 06:02:31 	 2019 98 https://web.archive.org/web/20190101000000/https://www.nationalgeographic.org/photoark/explore/
160 	 2019-05-15 06:05:11 	 2019 99 https://web.archive.org/web/20190101000000/https://www.google.com/about/stories/cleanwater/
81 	 2019-05-15 06:06:32 	 2019 100 https://web.archive.org/web/20190101000000/https://www.survivornet.com
39 	 2019-05-15 06:07:12 	 2019 101 https://web.archive.org/web/20190101000000/https://www.forksoverknives.com/
57 	 2019-05-15 

54 	 2019-05-15 06:50:46 	 2019 157 https://web.archive.org/web/20190101000000/https://www.compass.com/
62 	 2019-05-15 06:51:49 	 2019 158 https://web.archive.org/web/20190101000000/http://zillow.com
61 	 2019-05-15 06:52:50 	 2019 159 https://web.archive.org/web/20190101000000/https://www.coperni.co/en
40 	 2019-05-15 06:53:31 	 2019 160 https://web.archive.org/web/20190101000000/http://nooklyn.com
37 	 2019-05-15 06:54:09 	 2019 161 https://web.archive.org/web/20190101000000/http://www.realtor.com
72 	 2019-05-15 06:55:22 	 2019 162 https://web.archive.org/web/20190101000000/https://mica.edu/
38 	 2019-05-15 06:56:00 	 2019 163 https://web.archive.org/web/20190101000000/http://mitsloan.mit.edu/
29 	 2019-05-15 06:56:30 	 2019 164 https://web.archive.org/web/20190101000000/https://www.swarthmore.edu/
36 	 2019-05-15 06:57:06 	 2019 165 https://web.archive.org/web/20190101000000/https://library.harvard.edu/
43 	 2019-05-15 06:57:49 	 2019 166 https://web.archive.org/web/20190101000000

52 	 2019-05-15 07:46:10 	 2019 226 https://web.archive.org/web/20190101000000/http://culture.basicagency.com
Hiding wb toolbar error
23 	 2019-05-15 07:46:33 	 2019 227 https://web.archive.org/web/20190101000000/http://brain-drool.jp/2016/
30 	 2019-05-15 07:47:04 	 2019 228 https://web.archive.org/web/20190101000000/https://www.gramercytavern.com/
Hiding wb toolbar error
18 	 2019-05-15 07:47:22 	 2019 229 https://web.archive.org/web/20190101000000/http://www.nonbrandedurl.com/awards/oatthegoat/
20 	 2019-05-15 07:47:42 	 2019 230 https://web.archive.org/web/20190101000000/https://www.imabandi.to/
Hiding wb toolbar error
20 	 2019-05-15 07:48:02 	 2019 231 https://web.archive.org/web/20190101000000/http://sweetjellybeans.com/2019/cca/google-cloud-infrastructure/site/infrastructure/
63 	 2019-05-15 07:49:05 	 2019 232 https://web.archive.org/web/20190101000000/http://speakyourmind.squarespace.com
68 	 2019-05-15 07:50:13 	 2019 233 https://web.archive.org/web/20190101000000/https://ww

Hiding wb toolbar error
18 	 2019-05-15 08:30:05 	 2019 274 https://web.archive.org/web/20190101000000/http://www.awardsmobilityforall.com/
