In [1]:
import hashlib, random, re, sqlite3, urllib2

In [2]:
class SqliteKVS:
    """Minimal persistent key/value store backed by one SQLite table.

    Entries live in a ``kvs`` table (``key PRIMARY KEY, value``) that is
    created on first use. Every write is committed immediately.

    Instances can be used as context managers; leaving the ``with`` block
    closes the underlying connection.
    """

    def __init__(self, database_path):
        """Open the database at ``database_path`` and ensure the table exists.

        ``database_path`` is passed straight to ``sqlite3.connect``.
        """
        self.conn = sqlite3.connect(database_path)
        self.cur = self.conn.cursor()
        self.cur.execute(('CREATE TABLE IF NOT EXISTS kvs'
                          ' (key PRIMARY KEY, value)'
                          ' WITHOUT ROWID;'))
        self.conn.commit()

    def get(self, key):
        """Return the value stored under ``key``, or None if there is none."""
        self.cur.execute('SELECT value FROM kvs WHERE key=?;', (key,))
        # ``key`` is the primary key, so at most one row can match.
        row = self.cur.fetchone()
        if row is None:
            return None
        return row[0]

    def set(self, key, value):
        """Store ``value`` under ``key``, replacing any existing entry."""
        self.cur.execute(('INSERT OR REPLACE INTO kvs (key, value)'
                          ' VALUES (?, ?);'),
                         (key, value))
        self.conn.commit()

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()

    def __enter__(self):
        """Return the store itself for use inside a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection; never suppresses an in-flight exception."""
        self.close()
        return False

# Shared store used by later cells to map a tile URL to the SHA-256 hex digest
# recorded for that tile.
tilehashes = SqliteKVS(database_path='tilehashes.db')

In [3]:
# access_log parsing regex at https://regex101.com/r/w01dpj/1
access_log_re = r"^(?P<local_host>[^ ]+?) (?P<remote_host>[^ ]+) (?P<user1>[^ ]+) (?P<user2>[^ ]+) \[(?P<date>[^\]]+)\] \"(?P<request>.[^\"]*)\" (?P<status>[\d]+) (?P<len>[\d]+) \"(?P<referer>[^\"]*)\" \"(?P<user_agent>[^\"]*)\""

# Tile request paths (those starting with '/tilesv1') found in the access log.
tilefetch_urls = []
# Lines that could not be parsed into a request with a URL token.
skipped_lines = 0

# The context manager closes the log file even if parsing raises.
with open('sample_tileserver_access_log') as access_log:
    for line in access_log:
        match = re.match(access_log_re, line)
        # The request field is split on whitespace and the second token is
        # taken as the URL. Lines that do not match the regex, or whose
        # request has fewer than two tokens, are counted and skipped rather
        # than aborting the whole cell.
        request_fields = match.group('request').split() if match else []
        if len(request_fields) < 2:
            skipped_lines += 1
            continue
        url = request_fields[1]
        if url.startswith('/tilesv1'):
            tilefetch_urls.append(url)

# Parenthesised single-argument form prints identically under Python 2 and 3.
print('Collected %d urls' % len(tilefetch_urls))
if skipped_lines:
    print('Skipped %d unparseable log lines' % skipped_lines)

# Shuffle deterministically
random.seed(999999999)
random.shuffle(tilefetch_urls)

Collected 5869 urls


In [4]:
# sudo systemctl restart dotmaptiles-staging

# Set to True to fetch the first 1000 shuffled tile URLs from staging and store
# each tile's SHA-256 hex digest in `tilehashes`, keyed by URL.
write_test_hashes = False

if write_test_hashes:
    for url in tilefetch_urls[:1000]:
        data = urllib2.urlopen('http://dotmaptiles-staging.createlab.org' + url).read()
        digest = hashlib.sha256(data).hexdigest()
        tilehashes.set(url, digest)

In [25]:
# before 100 tiles: 7GB
# now 5900 tiles: 4.8GB!

# Re-fetch every collected tile URL from staging. Where a hash was previously
# stored for the URL, the SHA-256 of the fetched bytes must match it.
fetched_tiles = 0
checked_tiles = 0

for url in tilefetch_urls:
    data = urllib2.urlopen('http://dotmaptiles-staging.createlab.org' + url).read()
    expected = tilehashes.get(url)
    fetched_tiles += 1
    if expected:
        actual = hashlib.sha256(data).hexdigest()
        # Name the offending tile and both digests so a failure is actionable.
        assert actual == expected, (
            'Tile hash mismatch for %s: expected %s, got %s'
            % (url, expected, actual))
        checked_tiles += 1
# Explicit keyword arguments instead of **locals(); the printed text is unchanged.
print('Test succeeded:  checked {checked_tiles} of a total {fetched_tiles} tiles fetched'.format(
    checked_tiles=checked_tiles, fetched_tiles=fetched_tiles))


Test succeeded:  checked 1831 of a total 5869 tiles fetched
