Skip to content
This repository has been archived by the owner on Mar 2, 2022. It is now read-only.

Commit

Permalink
Refactor bw file selection into separate class
Browse files Browse the repository at this point in the history
Also refactor to use file sizes in bytes and compare
the length of the request rather than chosen file name.
  • Loading branch information
aagbsn committed Jan 25, 2018
1 parent 943e003 commit 98aa946
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 33 deletions.
81 changes: 49 additions & 32 deletions bwscanner/measurement.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import random
import time

from stem.descriptor.server_descriptor import ServerDescriptor
Expand All @@ -14,11 +15,48 @@

# defer.setDebugging(True)

# Number of bytes in one mebibyte; file sizes below are expressed in bytes.
M = 1 << 20

class DownloadIncomplete(Exception):
    """Raised when a downloaded result's length differs from the expected file size."""


class BwFiles(object):
    """
    Registry of bandwidth-test files and the base URLs they are served from.

    Attributes:
        urls: list of base URLs registered through add_url().
        bw_files: dict mapping a file size in bytes to a
            (file name, hex digest string) tuple.
    """

    def __init__(self):
        self.urls = []
        # Keys are sizes in bytes (M is the module-level 1024*1024 constant).
        # NOTE(review): the second tuple element looks like a SHA-1 hex
        # digest of the file -- confirm; it is not used inside this class.
        self.bw_files = {
            64*M: ("64M", "913b3c5df256d62235f955fa936e7a4e2d5e0cb6"),
            32*M: ("32M", "a536076ef51c2cfff607fec2d362671e031d6b48"),
            16*M: ("16M", "e91690ed2abf05e347b61aafaa23abf2a2b3292f"),
            8*M: ("8M", "c690229b300945ec4ba872b80e8c443e2e1750f0"),
            4*M: ("4M", "94f7bc6679a4419b080debd70166c2e43e80533d"),
            2*M: ("2M", "9793cc92932598898d22497acdd5d732037b1a13"),
        }

    def add_url(self, base_url):
        """Register base_url as a download source; duplicates are ignored."""
        if base_url not in self.urls:
            self.urls.append(base_url)

    def get_base_url(self):
        """
        Return one of the registered base URLs, chosen at random.

        Raises:
            IndexError: if no base URL has been registered yet.
        """
        if not self.urls:
            # random.choice() would raise IndexError here as well, but with a
            # message that does not tell the operator what is missing.
            raise IndexError("no base URL registered; call add_url() first")
        return random.choice(self.urls)

    def choose_file_size(self, path):
        """
        Choose bandwidth file based on average bandwidth of relays on
        circuit.

        Returns the smallest key of bw_files that is strictly greater than
        five times the average ``bandwidth`` of the relays in path, or the
        largest key when no key is big enough.

        XXX: Should we just use the bandwidth of the measured relay instead?
        """
        # NOTE(review): bw_files keys are in bytes; confirm that r.bandwidth
        # is expressed in bytes/s too, otherwise this comparison mixes units.
        avg_bw = sum(r.bandwidth for r in path)/len(path)
        for size in sorted(self.bw_files):
            if avg_bw*5 < size:
                return size
        return max(self.bw_files)

    def choose_url(self, path):
        """Return a full URL: a random base URL joined to the file name chosen for path."""
        base_url = self.get_base_url()
        file_name = self.bw_files[self.choose_file_size(path)][0]
        return base_url + '/' + file_name


class BwScan(object):
def __init__(self, state, clock, measurement_dir, **kwargs):
"""
Expand All @@ -44,15 +82,8 @@ def __init__(self, state, clock, measurement_dir, **kwargs):

self.tasks = []
self.circuits = None
self.baseurl = 'https://bwauth.torproject.org/bwauth.torproject.org'
self.bw_files = {
64*1024: ("64M", "913b3c5df256d62235f955fa936e7a4e2d5e0cb6"),
32*1024: ("32M", "a536076ef51c2cfff607fec2d362671e031d6b48"),
16*1024: ("16M", "e91690ed2abf05e347b61aafaa23abf2a2b3292f"),
8*1024: ("8M", "c690229b300945ec4ba872b80e8c443e2e1750f0"),
4*1024: ("4M", "94f7bc6679a4419b080debd70166c2e43e80533d"),
2*1024: ("2M", "9793cc92932598898d22497acdd5d732037b1a13"),
}
self.BwFiles = BwFiles()
#self.BwFiles.add_url('https://bwauth.torproject.org/bwauth.torproject.org')

self.result_sink = ResultSink(self.measurement_dir, chunk_size=10)

Expand All @@ -62,22 +93,6 @@ def __init__(self, state, clock, measurement_dir, **kwargs):
def now(self):
return time.time()

def choose_file_size(self, path):
"""
Choose bandwidth file based on average bandwidth of relays on
circuit.
XXX: Should we just use the bandwidth of the measured relay instead?
"""
avg_bw = sum([r.bandwidth for r in path])/len(path)
for size in sorted(self.bw_files.keys()):
if avg_bw*5 < size:
return size
return max(self.bw_files.keys())

def choose_url(self, path):
return self.baseurl + '/' + self.bw_files[self.choose_file_size(path)][0]

def run_scan(self):
all_done = defer.Deferred()
if self.scan_continuous:
Expand Down Expand Up @@ -107,26 +122,28 @@ def scan_over_next_circuit():
return all_done

def fetch(self, path):
url = self.choose_url(path)
assert None not in path
log.info("Downloading file '{file_size}' over [{relay_fp}, {exit_fp}].",
file_size=url.split('/')[-1], relay_fp=path[0].id_hex, exit_fp=path[-1].id_hex)
file_size = self.choose_file_size(path) # File size in MB
url = self.BwFiles.choose_url(path)
file_size = self.BwFiles.choose_file_size(path)
file_name = url.split('/')[-1]
log.info("Downloading file '{file_name}' over [{relay_fp}, {exit_fp}].",
file_name=file_name, relay_fp=path[0].id_hex, exit_fp=path[-1].id_hex)
time_start = self.now()

@defer.inlineCallbacks
def get_circuit_bw(result):
time_end = self.now()
if len(result) != file_size * 1024:
#XXX: this is sketchy
if len(result) != file_size:
raise DownloadIncomplete
report = dict()
report['time_end'] = time_end
report['time_start'] = time_start
request_duration = report['time_end'] - report['time_start']
report['circ_bw'] = int((file_size * 1024) // request_duration)
report['circ_bw'] = (len(result) // request_duration)
report['path'] = [r.id_hex for r in path]
log.debug("Download took {duration} for {size} MB", duration=request_duration,
size=int(file_size // 1024))
size=int(len(result)// M))

# We need to wait for these deferreds to be ready, we can't serialize
# deferreds.
Expand Down
2 changes: 1 addition & 1 deletion test/test_measurement.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_scan_chutney(self):
# check that each run is producing the same input set!
self.tmp = mkdtemp()
scan = BwScan(self.tor_state, reactor, self.tmp)
scan.baseurl = 'http://127.0.0.1:{}'.format(self.port)
scan.BwFiles.add_url('http://127.0.0.1:{}'.format(self.port))

def check_all_routers_measured(measurement_dir):
"""
Expand Down

0 comments on commit 98aa946

Please sign in to comment.