diff --git a/backend/application.py b/backend/application.py
index 8a3b7ca92..ac5f8989f 100644
--- a/backend/application.py
+++ b/backend/application.py
@@ -33,7 +33,7 @@ def build_dataset_structure(dataset_version, user=None, dataset=None):
 
     if user:
         r['is_admin'] = user.is_admin(dataset)
-        if user.has_access(dataset):
+        if user.has_access(dataset, dataset_version.version):
            r['authorization_level'] = 'has_access'
         elif user.has_requested_access(dataset):
             r['authorization_level'] = 'has_requested_access'
@@ -183,15 +183,13 @@ def get(self, dataset):
         user = self.current_user
         dataset = db.get_dataset(dataset)
 
-        versions = db.DatasetVersion.select(
-            db.DatasetVersion.version, db.DatasetVersion.available_from
-        ).where(
-            db.DatasetVersion.dataset == dataset
-        )
+        versions = (db.DatasetVersion.select(db.DatasetVersion.version,
+                                             db.DatasetVersion.available_from)
+                    .where(db.DatasetVersion.dataset == dataset))
         logging.info("ListDatasetVersions")
-        data = []
         found_current = False
+        versions = sorted(versions, key=lambda version: version.version)
         for v in reversed(versions):
             current = False
             future = False
@@ -218,10 +216,10 @@ def get(self, dataset):
 
 
 class GenerateTemporaryLink(handlers.AuthorizedHandler):
-    def post(self, dataset, version=None):
-        dataset, version = utils.parse_dataset(dataset, version)
+    def post(self, dataset, ds_version=None):
+        dataset, ds_version = utils.parse_dataset(dataset, ds_version)
         user = self.current_user
-        dataset_version = db.get_dataset_version(dataset, version)
+        dataset_version = db.get_dataset_version(dataset, ds_version)
         if dataset_version is None:
             self.send_error(status_code=404)
             return
@@ -248,9 +246,9 @@ def post(self, dataset, version=None):
 
 
 class DatasetFiles(handlers.AuthorizedHandler):
-    def get(self, dataset, version=None):
-        dataset, version = utils.parse_dataset(dataset, version)
-        dataset_version = db.get_dataset_version(dataset, version)
+    def get(self, dataset, ds_version=None):
+        dataset, ds_version = utils.parse_dataset(dataset, ds_version)
+        dataset_version = db.get_dataset_version(dataset, ds_version)
         if dataset_version is None:
             self.send_error(status_code=404)
             return
@@ -264,6 +262,7 @@ def get(self, dataset, version=None):
 
         self.finish({'files': ret})
 
+
 def format_bytes(nbytes):
     postfixes = ['b', 'Kb', 'Mb', 'Gb', 'Tb', 'Pb', 'Eb']
     exponent = math.floor( math.log(nbytes) / math.log(1000) )
@@ -271,7 +270,7 @@ def format_bytes(nbytes):
 
 
 class Collection(handlers.UnsafeHandler):
-    def get(self, dataset):
+    def get(self, dataset, ds_version=None):
         dataset, _ = utils.parse_dataset(dataset)
         dataset = db.get_dataset(dataset)
 
diff --git a/backend/db.py b/backend/db.py
index 1ea48aa4c..1049e0b89 100644
--- a/backend/db.py
+++ b/backend/db.py
@@ -159,6 +159,7 @@ class Meta:
 
 class Dataset(BaseModel):
     """
     A dataset is part of a study, and usually include a certain population.
+    Most studies only have a single dataset, but multiple are allowed.
     """
     class Meta:
@@ -213,6 +214,9 @@ class Meta:
     data_contact_link = CharField(null=True)
     num_variants = IntegerField(null=True)
     coverage_levels = ArrayField(IntegerField, null=True)
+    portal_avail = BooleanField(null=True)
+    file_access = EnumField(null=False, choices=['None', 'Controlled', 'Registered', 'Public'])
+    beacon_access = EnumField(null=False, choices=['None', 'Controlled', 'Registered', 'Public'])
 
 
 class DatasetFile(BaseModel):
@@ -334,16 +338,35 @@ def is_admin(self, dataset):
             DatasetAccess.is_admin
         ).count()
 
-    def has_access(self, dataset):
-        return DatasetAccessCurrent.select().where(
-            DatasetAccessCurrent.dataset == dataset,
-            DatasetAccessCurrent.user == self,
-        ).count()
+    def has_access(self, dataset, ds_version=None):
+        """
+        Check whether the user has permission to access a dataset.
+
+        Args:
+            dataset (Dataset): peewee Dataset object
+            ds_version (str): the dataset version
+
+        Returns:
+            bool: allowed to access
+
+        """
+        dsv = get_dataset_version(dataset.short_name, ds_version)
+        if not dsv:
+            return False
+        if dsv.file_access in ('Registered', 'Public'):
+            return True
+        elif dsv.file_access == 'None':
+            return False
+
+        return (DatasetAccessCurrent.select()
+                .where(DatasetAccessCurrent.dataset == dataset,
+                       DatasetAccessCurrent.user == self)
+                .count()) > 0
 
     def has_requested_access(self, dataset):
         return DatasetAccessPending.select().where(
             DatasetAccessPending.dataset == dataset,
-            DatasetAccessPending.user == self
+            DatasetAccessPending.user == self
         ).count()
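
The new `has_access()` consults the version's `file_access` level before falling back to the per-user grant table: 'Registered' and 'Public' short-circuit to allow, 'None' always denies, and only 'Controlled' consults `DatasetAccessCurrent`. A minimal standalone sketch of that decision order (`DummyVersion` and `user_has_grant` are illustrative stand-ins, not part of the patch):

```python
# Sketch of the access-decision order in the new User.has_access().
# DummyVersion stands in for a db.DatasetVersion row; the real code
# resolves it via get_dataset_version(dataset.short_name, ds_version).
from collections import namedtuple

DummyVersion = namedtuple('DummyVersion', ['version', 'file_access'])

def has_access(dsv, user_has_grant):
    if dsv is None:
        return False                    # unknown dataset or version
    if dsv.file_access in ('Registered', 'Public'):
        return True                     # open to any registered user
    if dsv.file_access == 'None':
        return False                    # files are never served
    return user_has_grant               # 'Controlled': per-user grants decide

assert has_access(DummyVersion('1', 'Public'), user_has_grant=False)
assert not has_access(DummyVersion('1', 'None'), user_has_grant=True)
assert has_access(DummyVersion('1', 'Controlled'), user_has_grant=True)
assert not has_access(DummyVersion('1', 'Controlled'), user_has_grant=False)
```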
""" class Meta: @@ -213,6 +214,9 @@ class Meta: data_contact_link = CharField(null=True) num_variants = IntegerField(null=True) coverage_levels = ArrayField(IntegerField, null=True) + portal_avail = BooleanField(null=True) + file_access = EnumField(null=False, choices=['None', 'Controlled', 'Registered', 'Public']) + beacon_access = EnumField(null=False, choices=['None', 'Controlled', 'Registered', 'Public']) class DatasetFile(BaseModel): @@ -334,16 +338,35 @@ def is_admin(self, dataset): DatasetAccess.is_admin ).count() - def has_access(self, dataset): - return DatasetAccessCurrent.select().where( - DatasetAccessCurrent.dataset == dataset, - DatasetAccessCurrent.user == self, - ).count() + def has_access(self, dataset, ds_version=None): + """ + Check whether user has permission to access a dataset + + Args: + dataset (Database): peewee Database object + ds_version (str): the dataset version + + Returns: + bool: allowed to access + + """ + dsv = get_dataset_version(dataset.short_name, ds_version) + if not dsv: + return False + if dsv.file_access in ('Registered', 'Public'): + return True + elif dsv.file_access == 'None': + return False + + return (DatasetAccessCurrent.select() + .where(DatasetAccessCurrent.dataset == dataset, + DatasetAccessCurrent.user == self) + .count()) > 0 def has_requested_access(self, dataset): return DatasetAccessPending.select().where( DatasetAccessPending.dataset == dataset, - DatasetAccessPending.user == self + DatasetAccessPending.user == self ).count() diff --git a/backend/handlers.py b/backend/handlers.py index 4321081b1..2ff0e641a 100644 --- a/backend/handlers.py +++ b/backend/handlers.py @@ -130,12 +130,15 @@ def prepare(self): return kwargs = self.path_kwargs - if not kwargs['dataset']: + if not 'dataset' in kwargs: logging.debug("No dataset: Send error 403") self.send_error(status_code=403) - if not self.current_user.has_access( db.get_dataset(kwargs['dataset']) ): + return + ds_version = kwargs['ds_version'] if 'ds_version' in kwargs else None + if not self.current_user.has_access(db.get_dataset(kwargs['dataset']), ds_version): logging.debug("No user access: Send error 403") self.send_error(status_code=403) + return logging.debug("User is authorized") @@ -150,9 +153,11 @@ def prepare(self): if not kwargs['dataset']: logging.debug("No dataset: Send error 403") self.send_error(status_code=403) + return if not self.current_user.is_admin( db.get_dataset(kwargs['dataset']) ): logging.debug("No user admin: Send error 403") self.send_error(status_code=403) + return class SafeStaticFileHandler(tornado.web.StaticFileHandler, SafeHandler): @@ -179,20 +184,23 @@ def initialize(self, path): path = "/" + path self.root = path - def get(self, dataset, file, user=None): + def get(self, dataset, file, ds_version=None, user=None): logging.debug("Want to download dataset {} ({})".format(dataset, file)) if not user: user = self.current_user - dbfile = (db.DatasetFile - .select() - .where(db.DatasetFile.name == file) - .get()) - db.UserDownloadLog.create( - user = user, - dataset_file = dbfile - ) + try: + dbfile = (db.DatasetFile.select() + .join(db.DatasetVersion) + .where((db.DatasetFile.name == file) & + (db.DatasetVersion.version == ds_version)) + .get()) + except db.DatasetFile.DoesNotExist: + self.send_error(status_code=403) + return + + db.UserDownloadLog.create(user = user, dataset_file = dbfile) abspath = os.path.abspath(os.path.join(self.root, file)) self.set_header("X-Accel-Redirect", abspath) diff --git a/backend/route.py b/backend/route.py index 
diff --git a/backend/route.py b/backend/route.py
index 4b5179f12..48e8d44dc 100644
--- a/backend/route.py
+++ b/backend/route.py
@@ -45,7 +45,7 @@ def __init__(self, settings):
                 {"path": "static/img/"}),
             (r"/release/(?P<dataset>[^\/]+)/(?P<hash_value>[^\/]+)/(?P<file>[^\/]+)", handlers.TemporaryStaticNginxFileHandler,
                 {"path": "/release-files/"}),
-            (r"/release/(?P<dataset>[^\/]+)/(?P<file>[^\/]+)", handlers.AuthorizedStaticNginxFileHandler,
+            (r"/release/(?P<dataset>[^\/]+)/versions/(?P<ds_version>[^/]+)/(?P<file>[^\/]+)", handlers.AuthorizedStaticNginxFileHandler,
                 {"path": "/release-files/"}),
             ## Authentication
             (r"/logout", auth.ElixirLogoutHandler),
@@ -65,11 +65,11 @@ def __init__(self, settings):
             (r"/api/dataset/(?P<dataset>[^\/]+)", application.GetDataset),
             (r"/api/dataset/(?P<dataset>[^\/]+)/log/(?P<event>[^\/]+)/(?P<target>[^\/]+)", application.LogEvent),
             (r"/api/dataset/(?P<dataset>[^\/]+)/logo", application.ServeLogo),
-            (r"/api/dataset/(?P<dataset>[^\/]+)/files", application.DatasetFiles),
-            (r"/api/dataset/(?P<dataset>[^\/]+)/collection", application.Collection),
+            (r"/api/dataset/(?P<dataset>[^\/]+)/(?:versions/(?P<ds_version>[^/]+)/)?files", application.DatasetFiles),
+            (r"/api/dataset/(?P<dataset>[^\/]+)/(?:versions/(?P<ds_version>[^/]+)/)?collection", application.Collection),
             (r"/api/dataset/(?P<dataset>[^\/]+)/users_current", application.DatasetUsersCurrent),
             (r"/api/dataset/(?P<dataset>[^\/]+)/users_pending", application.DatasetUsersPending),
-            (r"/api/dataset/(?P<dataset>[^\/]+)/temporary_link", application.GenerateTemporaryLink),
+            (r"/api/dataset/(?P<dataset>[^\/]+)/(?:versions/(?P<ds_version>[^/]+)/)?temporary_link", application.GenerateTemporaryLink),
             (r"/api/dataset/(?P<dataset>[^\/]+)/users/[^\/]+/request", application.RequestAccess),
             (r"/api/dataset/(?P<dataset>[^\/]+)/users/(?P<email>[^\/]+)/approve", application.ApproveUser),
             (r"/api/dataset/(?P<dataset>[^\/]+)/users/(?P<email>[^\/]+)/revoke", application.RevokeUser),
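
The named groups feed Tornado's `path_kwargs`, so they must match the handler keyword arguments (`dataset`, `ds_version`, `file`). Because the `(?:versions/(?P<ds_version>[^/]+)/)?` group is optional, the old unversioned URLs keep working and `ds_version` simply arrives as None. A quick standalone check of the pattern (the dataset name is made up; the version string is taken from the fixtures below):

```python
import re

# Same pattern as the DatasetFiles route above.
files_route = re.compile(
    r"/api/dataset/(?P<dataset>[^\/]+)/(?:versions/(?P<ds_version>[^/]+)/)?files")

m = files_route.fullmatch("/api/dataset/dataset1/files")
assert m and m.group('ds_version') is None            # legacy URL, no version

m = files_route.fullmatch("/api/dataset/dataset1/versions/Version 2-2/files")
assert m and m.group('ds_version') == "Version 2-2"   # explicit version
```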
diff --git a/test/data/load_dummy_data.sql b/test/data/load_dummy_data.sql
index 3751c7ffa..4bcf9e522 100644
--- a/test/data/load_dummy_data.sql
+++ b/test/data/load_dummy_data.sql
@@ -25,10 +25,10 @@ INSERT INTO data.sample_sets (id, dataset, "collection", sample_size, phenotype)
            (1000003, 1000002, 1000003, 20, 'SamplePheno2 Coll2');
 
 INSERT INTO data.dataset_versions (id, dataset, reference_set, dataset_version, dataset_description, terms, var_call_ref, available_from, ref_doi, data_contact_name, data_contact_link, num_variants, coverage_levels, portal_avail, file_access, beacon_access)
-    VALUES (1000001, 1000001, 1000001, 'Version 1-1', 'Dataset 1-1, description', 'Dataset 1-1, terms', 'CallRef11', '2017-01-01', 'datset11DOI', 'Gunnar Green', 'gunnar.green@example.com', 10, ARRAY[1,5,10], TRUE, 'Registered', 'Public'),
-           (1000002, 1000002, 1000001, 'Version 2-1', 'Dataset 2-1, description', 'Dataset 2-1, terms', 'CallRef21', '2017-02-01', 'datset21DOI', NULL, NULL, 100, ARRAY[1,5,10], TRUE, 'Registered', 'Public'),
-           (1000003, 1000002, 1000002, 'Version 2-2', 'Dataset 2-2, description', 'Dataset 2-2, terms', 'CallRef22', '2017-02-02', 'datset22DOI', 'Strummer project', 'https://example.com/strummer', 1000, ARRAY[1,5,10], TRUE, 'Registered', 'Public'),
-           (1000004, 1000002, 1000002, 'InvVer 2-3', 'Dataset 2-3, description', 'Dataset 2-3, terms', 'CallRef23', '2030-02-03', 'datset23DOI', 'Drummer project', 'https://example.com/drummer', 10000, ARRAY[1,5,10], TRUE, 'Registered', 'Public');
+    VALUES (1000001, 1000001, 1000001, 'Version 1-1', 'Dataset 1-1, description', 'Dataset 1-1, terms', 'CallRef11', '2017-01-01', 'datset11DOI', 'Gunnar Green', 'gunnar.green@example.com', 10, ARRAY[1,5,10], TRUE, 'Controlled', 'Public'),
+           (1000002, 1000002, 1000001, 'Version 2-1', 'Dataset 2-1, description', 'Dataset 2-1, terms', 'CallRef21', '2017-02-01', 'datset21DOI', NULL, NULL, 100, ARRAY[1,5,10], TRUE, 'Controlled', 'Public'),
+           (1000003, 1000002, 1000002, 'Version 2-2', 'Dataset 2-2, description', 'Dataset 2-2, terms', 'CallRef22', '2017-02-02', 'datset22DOI', 'Strummer project', 'https://example.com/strummer', 1000, ARRAY[1,5,10], TRUE, 'Controlled', 'Public'),
+           (1000004, 1000002, 1000002, 'InvVer 2-3', 'Dataset 2-3, description', 'Dataset 2-3, terms', 'CallRef23', '2030-02-03', 'datset23DOI', 'Drummer project', 'https://example.com/drummer', 10000, ARRAY[1,5,10], TRUE, 'Controlled', 'Public');
 
 INSERT INTO data.dataset_files(id, dataset_version, basename, uri, file_size)
     VALUES (1000001, 1000001, 'File11-1', '/release/file111.txt', 100),