Skip to content
This repository has been archived by the owner on Oct 22, 2022. It is now read-only.

Commit

Permalink
Merge pull request #172 from CanDIG/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
jimmyhli committed Jul 12, 2019
2 parents ab56c68 + a90595e commit d73c55c
Show file tree
Hide file tree
Showing 119 changed files with 2,097 additions and 2,316 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
language: python
python:
- "2.7"
- "3.6"
sudo: false

cache:
Expand Down
5 changes: 1 addition & 4 deletions candig/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
"""
GA4GH server
CanDIG server
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

__import__('pkg_resources').declare_namespace(__name__)
2 changes: 1 addition & 1 deletion candig/server/DP.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def _add_noise(self, vec, server, sensitivity=1.):
scale = np.float64(sensitivity) / np.float64(self._epsilon[server])

if type(vec) is dict:
if not len(vec.keys()):
if not vec:
# print("Warning! can't add noise to empty input");
return vec
for pop, v in vec.items():
Expand Down
2 changes: 1 addition & 1 deletion candig/server/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Reference implementation of the GA4GH APIs.
Implementation of the CanDIG APIs.
"""
# Don't include future imports here; we don't want to export them as
# part of the package
Expand Down
6 changes: 3 additions & 3 deletions candig/server/auth/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def callback_handling():
raise exceptions.NotAuthorizedException(
'The callback from Auth0 did not'
'include the expected tokens: \n'
'{}'.format(e.message))
'{}'.format(str(e)))
# Get profile information
try:
user_url = \
Expand All @@ -148,7 +148,7 @@ def callback_handling():
except Exception as e:
raise exceptions.NotAuthorizedException(
'The user profile from Auth0 did '
'not contain the expected data: \n {}'.format(e.message))
'not contain the expected data: \n {}'.format(str(e)))
# Log token in
user = cache.get(email)
if user and user['authorized']:
Expand Down Expand Up @@ -271,7 +271,7 @@ def _decode_header(auth_header, client_id, client_secret):
'Token signature could not be validated.')
except Exception as e:
raise exceptions.NotAuthorizedException(
'Token signature was malformed. {}'.format(e.message))
'Token signature was malformed. {}'.format(str(e)))
return token, payload


Expand Down
17 changes: 7 additions & 10 deletions candig/server/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
Module responsible for handling protocol requests and returning
responses.
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import candig.server.datamodel as datamodel
import candig.server.exceptions as exceptions
Expand All @@ -15,7 +12,7 @@
from google.protobuf.json_format import MessageToDict
import json
import itertools
import DP as DP
import candig.server.DP as DP


class Backend(object):
Expand Down Expand Up @@ -221,9 +218,9 @@ def logicHandler(self, logic, responses, dataset_id, access_map):
op_keys = ['and', 'or']
logic_negate = False

if len(logic.keys()) == 1:
logic_key = logic.keys()[0]
elif len(logic.keys()) == 2:
if len(logic) == 1:
logic_key = list(logic.keys())[0]
elif len(logic) == 2:
if {'id', 'negate'} == set(logic.keys()):
logic_key = 'id'
logic_negate = bool(logic['negate'])
Expand Down Expand Up @@ -502,7 +499,7 @@ def aggregationHandler(self, table, results, field):
table = "variants"
try:
for entry in json_results[table]:
for k, v in entry.iteritems():
for k, v in entry.items():
if k in field:
if k not in field_value_counts:
field_value_counts[k] = {}
Expand Down Expand Up @@ -1073,7 +1070,7 @@ def _readGroupSetsGenerator(self, request, numObjects, getByIndexMethod):
if request.name and request.name != obj.getLocalId():
include = False
if request.biosample_id and include:
rgsp.ClearField(b"read_groups")
rgsp.ClearField("read_groups")
for readGroup in obj.getReadGroups():
if request.biosample_id == readGroup.getBiosampleId():
rgsp.read_groups.extend(
Expand Down Expand Up @@ -1607,7 +1604,7 @@ def runSearchGenotypesRequest(self, requestStr, access_map,
for gt_variant, nextPageToken in objectGenerator(request, access_map):
genotypemtx, variant, callsetids = gt_variant
genotyperows.append(genotypemtx)
variant.ClearField(b"calls")
variant.ClearField("calls")
variants.append(variant)
if callsetIds is None:
callsetIds = callsetids
Expand Down
3 changes: 0 additions & 3 deletions candig/server/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
"""
Functionality common to cli modules
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import candig.server

Expand Down
5 changes: 1 addition & 4 deletions candig/server/cli/configtest.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
"""
configtest cli
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import unittest

import candig.server.cli as cli
import candig.server.configtest as configtest

import ga4gh.common.cli as common_cli
import candig.common.cli as common_cli


class SimplerResult(unittest.TestResult):
Expand Down
21 changes: 9 additions & 12 deletions candig/server/cli/repomanager.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
"""
repo manager cli
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import glob
import json
import os
import sys
import textwrap
import traceback
import urlparse
import urllib.parse

import candig.server.cli as cli
import candig.server.datamodel.bio_metadata as bio_metadata
Expand All @@ -29,7 +26,7 @@
import candig.server.exceptions as exceptions
import candig.server.repo.rnaseq2ga as rnaseq2ga

import ga4gh.common.cli as common_cli
import candig.common.cli as common_cli


def getNameFromPath(filePath):
Expand All @@ -52,7 +49,7 @@ def getRawInput(display):
Wrapper around input (raw_input in Python 2); put into a separate
function so that it can be easily mocked for tests.
"""
return raw_input(display)
return input(display)


class RepoManager(object):
Expand Down Expand Up @@ -208,7 +205,7 @@ def addReadGroupSet(self):
dataset = self._repo.getDatasetByName(self._args.datasetName)
dataUrl = self._args.dataFile
indexFile = self._args.indexFile
parsed = urlparse.urlparse(dataUrl)
parsed = urllib.parse.urlparse(dataUrl)
# TODO, add https support and others when they have been
# tested.
if parsed.scheme in ['http', 'ftp']:
Expand Down Expand Up @@ -274,10 +271,10 @@ def addVariantSet(self):
"Cannot infer the intended name of the VariantSet when "
"more than one VCF file is provided. Please provide a "
"name argument using --name.")
parsed = urlparse.urlparse(dataUrls[0])
parsed = urllib.parse.urlparse(dataUrls[0])
if parsed.scheme not in ['http', 'ftp']:
dataUrls = map(lambda url: self._getFilePath(
url, self._args.relativePath), dataUrls)
dataUrls = [self._getFilePath(
url, self._args.relativePath) for url in dataUrls]
# Now, get the index files for the data files that we've now obtained.
indexFiles = self._args.indexFiles
if indexFiles is None:
Expand All @@ -296,8 +293,8 @@ def addVariantSet(self):
indexSuffix = ".tbi"
# TODO support BCF input properly here by adding .csi
indexFiles = [filename + indexSuffix for filename in dataUrls]
indexFiles = map(lambda url: self._getFilePath(
url, self._args.relativePath), indexFiles)
indexFiles = [self._getFilePath(
url, self._args.relativePath) for url in indexFiles]
variantSet = variants.HtslibVariantSet(dataset, name)
variantSet.populateFromFile(dataUrls, indexFiles)
# Get the reference set that is associated with the variant set.
Expand Down
9 changes: 3 additions & 6 deletions candig/server/cli/server.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
"""
Server cli
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import requests

Expand All @@ -12,7 +9,7 @@
import candig.server.cli as cli
import candig.server.frontend as frontend

import ga4gh.common.cli as common_cli
import candig.common.cli as common_cli


class StandaloneApplication(gunicorn.app.base.BaseApplication):
Expand All @@ -23,9 +20,9 @@ def __init__(self, app, options=None):

def load_config(self):
config = dict(
[(key, value) for key, value in self.options.iteritems()
[(key, value) for key, value in self.options.items()
if key in self.cfg.settings and value is not None])
for key, value in config.iteritems():
for key, value in config.items():
self.cfg.set(key.lower(), value)

def load(self):
Expand Down
3 changes: 0 additions & 3 deletions candig/server/configtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
Tests are standard python unittest tests.
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import unittest
import flask
Expand Down
40 changes: 21 additions & 19 deletions candig/server/datamodel/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
"""
The GA4GH data model. Defines all the methods required to translate
The CanDIG data model. Defines all the methods required to translate
data in existing formats into GA4GH protocol types.
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import base64
import collections
Expand All @@ -15,9 +12,10 @@
import difflib

import candig.server.exceptions as exceptions

import candig.schemas.protocol as protocol

from binascii import Error as binascii_error


class PysamFileHandleCache(object):
"""
Expand Down Expand Up @@ -152,7 +150,7 @@ def __init__(self, parentCompoundId, *localIds):
localIds = localIds[:differentiatorIndex] + tuple([
self.differentiator]) + localIds[differentiatorIndex:]
for field, localId in zip(self.fields[index:], localIds):
if not isinstance(localId, basestring):
if not isinstance(localId, str):
raise exceptions.BadIdentifierNotStringException(localId)
encodedLocalId = self.encode(localId)
setattr(self, field, encodedLocalId)
Expand Down Expand Up @@ -216,14 +214,15 @@ def parse(cls, compoundIdStr):
identifier (under our internal rules) is provided, the response should
be that the identifier does not exist.
"""
if not isinstance(compoundIdStr, basestring):
if not isinstance(compoundIdStr, str):
raise exceptions.BadIdentifierException(compoundIdStr)
try:
deobfuscated = cls.deobfuscate(compoundIdStr)
except TypeError:
except binascii_error:
# When a string that is not valid base64 is passed as an
# argument, b64decode raises an exception. We must treat
# this as an ID not found error.
# In Python 3 the exception is binascii.Error; Python 2 raised TypeError.
raise exceptions.ObjectWithIdNotFoundException(compoundIdStr)
try:
encodedSplits = cls.split(deobfuscated)
Expand Down Expand Up @@ -255,21 +254,24 @@ def obfuscate(cls, idStr):
fashion. This is not intended for security purposes, but rather to
dissuade users from depending on our internal ID structures.
"""
return unicode(base64.urlsafe_b64encode(
idStr.encode('utf-8')).replace(b'=', b''))
return base64.urlsafe_b64encode(
idStr.encode('utf-8')).replace(b'=', b'').decode('utf-8')

@classmethod
def deobfuscate(cls, data):
"""
Reverses the obfuscation done by the :meth:`obfuscate` method.
If an identifier arrives without correct base64 padding, this
function appends the missing padding before decoding.
TODO: Temporary fix. Need to revisit in future.
"""
# the str() call is necessary to convert the unicode string
# to an ascii string since the urlsafe_b64decode method
# sometimes chokes on unicode strings
return base64.urlsafe_b64decode(str((
data + b'A=='[(len(data) - 1) % 4:])))

decoded_data = base64.urlsafe_b64decode(data + ('A=='[(len(data) - 1) % 4:]))

try:
return decoded_data.decode('utf-8')
except UnicodeDecodeError:
raise exceptions.ObjectWithIdNotFoundException(decoded_data)

@classmethod
def getInvalidIdString(cls):
Expand Down Expand Up @@ -930,7 +932,7 @@ def assertInRange(cls, attr, minVal, maxVal, attrName):

@classmethod
def assertInt(cls, attr, attrName):
if not isinstance(attr, (int, long)):
if not isinstance(attr, int):
message = "invalid {} '{}' not an int".format(attrName, attr)
raise exceptions.DatamodelValidationException(message)

Expand All @@ -945,12 +947,12 @@ def sanitizeInt(cls, attr, minVal, maxVal, attrName):

@classmethod
def sanitizeString(cls, attr, attrName):
if not isinstance(attr, basestring):
if not isinstance(attr, str):
message = "invalid {} '{}' not a string".format(
attrName, attr)
raise exceptions.DatamodelValidationException(message)
if isinstance(attr, unicode):
attr = attr.encode('utf8')
if isinstance(attr, str):
attr = attr
if len(attr) > cls.maxStringLength:
attr = attr[:cls.maxStringLength]
return attr
Expand Down
3 changes: 0 additions & 3 deletions candig/server/datamodel/bio_metadata.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
"""
Biodata objects
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import datetime
import json
Expand Down

0 comments on commit d73c55c

Please sign in to comment.