# pyOpenCGA Basic User Usage [UNDER CONSTRUCTION]

------


**[NOTE]** The server methods used by the pyopencga client are defined at the following Swagger URL:
- http://bioinfodev.hpc.cam.ac.uk/opencga-test/webservices

**[NOTE]** The currently implemented methods are listed in the following spreadsheet:
- https://docs.google.com/spreadsheets/d/1QpU9yl3UTneqwRqFX_WAqCiCfZBk5eU-4E3K-WVvuoc/edit?usp=sharing

## Loading pyOpenCGA

In [3]:
# Initialize PYTHONPATH for pyopencga
import sys
import os

# Working directory of the notebook; in the recorded run it ends in "/notebooks".
cwd = os.getcwd()
print("current_dir: ...."+cwd[-10:])

# Its parent directory is added to the import path so that the `pyopencga`
# imports in the next cell can be resolved.
base_modules_dir = os.path.dirname(cwd)
print("base_modules_dir: ...."+base_modules_dir[-10:])

# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if base_modules_dir not in sys.path:
    sys.path.append(base_modules_dir)

current_dir: ..../notebooks
base_modules_dir: ..../pyOpenCGA


In [4]:
from pyopencga.opencga_config import ConfigClient
from pyopencga.opencga_client import OpenCGAClient
import json

## Setting credentials for LogIn

**Credentials** 

Please add the credentials for the OpenCGA login to a file in JSON format and read them from there.

e.g.:
file: \__user_config.json
file_content: {"user":"xxx","pwd":"yyy"}

In [5]:
## Reading user config/credentials to connect to server
user_config_json = "./__user_config.json"
# JSON text is UTF-8 by specification, so do not rely on the platform's
# default encoding when opening the file.
with open(user_config_json, "r", encoding="utf-8") as f:
    # json.load parses directly from the open file object.
    user_credentials = json.load(f)

# Print only the first three characters of the user name.
print('User: {}***'.format(user_credentials["user"][:3]))

User: caf***


In [6]:
# Pull the login name and password out of the credentials dict loaded above;
# both are passed to OpenCGAClient further down.
user = user_credentials["user"]
passwd = user_credentials["pwd"]

## Creating ConfigClient for server connection configuration

In [7]:
## Creating ConfigClient
# URL of the OpenCGA installation the client will talk to.
host = 'http://bioinfodev.hpc.cam.ac.uk/opencga-test'
cc = ConfigClient()
# Build the client configuration for that host; in the recorded output it is a
# dict holding a 'version' entry and the list of REST hosts.
config_dict = cc.get_basic_config_dict(host)
print("Config information:\n",config_dict)

Config information:
 {'version': 'v1', 'rest': {'hosts': ['http://bioinfodev.hpc.cam.ac.uk/opencga-test']}}


## LogIn with user credentials

In [8]:
# Create the client from the configuration plus user/password credentials.
# The session token it holds afterwards is read from `oc.session_id` in the next cell.
oc = OpenCGAClient(configuration=config_dict, 
                   user=user, 
                   pwd=passwd)

In [9]:
## Getting the session id / token
token = oc.session_id
# Print only the first 10 characters rather than the full token.
print("Session token:\n{}...".format(token[:10]))

Session token:
eyJhbGciOi...


In [10]:
# Re-create the client from the session token instead of user/password.
# This rebinds `oc`, which the following cells use.
oc = OpenCGAClient(configuration=config_dict, 
                   session_id=token)

In [14]:
## Check that several extra keyword parameters (here `study` and `type`)
## can be passed together to a search call.
fc = oc.files
fc.search(study = "cafetero@GRCH37:platinum", type="FILE")

[[{'id': 'tmp:HG00096.chrom20.small.bam',
   'annotationSets': [],
   'name': 'HG00096.chrom20.small.bam',
   'uuid': 'TM9SrwFoAAMAAR5FsdmSkQ',
   'type': 'FILE',
   'format': 'BAM',
   'bioformat': 'ALIGNMENT',
   'checksum': '',
   'uri': 'file:///tmp/HG00096.chrom20.small.bam',
   'path': 'tmp/HG00096.chrom20.small.bam',
   'release': 1,
   'creationDate': '20190114144122',
   'modificationDate': '20190306123127',
   'description': '',
   'status': {'name': 'MISSING', 'date': '20190306123127', 'message': ''},
   'external': True,
   'size': 15397,
   'experiment': {},
   'samples': [{'id': 'HG00096',
     'uuid': 'TM9TegFoAAQAARYSE1KsKQ',
     'source': 'HG00096.chrom20.small.bam',
     'individual': {'release': 0,
      'version': 0,
      'parentalConsanguinity': False},
     'release': 1,
     'version': 1,
     'creationDate': '20190114144122',
     'status': {'name': 'READY', 'date': '20190114144122', 'message': ''},
     'description': '',
     'type': '',
     'somatic': Fals

## Working with Users

In [9]:
# Client for user operations; dir() lists the attributes and methods it exposes.
user_client = oc.users
print(dir(user_client))

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_category', '_cfg', '_client_login_handler', '_delete', '_get', '_get_query_id_str', '_post', '_refresh_token_client', '_rest_retry', 'auto_refresh', 'configs', 'create', 'delete', 'filters', 'info', 'login', 'login_handler', 'logout', 'on_retry', 'projects', 'refresh_token', 'session_id', 'update', 'update_configs', 'update_filter', 'update_filters', 'update_password']


In [10]:
## Getting user information
## [NOTE] info() takes the query_id string directly as a positional argument --> (user)
user_client.info(user)

[[{'id': 'cafetero',
   'name': 'Juan Valdés',
   'email': 'cafetero@cafe.rico.es',
   'organization': '',
   'account': {'type': 'full',
    'creationDate': '20180918141732',
    'expirationDate': '20190918141732',
    'authOrigin': 'internal'},
   'status': {'name': 'READY', 'date': '20180918141732', 'message': ''},
   'lastModified': '20190221155441344',
   'size': -1,
   'quota': 200000,
   'projects': [{'id': 'GRCH37',
     'name': 'GRCH37',
     'uuid': '7Q5e4QFlAAEAAQ7Nii5FVA',
     'fqn': 'cafetero@GRCH37',
     'creationDate': '20180918142106',
     'description': '',
     'organization': '',
     'organism': {'scientificName': 'Homo Sapiens',
      'commonName': 'grch37',
      'taxonomyCode': 0,
      'assembly': 'grch37'},
     'currentRelease': 1,
     'status': {'name': 'READY', 'date': '20180918142106', 'message': ''},
     'size': 0,
     'studies': [{'id': 'platinum',
       'name': 'Platinum',
       'uuid': '7REdkAFlAAIAAVnBP_uPGg',
       'type': 'CASE_CONTROL',
   

In [12]:
## Getting user projects
## [NOTE] Client-specific methods take the query_id as a keyword argument (e.g. user=user_id)
user_client.projects(user=user)

[[{'id': 'GRCH37',
   'name': 'GRCH37',
   'uuid': '7Q5e4QFlAAEAAQ7Nii5FVA',
   'fqn': 'cafetero@GRCH37',
   'creationDate': '20180918142106',
   'description': '',
   'organization': '',
   'organism': {'scientificName': 'Homo Sapiens',
    'commonName': 'grch37',
    'taxonomyCode': 0,
    'assembly': 'grch37'},
   'currentRelease': 1,
   'status': {'name': 'READY', 'date': '20180918142106', 'message': ''},
   'size': 0,
   'studies': [{'id': 'platinum',
     'name': 'Platinum',
     'uuid': '7REdkAFlAAIAAVnBP_uPGg',
     'type': 'CASE_CONTROL',
     'creationDate': '20180918142406',
     'modificationDate': '20180918142406',
     'description': '',
     'status': {'name': 'READY', 'date': '20180918142406', 'message': ''},
     'lastModified': '20180918142406',
     'size': 0,
     'cipher': 'none',
     'fqn': 'cafetero@GRCH37:platinum',
     'groups': [{'id': '@members',
       'name': 'members',
       'userIds': ['test2', 'test3', 'test4', 'test1', 'cafetero']},
      {'id': '@ad

In [13]:
## Getting user configurations
## [NOTE] Client-specific methods take the query_id as a keyword argument (e.g. user=user_id)
user_client.configs(user=user)

[[{}]]

In [14]:
## Getting user filter configurations
## [NOTE] Client-specific methods take the query_id as a keyword argument (e.g. user=user_id)
user_client.filters(user=user)

[[{'name': 'Indels',
   'description': 'Fetch all INDEL variants',
   'bioformat': 'VARIANT',
   'query': {'studies': 'cafetero@GRCH37:platinum', 'type': 'INDEL'},
   'options': {}}]]

## Working with Projects

In [15]:
# Client for project operations; dir() lists the attributes and methods it exposes.
# The bound variable is inspected (not `oc.projects` again), as in the users cell.
project_client = oc.projects
print(dir(project_client))

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_category', '_cfg', '_client_login_handler', '_delete', '_get', '_get_query_id_str', '_post', '_refresh_token_client', '_rest_retry', 'auto_refresh', 'create', 'delete', 'increment_release', 'info', 'login_handler', 'on_retry', 'search', 'session_id', 'stats', 'studies', 'update']


In [16]:
## Getting all projects from the logged-in user
## search() is called without any filter arguments here.
project_client.search()

[[{'id': 'GRCH37',
   'name': 'GRCH37',
   'uuid': '7Q5e4QFlAAEAAQ7Nii5FVA',
   'fqn': 'cafetero@GRCH37',
   'creationDate': '20180918142106',
   'description': '',
   'organization': '',
   'organism': {'scientificName': 'Homo Sapiens',
    'commonName': 'grch37',
    'taxonomyCode': 0,
    'assembly': 'grch37'},
   'currentRelease': 1,
   'status': {'name': 'READY', 'date': '20180918142106', 'message': ''},
   'size': 0,
   'studies': [{'id': 'platinum',
     'name': 'Platinum',
     'uuid': '7REdkAFlAAIAAVnBP_uPGg',
     'type': 'CASE_CONTROL',
     'creationDate': '20180918142406',
     'modificationDate': '20180918142406',
     'description': '',
     'status': {'name': 'READY', 'date': '20180918142406', 'message': ''},
     'lastModified': '20180918142406',
     'size': 0,
     'cipher': 'none',
     'fqn': 'cafetero@GRCH37:platinum',
     'groups': [{'id': '@members',
       'name': 'members',
       'userIds': ['test2', 'test3', 'test4', 'test1', 'cafetero']},
      {'id': '@ad

In [17]:
## Getting information from a specific project
## The project id is passed directly as a positional argument.
project_client.info('GRCH37')

[[{'id': 'GRCH37',
   'name': 'GRCH37',
   'uuid': '7Q5e4QFlAAEAAQ7Nii5FVA',
   'fqn': 'cafetero@GRCH37',
   'creationDate': '20180918142106',
   'description': '',
   'organization': '',
   'organism': {'scientificName': 'Homo Sapiens',
    'commonName': 'grch37',
    'taxonomyCode': 0,
    'assembly': 'grch37'},
   'currentRelease': 1,
   'status': {'name': 'READY', 'date': '20180918142106', 'message': ''},
   'size': 0,
   'studies': [{'id': 'platinum',
     'name': 'Platinum',
     'uuid': '7REdkAFlAAIAAVnBP_uPGg',
     'type': 'CASE_CONTROL',
     'creationDate': '20180918142406',
     'modificationDate': '20180918142406',
     'description': '',
     'status': {'name': 'READY', 'date': '20180918142406', 'message': ''},
     'lastModified': '20180918142406',
     'size': 0,
     'cipher': 'none',
     'fqn': 'cafetero@GRCH37:platinum',
     'groups': [{'id': '@members',
       'name': 'members',
       'userIds': ['test2', 'test3', 'test4', 'test1', 'cafetero']},
      {'id': '@ad

In [18]:
## Fetching the studies from a project
## The project id is passed directly as a positional argument.
project_client.studies('GRCH37')

[[{'id': 'platinum',
   'name': 'Platinum',
   'uuid': '7REdkAFlAAIAAVnBP_uPGg',
   'type': 'CASE_CONTROL',
   'creationDate': '20180918142406',
   'modificationDate': '20180918142406',
   'description': '',
   'status': {'name': 'READY', 'date': '20180918142406', 'message': ''},
   'lastModified': '20180918142406',
   'size': 0,
   'cipher': 'none',
   'fqn': 'cafetero@GRCH37:platinum',
   'groups': [{'id': '@members',
     'name': 'members',
     'userIds': ['test2', 'test3', 'test4', 'test1', 'cafetero']},
    {'id': '@admins', 'name': 'admins', 'userIds': []}],
   'experiments': [],
   'files': [],
   'jobs': [],
   'individuals': [],
   'samples': [],
   'datasets': [],
   'cohorts': [],
   'panels': [],
   'variableSets': [],
   'permissionRules': {},
   'uri': 'file:///mnt/data/opencga-test/sessions/users/cafetero/projects/1/2/',
   'release': 1,
   'dataStores': {},
   'stats': {},
   'attributes': {}},
  {'id': 'GNOMAD_EXOMES',
   'name': 'GenomAD Exomes',
   'uuid': '7RFu4AFl

In [19]:
## Getting stats from a project
## [NOTE] In the recorded run the server answered "Solr server is not alive".
## The client reports that failure as a plain Exception, so it is caught and
## printed here, which lets the rest of the notebook keep running. A successful
## result is still shown as the cell output.
try:
    project_stats = project_client.stats('GRCH37')
except Exception as error:
    project_stats = None
    print("Exception: {}".format(error))
project_stats

Exception: b'{"apiVersion":"v1","time":136,"warning":"","error":"Solr server is not alive","queryOptions":{"metadata":true,"skipCount":true,"skip":0,"limit":1000},"response":[{"id":"","dbTime":-1,"numResults":-1,"numTotalResults":-1,"warningMsg":"Future errors will ONLY be shown in the QueryResponse body","errorMsg":"DEPRECATED: org.apache.solr.common.SolrException: Solr server is not alive","resultType":"","result":[]}]}'