In [None]:
## import the necessary libraries
from thrift.transport import TTransport, TSocket
from thrift.protocol.TBinaryProtocol import TBinaryProtocol

from pyseek import SeekRPC
from pyseek.ttypes import SeekQuery, QueryParams, QueryResult

In [None]:
## establish a server connection
# Endpoint of the server this notebook talks to.
host, port = 'localhost', 9090

# Thrift stack, innermost first: socket -> buffered transport -> binary
# protocol -> SeekRPC client.
socket = TSocket.TSocket(host, port)
transport = TTransport.TBufferedTransport(socket)
protocol = TBinaryProtocol(transport)
client = SeekRPC.Client(protocol)

# Open the connection; the query helpers below go through `client`, and the
# final cell closes `transport`.
transport.open()

In [None]:
## run a query
def runQuery(species, genes, useSymbols=False, numToPrint=10):
    """Run one blocking query and print the leading results.

    Builds a ``SeekQuery`` and sends it through the module-level ``client``.
    On success, prints the first ``numToPrint`` gene scores followed by the
    first ``numToPrint`` dataset weights; otherwise prints the status message
    carried by the result.

    Args:
        species: Species name placed in the query.
        genes: List of query gene identifiers.
        useSymbols: Forwarded to the server as ``use_gene_symbols``.
        numToPrint: Maximum number of rows printed from each result list.

    Returns:
        None. All output goes to stdout.
    """
    params = QueryParams(distance_measure="ZscoreHubbinessCorrected",
                         min_query_genes_fraction=0.5,
                         min_genome_fraction=0.5,
                         use_gene_symbols=useSymbols)

    query = SeekQuery(species=species, genes=genes, parameters=params)

    result = client.seek_query(query)
    if result.success is not True:
        print(f'query error: {result.statusMsg}')
        return

    # The limit is checked BEFORE printing so that exactly numToPrint rows are
    # shown; testing `i > numToPrint` after the print lets numToPrint + 2
    # rows through.
    for i, gs in enumerate(result.gene_scores):
        if i >= numToPrint:
            break
        print(f'gene: {gs.name}, {gs.value}')

    for i, ds in enumerate(result.dataset_weights):
        if i >= numToPrint:
            break
        print(f'dset: {ds.name}, {ds.value}')


In [None]:
# functions to run a set of queries in parallel
class Query:
    """Plain container describing one query to submit.

    Attributes:
        species: Species name (defaults to "Unknown").
        genes: Query gene identifiers, stored exactly as given (default None).
        useSymbols: Flag later forwarded as ``use_gene_symbols`` by initQuery.
    """

    def __init__(self, species="Unknown", genes=None, useSymbols=False):
        # Values are stored as-is; nothing is validated or copied.
        self.species, self.genes, self.useSymbols = species, genes, useSymbols

def initQuery(query):
    """Submit a query asynchronously and return its task handle.

    Args:
        query: Object exposing ``species``, ``genes`` and ``useSymbols``
            (e.g. a ``Query`` instance).

    Returns:
        Whatever ``client.seek_query_async`` returns; completeQuery passes it
        to ``client.seek_get_result`` to fetch the result.
    """
    # Same fixed search parameters as the blocking runQuery helper; only the
    # symbol flag comes from the caller's query object.
    queryParams = QueryParams(
        distance_measure="ZscoreHubbinessCorrected",
        min_query_genes_fraction=0.5,
        min_genome_fraction=0.5,
        use_gene_symbols=query.useSymbols,
    )
    # Use a separate local name for the request instead of rebinding `query`.
    seekRequest = SeekQuery(
        species=query.species,
        genes=query.genes,
        parameters=queryParams,
    )
    return client.seek_query_async(seekRequest)

def completeQuery(task_id, numToPrint=10):
    """Fetch the result of an asynchronous query and print the leading rows.

    Calls ``client.seek_get_result(task_id)``. On success, prints a header
    with the task id, then the first ``numToPrint`` gene scores and the first
    ``numToPrint`` dataset weights; otherwise prints the status message
    carried by the result.

    Args:
        task_id: Handle previously returned by initQuery.
        numToPrint: Maximum number of rows printed from each result list.

    Returns:
        None. All output goes to stdout.
    """
    result = client.seek_get_result(task_id)
    if result.success is not True:
        print(f'query error: {result.statusMsg}')
        return

    print(f'### {task_id} ###')

    # The limit is checked BEFORE printing so that exactly numToPrint rows are
    # shown; testing `i > numToPrint` after the print lets numToPrint + 2
    # rows through.
    for i, gs in enumerate(result.gene_scores):
        if i >= numToPrint:
            break
        print(f'gene: {gs.name}, {gs.value}')

    for i, ds in enumerate(result.dataset_weights):
        if i >= numToPrint:
            break
        print(f'dset: {ds.name}, {ds.value}')
    

In [None]:
# Run series of queries asynchronously
# Each entry is Query(species, genes, useSymbols); the fly query omits the
# flag and therefore uses the default useSymbols=False.
queries = [
    Query('human', ['SMO', 'PTCH1', 'PTCH2', 'BOC'], True),
    Query('fly', ['35234', '35232']),
    Query('mouse', ['GLI1', 'GLI2', 'PTCH1'], True),
    Query('worm', ['ptc-1', 'smo-1', 'tra-1'], True),
    Query('yeast', ['FIG1', 'FUS1', 'FUS2'], True),
    Query('zebrafish', ['PTCH2', 'PTCH1', 'BOC'], True),
]

# Submit every query first, then collect and print results in submission order.
taskIds = list(map(initQuery, queries))
for taskId in taskIds:
    completeQuery(taskId)
    

In [None]:
# Blocking fly query using gene symbols.
species, genes = 'fly', ['CG18094', 'CG10189']
# Alternative gene list by numeric ID (presumably needs useSymbols=False, as
# in the async fly query -- TODO confirm):
# genes = ['35234', '35232']
runQuery(species=species, genes=genes, useSymbols=True)

In [None]:
# Blocking mouse query using gene symbols.
species, genes = 'mouse', ['GLI1', 'GLI2', 'PTCH1']
# Alternative gene list (presumably numeric IDs for the same genes, to be run
# with useSymbols=False -- TODO confirm):
# genes = ['14632', '14633', '19206']
runQuery(species=species, genes=genes, useSymbols=True)

In [None]:
# Blocking human query using gene symbols.
species, genes = 'human', ['SMO', 'PTCH1', 'PTCH2', 'BOC']
runQuery(species=species, genes=genes, useSymbols=True)

In [None]:
# Blocking query against the 'mock' species; genes are given as IDs, so
# symbol lookup is turned off.
species, genes = 'mock', ['90634', '23659']
runQuery(species=species, genes=genes, useSymbols=False)

In [None]:
# Run series of queries asynchronously
# Seven identical 'mock' queries; the comprehension builds a separate Query
# (and a separate gene list) for each one.
queries = [Query('mock', ['90634', '23659']) for _ in range(7)]

# Submit every query first, then collect and print results in submission order.
taskIds = list(map(initQuery, queries))
for taskId in taskIds:
    completeQuery(taskId)

In [None]:
## close the server connection
# Closes the buffered transport opened in the connection cell. Run this last:
# `client` was built on top of this transport, so the query helpers above
# depend on it being open.
transport.close()