In [None]:
import numpy as np
from itertools import product


# Hyperparameter grid: each key is a parameter name, each value the list of
# candidate settings for that parameter.
param_grid = {
    'batch_size': [64, 128],
    'hidden_dim': [50, 100, 150],
    'dropout': [0, 0.25, 0.5],
}

# Parameter names, in the grid's key order.
param_names = list(param_grid)

# For every parameter show its name, the Python type of its first candidate,
# and the full candidate list.
for pn in param_names:
    pv = param_grid[pn]
    print('ParamGrid {} (type = {}) = {}'.format(pn, type(pv[0]), pv))
    
def search_space():
    """Return an iterator over the Cartesian product of the grid.

    The module-level ``param_grid`` and ``param_names`` are read when the
    function is called; each yielded tuple holds one candidate per parameter,
    in the order given by ``param_names``.
    """
    candidate_lists = (param_grid[name] for name in param_names)
    return product(*candidate_lists)

# NumPy random generator created without an explicit seed.
rand_state = np.random.RandomState()
# Number of random configurations drawn by search_space_2().
n = 1


def search_space_2():
    """Draw ``n`` random configurations from the grid with NumPy.

    For each name in the module-level ``param_names``, ``rand_state.choice``
    picks ``n`` candidates from ``param_grid``; the per-parameter draws are
    then zipped so that every returned tuple holds one value per parameter.

    :returns: list of ``n`` tuples
    """
    per_param_draws = []
    for name in param_names:
        per_param_draws.append(rand_state.choice(param_grid[name], n))
    return list(zip(*per_param_draws))


        
def _print_typed_values(title, configurations):
    """Print every configuration's values together with their types.

    For each configuration the ``title`` line is printed first, then one line
    per value in the form ``<value> (type = <type>)``, then a blank separator.

    :param title: heading printed before each configuration
    :param configurations: iterable of tuples of hyperparameter values
    """
    for param_vals in configurations:
        print(title)
        for v in param_vals:
            print('{} (type = {})'.format(v, type(v)))
        print('\n')


# Values sampled through NumPy's RandomState.choice.
_print_typed_values('Search Space with Numpy Random State', search_space_2())

# Keep only the first parameter before enumerating the exhaustive product
# (presumably to keep the printed output short). search_space() reads the
# module-level param_names, so this reassignment changes what it yields.
param_names = param_names[0:1]
_print_typed_values('Search Space', search_space())

# Compare the type of a value taken straight from the grid with the type of a
# value for the same parameter drawn through rand_state.choice.
print(type(param_grid[param_names[0]][0]))
print(type(rand_state.choice(param_grid[param_names[0]])))

In [None]:
import random
from snorkel.learning import GridSearch

class RandomSearchGPU(GridSearch):
    """
    A GridSearch over a random subsample of the hyperparameter search space.

    Candidates are drawn with the standard-library ``random`` module instead
    of ``numpy.random.RandomState.choice``: ``random.choices`` returns the
    elements of ``parameter_dict`` themselves, so sampled values keep their
    original Python types rather than becoming NumPy scalars.

    :param model_class: forwarded unchanged to ``GridSearch.__init__``
    :param parameter_dict: forwarded to ``GridSearch.__init__``; indexed by
        parameter name in ``search_space`` to obtain the candidate values
    :param X_train: forwarded unchanged to ``GridSearch.__init__``
    :param Y_train: forwarded unchanged to ``GridSearch.__init__``
    :param n: number of random configurations to draw
    :param model_class_params: forwarded to ``GridSearch.__init__``; ``None``
        (the default) is replaced by a fresh empty dict
    :param model_hyperparams: forwarded to ``GridSearch.__init__``; ``None``
        (the default) is replaced by a fresh empty dict
    :param seed: A seed for the GridSearch instance
    :param save_dir: forwarded unchanged to ``GridSearch.__init__``
    """
    def __init__(self, model_class, parameter_dict, X_train, Y_train=None, n=10,
        model_class_params=None, model_hyperparams=None, seed=123,
        save_dir='checkpoints'):
        """Search a random sample of size n from a parameter grid"""
        # Seeded NumPy generator; not used by search_space() but kept as a
        # public attribute for existing code that reads it.
        self.rand_state = np.random.RandomState()
        self.rand_state.seed(seed)
        self.n = n
        # Global seeding is preserved because callers may rely on it.
        random.seed(seed)
        # Private generator: the drawn sample depends only on `seed`, not on
        # whatever else consumes the global `random` state before
        # search_space() is called.
        self._rng = random.Random(seed)
        # Fresh dicts per call instead of shared mutable default arguments.
        if model_class_params is None:
            model_class_params = {}
        if model_hyperparams is None:
            model_hyperparams = {}
        super(RandomSearchGPU, self).__init__(model_class, parameter_dict, X_train,
            Y_train=Y_train, model_class_params=model_class_params,
            model_hyperparams=model_hyperparams, save_dir=save_dir)

    def search_space(self):
        """Return ``n`` randomly drawn configurations as a list of tuples.

        For every name in ``self.param_names`` (presumably set by
        ``GridSearch.__init__`` -- confirm), ``n`` candidates are sampled with
        replacement from ``self.parameter_dict[name]``; the per-parameter
        draws are zipped so each tuple holds one value per parameter.
        """
        return list(zip(*[self._rng.choices(self.parameter_dict[pn], k=self.n)
            for pn in self.param_names]))




In [None]:

# NOTE(review): `torch` is not imported in the visible cells -- confirm it is
# imported before this cell runs.
def pretty_size(size):
    """Pretty prints a torch.Size object

    :param size: a ``torch.Size``; anything else fails the assertion
    :returns: the dimensions joined by " × " (empty string for a 0-d size)
    """
    assert isinstance(size, torch.Size)
    return " × ".join(map(str, size))


def _pinned_suffix(tensor):
    """Return " pinned" when ``tensor.is_pinned()`` is true, else ""."""
    try:
        # is_pinned is a method; testing the bound method itself is always
        # truthy and would label every tensor as pinned.
        return " pinned" if tensor.is_pinned() else ""
    except RuntimeError:
        # A tensor whose pin state cannot be queried is still listed.
        return ""


def dump_tensors(gpu_only=True):
    """Prints a list of the Tensors being tracked by the garbage collector.

    Walks ``gc.get_objects()`` and prints one line per tensor, and one line
    per non-tensor object whose ``data`` attribute is a tensor, followed by
    the total number of elements of everything that was printed.

    :param gpu_only: when true (the default) only objects whose ``is_cuda``
        is true are reported
    :returns: None; the report is written with ``print``
    """
    import gc

    total_size = 0
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj):
                if not gpu_only or obj.is_cuda:
                    print("%s:%s%s %s" % (type(obj).__name__,
                                          " GPU" if obj.is_cuda else "",
                                          _pinned_suffix(obj),
                                          pretty_size(obj.size())))
                    total_size += obj.numel()
            elif hasattr(obj, "data") and torch.is_tensor(obj.data):
                if not gpu_only or obj.is_cuda:
                    # NOTE(review): `volatile` looks like the pre-0.4 Variable
                    # flag; wrappers without it are skipped by the handler
                    # below -- confirm this branch is still wanted.
                    print("%s → %s:%s%s%s%s %s" % (type(obj).__name__,
                                                   type(obj.data).__name__,
                                                   " GPU" if obj.is_cuda else "",
                                                   _pinned_suffix(obj.data),
                                                   " grad" if obj.requires_grad else "",
                                                   " volatile" if obj.volatile else "",
                                                   pretty_size(obj.data.size())))
                    total_size += obj.data.numel()
        except Exception:
            # Deliberate best effort: arbitrary objects returned by the
            # garbage collector may raise on attribute access; skip them.
            continue
    print("Total size:", total_size)


dump_tensors()

In [None]:

def _load_relation_facts(path, relation, first_id_to_span, second_id_to_span):
    """Read one relation TSV file and register its rows in ``fact_store``.

    The first line of the file is treated as a header and skipped. In every
    remaining row, column 0 is the document id, columns 3 and 4 are the id and
    text span of the first entity, and columns 5 and 6 are the id and text
    span of the second entity (column 1, the sentence id, is not used).
    Each row is stored as the fact ``(first_id, relation, second_id)``.

    :param path: path of the tab-separated file to read
    :param relation: label placed in the middle of every fact triple
    :param first_id_to_span: mapping updated with id -> span of the first entity
    :param second_id_to_span: mapping updated with id -> span of the second entity
    :raises IndexError: if a non-blank row has fewer than seven columns
    """
    with open(path, 'r') as f:
        next(f, None)  # skip header (no error on an empty file)
        for line in f:
            if not line.strip():
                # A blank line (e.g. at the end of the file) carries no fact.
                continue

            spl = line.rstrip('\n').split('\t')
            doc_id = spl[0]
            first_id, first_span = spl[3], spl[4]
            second_id, second_span = spl[5], spl[6]

            first_id_to_span[first_id] = first_span
            second_id_to_span[second_id] = second_span
            fact_store.add_fact(doc_id, (first_id, relation, second_id))


# Metabolism rows list the gene first and the chemical second.
_load_relation_facts('results/gene_chemical_metabolism.tsv', 'meta',
                     fact_store.gen_id_to_span, fact_store.chem_id_to_span)
fact_store.print_info()

# Inhibition rows list the chemical first and the gene second.
_load_relation_facts('results/chemical_gene_inhibition.tsv', 'inh',
                     fact_store.chem_id_to_span, fact_store.gen_id_to_span)
fact_store.print_info()

In [None]:
# sim id (presumably simvastatin's MeSH identifier -- confirm)
sim_id = 'MESH:D019821'
cyp3a4_id = '1576'

# Facts are looked up in fact_store.fact_to_id by the frozenset of their
# three elements.
# NOTE(review): rhabdo_id is not defined in this cell; it has to exist in the
# kernel already -- confirm where it is set.
key = frozenset([sim_id, 'cd', rhabdo_id])
print(key)
f_sr_id = fact_store.fact_to_id[key]
print(f_sr_id)

key = frozenset([cyp3a4_id, 'meta', sim_id])
f_gs_id = fact_store.fact_to_id[key]
print(f_gs_id)

# One candidate set holding both fact ids, wrapped in the list of sets to check.
check_set = {f_sr_id, f_gs_id}
to_check = [check_set]
print("To_check: {}".format(to_check))


In [None]:
import re

#str_text = 'telaprevir inhibits the reaction [CYP3A4 protein results in increased metabolism of Simvastatin]'.lower()
#str_text = 'simvastatin inhibits the reaction [CYP3A4 protein results in inhibit metabolism of Simvastatin]'.lower()
str_text = 'tamoxifen inhibits the reaction [amitraz results in increased expression of th protein]'

if 'inhibit' in str_text:
    print("yes contained")


def inhibit_pattern(first_name, second_name):
    """Build the regex for ``first_name ... inhibit ... second_name``.

    The text between the two names and the word "inhibit" must be at least
    one character long and must not contain a square bracket.

    :param first_name: entity name that has to appear before "inhibit"
    :param second_name: entity name that has to appear after "inhibit"
    :returns: the pattern as a string, ready for ``re.search``
    """
    # Raw string: `\]` and `\[` are not valid escapes in a normal string
    # literal. The names are escaped so metacharacters in them (brackets,
    # parentheses, "+", ...) are matched literally.
    return r'{}[^\]\[]+inhibit[^\]\[]+{}'.format(re.escape(first_name),
                                                 re.escape(second_name))


che_name = 'tamoxifen'
gen_name = 'amitraz'
regex1 = inhibit_pattern(che_name, gen_name)
regex2 = inhibit_pattern(gen_name, che_name)

res1 = re.search(regex1, str_text)
res2 = re.search(regex2, str_text)

print(res1)
print(res2)