# Overlap GMT
The purpose of this Jupyter Notebook is to rank the kinase-target gene sets by their amount of co-occurence and to store this information in a separate GMT file. This code will be heavily borrowed from the work of Damon Pham, who has written code to perform this task with other libraries. The GMT file containing the kinases and gene sets will be the Combined GMT file created as a culmination of seven databases containing information regarding kinase-target interactions. 

Date created: 6/27/17

## Import Packages Needed to Run Code

In [21]:
import csv
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
from joblib import Parallel, delayed

In [22]:
#Input: library file from Enrichr. Output: transformed library file with genes as indices and tfs as columns; membership denoted by True/False.
#Checks beforehand if the transformed file has already been created.
#Convert each GMT file to a matrix where the indices are genes and columns are genesets. 
#Cell values are Boolean T/F, showing whether the gene is in
#the geneset or not. 
def library_csv_to_df(library_file):
    tformed_lib_fname = library_file.replace('.txt', '_transformed.csv')
    if os.path.isfile(tformed_lib_fname):
        df = pd.read_csv(tformed_lib_fname, index_col=0, sep='\t')
    else:
        print('transforming', library_file)
    with open(library_file, 'r') as f:
        df = pd.DataFrame(False, index = [''], columns = [''], dtype=bool)
        reader = csv.reader(f, delimiter='\t')
        for row in reader:
            row = [x.replace(',1.0', '') for x in row]
            #print('transforming', row[0])
            s = pd.DataFrame(True, index = row[2:], columns = [row[0]], dtype=bool)
            df = pd.concat([df,s], axis=1)
    df = df[pd.notnull(df.index)].fillna(False)
    df = df.loc[pd.notnull(df.index)]
    df.drop('', inplace=True)
    df.drop('', axis=1, inplace=True)
    df.to_csv(tformed_lib_fname, sep='\t')
    df = df.to_sparse()
    return df

#Test if creation of geneset matrix works
overlap = library_csv_to_df('Combined.gmt')
overlap

In [23]:
def get_coo_matrix(lib2_df, lib1_tf):
	#input: dataframe of lib2 tf vectors, and the column of lib1 corresponding to a tf
	print(lib1_tf)
	p_matrix = {}
	for itf2 in range(0,lib2_df.shape[1]): #if lib1 is the same as lib2, then use 'range(lib1_tf,lib2_df.shape[1])' instead to avoid repeats
		intersection = pd.concat([lib2_df.iloc[:,itf2],[lib2_df.iloc[:,itf2]], lib2_df[lib1_tf]], axis=1, join='inner')
		a = intersection.shape[0]
		b = lib2_df.iloc[:,itf2].sum() - a
		c = lib1_tf.sum() - a
		d = 20000 - a - b - c
		o, p = stats.fisher_exact([[a,b],[c,d]], alternative='greater')
		p_matrix[lib1_tf.name + ',' + lib2_df.columns.values[itf2]] = p
	return p_matrix

if __name__ == '__main__': #this mysterious statement here is necessary for parallel processing
	lib1 = 'NetworKIN.gmt' 
	lib2 = 'MINT.gmt' 

	transformed = {}
	for x in [lib1, lib2]:
		transformed[x] = library_csv_to_df(x)

In [24]:
#n_jobs is number of simultaneous processes: seven is about the highest it can go without slowing computer too much or crashing
#function iterates over lib2, parallel processing iterates over lib1
#p_dicts becomes a list of dicts returned by get_coo_matrix for each Parallel iteration
p_dicts = Parallel(n_jobs=7)(delayed(get_coo_matrix)(transformed[lib2], column) for column in transformed[lib1])
p_combined_dict = { k: v for d in p_dicts for k, v in d.items() }
p_df = pd.DataFrame(index=tformed_libs[feature_lib].columns, columns = tformed_libs[feature_lib].columns)
for x in p_combined_dict:
	tf1 = x.partition(',')[0]
	tf2 = x.partition(',')[2]
	p_df.at[tf1,tf2] = p_combined_dict[x]
p_df.to_csv(feature_lib.replace('.txt', '') + '_coo_p_vals.csv', sep='\t')

FRK_Homo sapiens
PKCzeta_Homo sapiens
MAPK1_Homo sapiens
TTK_Homo sapiens
CDK2_Homo sapiens
CDK1_Homo sapiens
PKAalpha_Homo sapiens
CK2alpha_Homo sapiens
PKBalpha_Homo sapiens
PKCdelta_Homo sapiens
HIPK2_Homo sapiens


JoblibTypeError: JoblibTypeError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/runpy.py in _run_code(code=<code object <module> at 0x7febe7ee3270, file "/...3.6/site-packages/ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/home/maayanlab/anaconda3/lib/python3.6/site-packages/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/home/maayanlab/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/home/maayan.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x7febe7ee3270, file "/...3.6/site-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/home/maayanlab/anaconda3/lib/python3.6/site-packages/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/home/maayanlab/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/home/maayan.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    472             return self.subapp.start()
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         try:
--> 477             ioloop.IOLoop.instance().start()
    478         except KeyboardInterrupt:
    479             pass
    480 
    481 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 5
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 5), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 5)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=5)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 6, 27, 16, 35, 39, 1772, tzinfo=datetime.timezone.utc), 'msg_id': 'F0D4177C2E324A0AB3228A314863724B', 'msg_type': 'execute_request', 'session': '3034417890BD48F6914CD0EFEA3AB055', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': 'F0D4177C2E324A0AB3228A314863724B', 'msg_type': 'execute_request', 'parent_header': {}})
    230             self.log.warn("Unknown message type: %r", msg_type)
    231         else:
    232             self.log.debug("%s: %s", msg_type, msg)
    233             self.pre_handler_hook()
    234             try:
--> 235                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'3034417890BD48F6914CD0EFEA3AB055']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 6, 27, 16, 35, 39, 1772, tzinfo=datetime.timezone.utc), 'msg_id': 'F0D4177C2E324A0AB3228A314863724B', 'msg_type': 'execute_request', 'session': '3034417890BD48F6914CD0EFEA3AB055', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': 'F0D4177C2E324A0AB3228A314863724B', 'msg_type': 'execute_request', 'parent_header': {}}
    236             except Exception:
    237                 self.log.error("Exception in message handler:", exc_info=True)
    238             finally:
    239                 self.post_handler_hook()

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'3034417890BD48F6914CD0EFEA3AB055'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 6, 27, 16, 35, 39, 1772, tzinfo=datetime.timezone.utc), 'msg_id': 'F0D4177C2E324A0AB3228A314863724B', 'msg_type': 'execute_request', 'session': '3034417890BD48F6914CD0EFEA3AB055', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': 'F0D4177C2E324A0AB3228A314863724B', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    191 
    192         self._forward_input(allow_stdin)
    193 
    194         reply_content = {}
    195         try:
--> 196             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')"
        store_history = True
        silent = False
    197         finally:
    198             self._restore_input()
    199 
    200         if res.error_before_exec is not None:

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=(r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')",), **kwargs={'silent': False, 'store_history': True})
    528             )
    529         self.payload_manager.write_payload(payload)
    530 
    531     def run_cell(self, *args, **kwargs):
    532         self._last_traceback = None
--> 533         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = (r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')",)
        kwargs = {'silent': False, 'store_history': True}
    534 
    535     def _showtraceback(self, etype, evalue, stb):
    536         # try to preserve ordering of tracebacks and print statements
    537         sys.stdout.flush()

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')", store_history=True, silent=False, shell_futures=True)
   2712                 self.displayhook.exec_result = result
   2713 
   2714                 # Execute the user code
   2715                 interactivity = "none" if silent else self.ast_node_interactivity
   2716                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2717                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2718                 
   2719                 self.last_execution_succeeded = not has_raised
   2720 
   2721                 # Reset this so later displayed values do not modify the

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.For object>, <_ast.Expr object>], cell_name='<ipython-input-24-33e35d0e0562>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 7febc40247b8, executi..._before_exec=None error_in_exec=None result=None>)
   2816 
   2817         try:
   2818             for i, node in enumerate(to_run_exec):
   2819                 mod = ast.Module([node])
   2820                 code = compiler(mod, cell_name, "exec")
-> 2821                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7febabd03a50, file "<ipython-input-24-33e35d0e0562>", line 4>
        result = <ExecutionResult object at 7febc40247b8, executi..._before_exec=None error_in_exec=None result=None>
   2822                     return True
   2823 
   2824             for i, node in enumerate(to_run_interactive):
   2825                 mod = ast.Interactive([node])

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7febabd03a50, file "<ipython-input-24-33e35d0e0562>", line 4>, result=<ExecutionResult object at 7febc40247b8, executi..._before_exec=None error_in_exec=None result=None>)
   2876         outflag = 1  # happens in more places, so it's easier as default
   2877         try:
   2878             try:
   2879                 self.hooks.pre_run_code_hook()
   2880                 #rprint('Running code', repr(code_obj)) # dbg
-> 2881                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7febabd03a50, file "<ipython-input-24-33e35d0e0562>", line 4>
        self.user_global_ns = {'In': ['', 'import csv\nimport os\nimport pickle\nimport numpy ...ats as stats\nfrom joblib import Parallel, delayed', "#Input: library file from Enrichr. Output: trans..., sep='\\t')\n    df = df.to_sparse()\n    return df", 'def get_coo_matrix(lib2_df, lib1_tf):\n\t#input: d...ib2_df.columns.values[itf2]] = p\n\treturn p_matrix', "if __name__ == '__main__': #this mysterious stat...sing\n\tlib1 = 'NetworKIN.gmt' \n\tlib2 = 'MINT.gmt' ", 'transformed = {}\nfor x in [lib1, lib2]:\n\ttransformed[x] = library_csv_to_df(x)', 'import csv\nimport os\nimport pickle\nimport numpy ...ats as stats\nfrom joblib import Parallel, delayed', "#Input: library file from Enrichr. Output: trans..., sep='\\t')\n    df = df.to_sparse()\n    return df", 'def get_coo_matrix(lib2_df, lib1_tf):\n\t#input: d...ib2_df.columns.values[itf2]] = p\n\treturn p_matrix', "if __name__ == '__main__': #this mysterious stat...sing\n\tlib1 = 'NetworKIN.gmt' \n\tlib2 = 'MINT.gmt' ", 'transformed = {}\nfor x in [lib1, lib2]:\n\ttransformed[x] = library_csv_to_df(x)', r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')", 'import csv\nimport os\nimport pickle\nimport numpy ...ats as stats\nfrom joblib import Parallel, delayed', "#Input: library file from Enrichr. Output: trans..., sep='\\t')\n    df = df.to_sparse()\n    return df", 'def get_coo_matrix(lib2_df, lib1_tf):\n\t#input: d...ib2_df.columns.values[itf2]] = p\n\treturn p_matrix', r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')", 'def get_coo_matrix(lib2_df, lib1_tf):\n\t#input: d...ib2_df.columns.values[itf2]] = p\n\treturn p_matrix', 'import csv\nimport os\nimport pickle\nimport numpy ...ats as stats\nfrom joblib import Parallel, delayed', "#Input: library file from Enrichr. Output: trans..., sep='\\t')\n    df = df.to_sparse()\n    return df", 'def get_coo_matrix(lib2_df, lib1_tf):\n\t#input: d...ib2_df.columns.values[itf2]] = p\n\treturn p_matrix', ...], 'Out': {}, 'Parallel': <class 'joblib.parallel.Parallel'>, '_': '', '__': '', '___': '', '__builtin__': <module 'builtins' (built-in)>, '__builtins__': <module 'builtins' (built-in)>, '__doc__': 'Automatically created module for IPython interactive environment', '__loader__': None, ...}
        self.user_ns = {'In': ['', 'import csv\nimport os\nimport pickle\nimport numpy ...ats as stats\nfrom joblib import Parallel, delayed', "#Input: library file from Enrichr. Output: trans..., sep='\\t')\n    df = df.to_sparse()\n    return df", 'def get_coo_matrix(lib2_df, lib1_tf):\n\t#input: d...ib2_df.columns.values[itf2]] = p\n\treturn p_matrix', "if __name__ == '__main__': #this mysterious stat...sing\n\tlib1 = 'NetworKIN.gmt' \n\tlib2 = 'MINT.gmt' ", 'transformed = {}\nfor x in [lib1, lib2]:\n\ttransformed[x] = library_csv_to_df(x)', 'import csv\nimport os\nimport pickle\nimport numpy ...ats as stats\nfrom joblib import Parallel, delayed', "#Input: library file from Enrichr. Output: trans..., sep='\\t')\n    df = df.to_sparse()\n    return df", 'def get_coo_matrix(lib2_df, lib1_tf):\n\t#input: d...ib2_df.columns.values[itf2]] = p\n\treturn p_matrix', "if __name__ == '__main__': #this mysterious stat...sing\n\tlib1 = 'NetworKIN.gmt' \n\tlib2 = 'MINT.gmt' ", 'transformed = {}\nfor x in [lib1, lib2]:\n\ttransformed[x] = library_csv_to_df(x)', r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')", 'import csv\nimport os\nimport pickle\nimport numpy ...ats as stats\nfrom joblib import Parallel, delayed', "#Input: library file from Enrichr. Output: trans..., sep='\\t')\n    df = df.to_sparse()\n    return df", 'def get_coo_matrix(lib2_df, lib1_tf):\n\t#input: d...ib2_df.columns.values[itf2]] = p\n\treturn p_matrix', r"#n_jobs is number of simultaneous processes: sev...eplace('.txt', '') + '_coo_p_vals.csv', sep='\t')", 'def get_coo_matrix(lib2_df, lib1_tf):\n\t#input: d...ib2_df.columns.values[itf2]] = p\n\treturn p_matrix', 'import csv\nimport os\nimport pickle\nimport numpy ...ats as stats\nfrom joblib import Parallel, delayed', "#Input: library file from Enrichr. Output: trans..., sep='\\t')\n    df = df.to_sparse()\n    return df", 'def get_coo_matrix(lib2_df, lib1_tf):\n\t#input: d...ib2_df.columns.values[itf2]] = p\n\treturn p_matrix', ...], 'Out': {}, 'Parallel': <class 'joblib.parallel.Parallel'>, '_': '', '__': '', '___': '', '__builtin__': <module 'builtins' (built-in)>, '__builtins__': <module 'builtins' (built-in)>, '__doc__': 'Automatically created module for IPython interactive environment', '__loader__': None, ...}
   2882             finally:
   2883                 # Reset our crash handler in place
   2884                 sys.excepthook = old_excepthook
   2885         except SystemExit as e:

...........................................................................
/home/maayanlab/Desktop/Projects/KEA3/Combined Dataset/<ipython-input-24-33e35d0e0562> in <module>()
      1 #n_jobs is number of simultaneous processes: seven is about the highest it can go without slowing computer too much or crashing
      2 #function iterates over lib2, parallel processing iterates over lib1
      3 #p_dicts becomes a list of dicts returned by get_coo_matrix for each Parallel iteration
----> 4 p_dicts = Parallel(n_jobs=7)(delayed(get_coo_matrix)(transformed[lib2], column) for column in transformed[lib1])
      5 p_combined_dict = { k: v for d in p_dicts for k, v in d.items() }
      6 p_df = pd.DataFrame(index=tformed_libs[feature_lib].columns, columns = tformed_libs[feature_lib].columns)
      7 for x in p_combined_dict:
      8 	tf1 = x.partition(',')[0]
      9 	tf2 = x.partition(',')[2]
     10 	p_df.at[tf1,tf2] = p_combined_dict[x]

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/joblib/parallel.py in __call__(self=Parallel(n_jobs=7), iterable=<generator object <genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=7)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
TypeError                                          Tue Jun 27 12:35:39 2017
PID: 4442                Python 3.6.1: /home/maayanlab/anaconda3/bin/python
...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/joblib/parallel.py in __call__(self=<joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function get_coo_matrix>, (           PASK_Homo sapiens  CSNK2A1_Homo sapie...alse              False  

[76 rows x 11 columns], 'FRK_Homo sapiens'), {})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/joblib/parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function get_coo_matrix>
        args = (           PASK_Homo sapiens  CSNK2A1_Homo sapie...alse              False  

[76 rows x 11 columns], 'FRK_Homo sapiens')
        kwargs = {}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/home/maayanlab/Desktop/Projects/KEA3/Combined Dataset/<ipython-input-23-433d21291d6d> in get_coo_matrix(lib2_df=           PASK_Homo sapiens  CSNK2A1_Homo sapie...alse              False  

[76 rows x 11 columns], lib1_tf='FRK_Homo sapiens')
      1 def get_coo_matrix(lib2_df, lib1_tf):
      2 	#input: dataframe of lib2 tf vectors, and the column of lib1 corresponding to a tf
      3 	print(lib1_tf)
      4 	p_matrix = {}
      5 	for itf2 in range(0,lib2_df.shape[1]): #if lib1 is the same as lib2, then use 'range(lib1_tf,lib2_df.shape[1])' instead to avoid repeats
----> 6 		intersection = pd.concat([lib2_df.loc[:,itf2][lib2_df.loc[:,itf2]], lib1_tf[lib1_tf]], axis=1, join='inner')
      7 		a = intersection.shape[0]
      8 		b = lib2_df.loc[:,itf2].sum() - a
      9 		c = lib1_tf.sum() - a
     10 		d = 20000 - a - b - c

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/pandas/core/indexing.py in __getitem__(self=<pandas.core.indexing._LocIndexer object>, key=(slice(None, None, None), 0))
   1320             try:
   1321                 if self._is_scalar_access(key):
   1322                     return self._getitem_scalar(key)
   1323             except (KeyError, IndexError):
   1324                 pass
-> 1325             return self._getitem_tuple(key)
        self._getitem_tuple = <bound method _NDFrameIndexer._getitem_tuple of <pandas.core.indexing._LocIndexer object>>
        key = (slice(None, None, None), 0)
   1326         else:
   1327             key = com._apply_if_callable(key, self.obj)
   1328             return self._getitem_axis(key, axis=0)
   1329 

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_tuple(self=<pandas.core.indexing._LocIndexer object>, tup=(slice(None, None, None), 0))
    831         raise NotImplementedError("cannot set using an indexer with a Panel "
    832                                   "yet!")
    833 
    834     def _getitem_tuple(self, tup):
    835         try:
--> 836             return self._getitem_lowerdim(tup)
        self._getitem_lowerdim = <bound method _NDFrameIndexer._getitem_lowerdim of <pandas.core.indexing._LocIndexer object>>
        tup = (slice(None, None, None), 0)
    837         except IndexingError:
    838             pass
    839 
    840         # no multi-index, so validate all of the indexers

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_lowerdim(self=<pandas.core.indexing._LocIndexer object>, tup=(slice(None, None, None), 0))
    962         # to avoid wasted computation
    963         # df.ix[d1:d2, 0] -> columns first (True)
    964         # df.ix[0, ['C', 'B', A']] -> rows first (False)
    965         for i, key in enumerate(tup):
    966             if is_label_like(key) or isinstance(key, tuple):
--> 967                 section = self._getitem_axis(key, axis=i)
        section = undefined
        self._getitem_axis = <bound method _LocIndexer._getitem_axis of <pandas.core.indexing._LocIndexer object>>
        key = 0
        axis = undefined
        i = 1
    968 
    969                 # we have yielded a scalar ?
    970                 if not is_list_like_indexer(section):
    971                     return section

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_axis(self=<pandas.core.indexing._LocIndexer object>, key=0, axis=1)
   1546                 indexer = [slice(None)] * self.ndim
   1547                 indexer[axis] = locs
   1548                 return self.obj.iloc[tuple(indexer)]
   1549 
   1550         # fall thru to straight lookup
-> 1551         self._has_valid_type(key, axis)
        self._has_valid_type = <bound method _LocIndexer._has_valid_type of <pandas.core.indexing._LocIndexer object>>
        key = 0
        axis = 1
   1552         return self._get_label(key, axis=axis)
   1553 
   1554 
   1555 class _iLocIndexer(_LocationIndexer):

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/pandas/core/indexing.py in _has_valid_type(self=<pandas.core.indexing._LocIndexer object>, key=0, axis=1)
   1427                                     "key")
   1428                 raise KeyError("the label [%s] is not in the [%s]" %
   1429                                (key, self.obj._get_axis_name(axis)))
   1430 
   1431             try:
-> 1432                 key = self._convert_scalar_indexer(key, axis)
        key = 0
        self._convert_scalar_indexer = <bound method _NDFrameIndexer._convert_scalar_indexer of <pandas.core.indexing._LocIndexer object>>
        axis = 1
   1433                 if not ax.contains(key):
   1434                     error()
   1435             except TypeError as e:
   1436 

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/pandas/core/indexing.py in _convert_scalar_indexer(self=<pandas.core.indexing._LocIndexer object>, key=0, axis=1)
    231 
    232     def _convert_scalar_indexer(self, key, axis):
    233         # if we are accessing via lowered dim, use the last dim
    234         ax = self.obj._get_axis(min(axis, self.ndim - 1))
    235         # a scalar
--> 236         return ax._convert_scalar_indexer(key, kind=self.name)
        ax._convert_scalar_indexer = <bound method Index._convert_scalar_indexer of I...ns', 'AKT1_Homo sapiens'],
      dtype='object')>
        key = 0
        self.name = 'loc'
    237 
    238     def _convert_slice_indexer(self, key, axis):
    239         # if we are accessing via lowered dim, use the last dim
    240         ax = self.obj._get_axis(min(axis, self.ndim - 1))

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in _convert_scalar_indexer(self=Index(['PASK_Homo sapiens', 'CSNK2A1_Homo sapien...ens', 'AKT1_Homo sapiens'],
      dtype='object'), key=0, kind='loc')
   1280                                               'mixed']:
   1281                     return self._invalid_indexer('label', key)
   1282 
   1283             elif kind in ['loc'] and is_integer(key):
   1284                 if not self.holds_integer():
-> 1285                     return self._invalid_indexer('label', key)
        self._invalid_indexer = <bound method Index._invalid_indexer of Index(['...ns', 'AKT1_Homo sapiens'],
      dtype='object')>
        key = 0
   1286 
   1287         return key
   1288 
   1289     _index_shared_docs['_convert_slice_indexer'] = """

...........................................................................
/home/maayanlab/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in _invalid_indexer(self=Index(['PASK_Homo sapiens', 'CSNK2A1_Homo sapien...ens', 'AKT1_Homo sapiens'],
      dtype='object'), form='label', key=0)
   1464     def _invalid_indexer(self, form, key):
   1465         """ consistent invalid indexer message """
   1466         raise TypeError("cannot do {form} indexing on {klass} with these "
   1467                         "indexers [{key}] of {kind}".format(
   1468                             form=form, klass=type(self), key=key,
-> 1469                             kind=type(key)))
        key = 0
   1470 
   1471     def get_duplicates(self):
   1472         from collections import defaultdict
   1473         counter = defaultdict(lambda: 0)

TypeError: cannot do label indexing on <class 'pandas.core.indexes.base.Index'> with these indexers [0] of <class 'int'>
___________________________________________________________________________

In [None]:
for column in transformed[lib1]:
    p_dicts = get_coo_matrix(transformed[lib2], column)
    
p_dicts