In [1]:
import requests

cortex_request = requests.get("http://loom.linnarssonlab.org/clone/Previously%20Published/Cortex.loom", stream=True)
cortex_file = open("cortex.loom", "wb")
cortex_file.write(cortex_request.raw.read())
cortex_file.close()

In [None]:
import loompy
cortex = loompy.connect("cortex.loom")

In [5]:
list(cortex.attrs.items())

[('title', 'Cortex and hippocampus'),
 ('description', 'Cortex and hippocampus by Zeisel et al. 2015'),
 ('doi', 'doi:10.1126/science.aaa1934'),
 ('url',
  'http://science.sciencemag.org/content/early/2015/02/18/science.aaa1934.full'),
 ('CreationDate', '2017/12/25 22:11:15'),
 ('last_modified', '20180405T232022.549660Z')]

In [53]:
cortex.shape

(21135, 3005)

In [50]:
def list_attributes(attrs):
    for k,v in attrs:
        uval = list(set(v))
        if len(uval) < len(v):
            print(k + ' => ' + str(uval[0:50]))

In [52]:
list_attributes(cortex.ra.items())

Gene => ['Mbl2', 'Cd163', 'Mir1898', 'Baz2b', 'P2ry13', 'Spag8', 'Rnase4', 'Mir654', 'Ip6k3', 'Gm16063', 'Fancf', 'Nfix', 'Cpb2', 'H2-Ab1', 'Ptpdc1', 'Hao2', 'Ammecr1', 'r_RLTR13G', 'Vmn1r31', 'Kcnh5', 'Gria1', 'Lims1', 'Rusc1', 'Chst14', 'Syf2', 'Glis3', '4833411C07Rik', 'r_MMERGLN-int', '8030462N17Rik', '1810007D17Rik', 'Gm8773', 'Map1lc3a', 'Speer6-ps1', 'Alpk3', 'Suclg2', 'Lmbrd2', 'Krt20', 'C430002E04Rik', 'Cilp', 'Slc44a4', 'Havcr2', 'A330040F15Rik', 'Ncoa3', 'Phka2', 'Cdca2', 'Zfp521', 'Kctd19', 'Ccer1', 'Chrm1', 'Acadl']
GeneGroup => ['2', '9', '5', '1', '6', '-1', '0', '7', '8', '3', '4']
GeneType => ['mRNA', 'Mitochondrial', 'Repeat', 'Spikein']


In [51]:
list_attributes(cortex.ca.items())

Age => ['23', '25', '27', '21', '28', '20', '31', '26', '22', '24']
Class => ['oligodendrocytes', 'astrocytes_ependymal', 'pyramidal SS', 'endothelial-mural', 'pyramidal CA1', 'interneurons', 'microglia']
Diameter => ['8.49', '9.72', '8.84', '9.88', '9.13', '9.82', '7.96', '19.1', '8.86', '20.3', '7.48', '8.19', '9.84', '10.2', '7.5', '9.62', '7.09', '8.36', '10.3', '21.4', '15.6', '17.4', '9.05', '6.75', '8.2', '7.82', '9.86', '13.7', '7.32', '25.6', '6.76', '18.2', '6.06', '6.59', '8.15', '19.2', '8.68', '9.91', '8.64', '8.34', '15', '9.81', '8.94', '6.45', '8.47', '7.54', '6.91', '18.8', '7.11', '9.92']
Group => ['2', '9', '5', '1', '6', '7', '8', '3', '4']
Sex => ['0', '-1', '1']
Subclass => ['Int15', 'Mgl2', 'Int4', 'Int10', 'Peric', 'Pvm1', 'Oligo1', '(none)', 'S1PyrL5', 'Int8', 'CA1Pyr1', 'Pvm2', 'Vend1', 'Oligo5', 'Vend2', 'Int7', 'Int12', 'Oligo6', 'Oligo4', 'Vsmc', 'S1PyrL4', 'Astro2', 'Int2', 'Int11', 'Int13', 'CA2Pyr2', 'SubPyr', 'Int14', 'Int5', 'S1PyrL6b', 'Epend', 'Int1'

#### Fields we might provide semantic maps for:

* Gene type
* Class (= Cell type)
* subClass ( = more specific Cell type)
* Tissue
* Sex (but what do the entries mean?)

With the help of the paper and some searching on the Ontology Lookup Service, these can easily be mapped to cell ontology terms:  'interneuron': "CL:0000099", 'oligodendrocyte': 'CL:0000128', 'microglial cell' : 'CL:0000129', 'pyramidal neuron' : 'CL:0000598', 'ependymal cell': 'CL:0000065', 'astrocyte' : 'CL:0000127', 'endothelial cell': 'CL:0000115'

Note that some of the annotation strings map to multiple cell types, for example, astrocytes_ependymal corresponds to  'ependymal cell': 'CL:0000065' OR 'astrocyte' : 'CL:0000127'

Similarly, the two values in the tissue field, can be mapped as follows:

'CA1 field of hippocampus': 'UBERON:0003881',  'somatosensenory cortex': 'UBERON:0008930'


In [None]:
# (Crude) Function to roll map element

def roll_map(name, applicable_to, maps_to, relation = '', obj = ''):
    out = { "name": name, "applicable_to": applicable_to,
              "maps_to": maps_to }
    if relation:
        out['subject_of'] = { "relation": relation, "object": obj }
        

mappings = []

mappings.append(roll_map("interneurons", ["ca.Class"], { "name": "interneuron",  "id": "CL:0000099" },
               relation={ "name": "has_soma_location", "id": "RO:0002100" }, obj="ca.Tissue" ))


mappings.append(roll_map())

In [None]:
# Validating JSON

import 

# Inserting JSON into header

In [60]:
# Function to translate dot paths to Loom attributes.

import warnings
import json

def check_string(x):
    if type(x)== 'str':
        return true
    else:
        warnings.warn
        
def is_json(myjson):
  try:
    json_object = json.loads(myjson)
  except:
    return False
  return True

def dot_path2jpath(dot.path, json_string):
    json.loads(json_string)
    # Something with JPATH
    return 

def resolve_dot_path(loom, path):
    elements = path.split('.')
    if elements[0] == ca:
        column = loom.ca[elements[1]]
        if check_string(column[0]):
            return list(set(column))
    if elements[0] == ra:
        row = loom.ra[elements[1]]        
        if check_string(row[0]):
            return list(set(row))        
    if elements[0] == attrs:
        attr = loom.attr[element[1]
        if is_json(attr):
            return dot_path2jpath(attr)
        else:
            return [attr]
        
# Validation - do all maps resolve to terms used.

# Validation - ontology term IRI resolution

# Function to query by ontology term

def query_by_ontology_name:
    return
    
def query_by_ontology_id:
    return
    

# Function for Ontology term query with grouping (via OLS)