In [None]:
import pandas as pd

# Load the connectivity CSV into a pandas DataFrame.
# The "T_" prefix on the file name marks the transposed version of the table.
connectivity_tab = pd.read_csv('./T_1-s2.0-S0092867416312429-mmc5.csv')

# Preview the first 10 rows of the table
connectivity_tab.head(10)


In [None]:
# Collect the column headers; they are used as keys for the lookup later on
columns = connectivity_tab.columns.tolist()
columns[:5]  # preview the first five headers

In [None]:
# Load the lookup table CSV and preview its first five rows
fbrf_table = pd.read_csv('./fbrf-table.csv')
fbrf_table.head()

In [None]:
# Dictionary pairing each entry of the Keys column (neuron) with the entry of
# the Values column on the same row (ID)
lookup = {
    name: identifier
    for name, identifier in zip(fbrf_table['Keys'], fbrf_table['Values'])
}

lookup

In [None]:
# De-duplicate the lookup values by collecting them into a set
neurons = {identifier for identifier in lookup.values()}
neurons

In [None]:
# Build the template-specification row (first row of the output table):
# each key is a column header, each value is the template spec for that column.

# Every data column is named after a neuron ID and shares the same template spec
data_columns = dict.fromkeys(neurons, "C 'synapsed to' some %")

# The three fixed leading columns come first, followed by the data columns
template_seed = {
    'ID': 'ID',
    'CLASS_TYPE': 'CLASS_TYPE',
    'RDF_Type': 'TYPE',
    **data_columns,
}

# from_records expects a list of dicts, one per row; the template has a single row
template = pd.DataFrame.from_records([template_seed])



In [None]:
# Build one output row per FBbt ID found in the first column of the input table.
#
# `rows` maps FBbt ID -> row dict. A row dict holds the three fixed template
# columns plus one entry per looked-up column header: the FBbt ID itself when a
# value > 0 was seen for any header mapping to that ID, otherwise ''.
rows = {}
for _, r in connectivity_tab.iterrows():
    # The first column holds the row name. Positional access goes through .iloc,
    # since integer keys on a label-indexed Series are deprecated as positions.
    # Only leading whitespace is stripped.
    input_row_key = r.iloc[0].lstrip()
    # Skip to next iteration if row key not in lookup
    if input_row_key not in lookup:
        continue
    output_row_key = lookup[input_row_key]  # input_row_key is name, output_row_key is FBbt

    # Reuse the row already stored for this FBbt ID so results are merged into it;
    # create a fresh row only the first time the ID is seen. Without this, a
    # repeated ID would write into the dict left over from the previous row.
    row_dict = rows.setdefault(
        output_row_key,
        {'ID': output_row_key, 'CLASS_TYPE': 'subclass', 'RDF_Type': 'owl:class'},
    )

    # Iterate over the remaining cells (k = key/column header, v = value of cell),
    # skipping the first column
    for k, v in r.iloc[1:].items():
        key = k.lstrip()  # Headers have a leading space; strip it before lookup
        # Ignore columns we cannot look up
        if key not in lookup:
            continue
        fbbt = lookup[key]
        # Already marked by another header mapping to the same ID: do not
        # overwrite the value with a blank
        if row_dict.get(fbbt) == fbbt:
            continue
        # v may be a string, so convert it to an integer before checking it's > 0
        if int(v) > 0:
            row_dict[fbbt] = fbbt
        else:
            # default assumption = 0 synapses
            row_dict.setdefault(fbbt, '')
    
# Stack the data rows beneath the template-specification row.
# DataFrame.append was removed in pandas 2.0, so the rows are turned into a
# DataFrame and joined with pd.concat (template columns first, order preserved).
out = pd.concat([template, pd.DataFrame(list(rows.values()))], sort=False)
out

In [None]:
# Write the finished template as a comma-separated file, leaving out the index column
out.to_csv("Jovanovic_template.csv", index=False)