In [1]:
import json

with open('../data/results2.json') as f: 
    data = json.load(f)
    
#The attributes for each contig in the results are:
#'name', 'length', 'cs', 'ucs', 'depth', 'confidence'

##name is a string containing the id of the contig
##length is an integer corresponding to the length of the contig
##cs is the core sequences (each sequence entry have the following attributes: 'name', 'position', 'length')
##ucs are the unique core sequences (each sequence entry have the following attributes: 'name', 'position', 'length')
##depth is an integer corresponding to the number of kmers covering a given position in the contig
##confidence is a computed float estimating a confidence probability for a given position in the contig

print data[0].keys()

[u'cs', u'length', u'depth', u'confidence', u'name']


## Create formatted data structure

In [3]:
import pprint

contigs = []
cs = {}

for num_contig in range(len(data)):

    contig = {}

    contig['trackName'] = 'Contig ' + str(num_contig) 
    contig['trackType'] = 'stranded'
    contig['visible'] = True
    contig['inner_radius'] = 80 
    contig['outer_radius'] = 120
    contig['trackFeatures'] = 'complex'
    contig['featureThreshold'] = 7000000
    contig['mouseclick'] = 'islandPopup'
    contig['mouseover_callback'] = 'islandPopup'
    contig['mouseout_callback'] = 'islandPopupClear'
    contig['linear_mouseclick'] = 'linearPopup'
    contig['showLabels'] = True
    contig['showTooltip'] = True
    contig['linear_mouseclick'] = 'linearClick'
    contig['items'] = []
    
    if num_contig != 0:
        # add a gap of 100 bases from one contig to the other
        cs['end'] += 100 
    
    try:
        
        for num_cs in range(len(data[num_contig]['cs'])):

            cs['id'] = data[num_contig]['cs'][num_cs]['name'] 
            cs['strand'] = -1

            if num_cs == 0 and num_contig == 0:
                cs['start'] = 0
                cs['end'] = data[num_contig]['cs'][num_cs]['length']
            else:
                cs['start'] = cs['end'] + 1
                cs['end'] = cs['start'] + data[num_contig]['cs'][num_cs]['length']

            contig['items'].append(cs.copy())
            
    except KeyError:
        
        pass
    
    contigs.append(contig)
    
gaptrack = {}

gaptrack['trackName'] = 'gapTrack'
gaptrack['trackType']= 'gap'
gaptrack['inner radius'] = 25
gaptrack['outer radius'] = 235
gaptrack['showTooltip'] = True
gaptrack['items'] = []

for i,contig in enumerate(contigs):
    
    if len(contig['items']):
    
        gap = {}
        gap['id'] = i + 1
        gap['start'] = contig['items'][0]['start']
        gap['end'] = contig['items'][-1]['end']
        gap['name'] = 'contig_gap_' + str(i+1)

        gaptrack['items'].append(gap)

contigs.append(gaptrack)
    
pprint.pprint(contigs[0]['items'][-10:])
print "@@@@@@"
pprint.pprint(contigs[1]['items'][0:10])

11


IndexError: list index out of range

## Write data structure to a file

In [None]:
# save to file:
with open('../data/Contigs.json', 'w') as f:
    json.dump(contigs, f)

# load from file:
with open('../data/Contigs.json', 'r') as f:
    try:
        contigs = json.load(f)
    # if the file is empty the ValueError will be thrown
    except ValueError:
        contigs = {}