In [1]:
import json

# Load the per-contig results produced upstream; the JSON file is expected
# to hold a list with one record (dict) per contig.
with open('../data/results2.json') as f:
    data = json.load(f)

# Attributes of each contig record:
#   'name', 'length', 'cs', 'depth', 'confidence' and, optionally, 'ucs'
#
#   name       -- string containing the id of the contig
#   length     -- integer length of the contig
#   cs         -- the core sequences; each entry has the attributes
#                 'name', 'position', 'length'
#   ucs        -- the unique core sequences (same entry attributes as cs);
#                 this key is not present in every record
#   depth      -- integer number of kmers covering a given position in the contig
#   confidence -- computed float estimating a confidence probability for a
#                 given position in the contig

# Call form of print: valid on both Python 2 and Python 3 kernels.
print(data[0].keys())

[u'cs', u'length', u'depth', u'confidence', u'name']


## Create formatted data structure

In [2]:
import pprint

# Number of bases left empty between two consecutive contigs when they are
# laid out one after the other on the shared coordinate axis.
CONTIG_GAP = 30000

contigs = []

# Scratch records reused across iterations. Their 'end' value doubles as the
# running cursor for the next placement, and a .copy() is stored for every
# emitted item so earlier items are never mutated by later iterations.
ref = {}
cs = {}
ucs = {}

for num_contig, record in enumerate(data):

    # One display track per contig; all tracks share the same settings.
    # ('linear_mouseclick' used to be assigned twice; only the last value,
    # 'linearClick', ever took effect, so it is the one kept here.)
    contig = {
        'trackName': 'Contig',
        'trackType': 'stranded',
        'visible': True,
        'inner_radius': 80,
        'outer_radius': 120,
        'trackFeatures': 'complex',
        'featureThreshold': 7000000,
        'mouseclick': 'islandPopup',
        'mouseover_callback': 'islandPopup',
        'mouseout_callback': 'islandPopupClear',
        'linear_mouseclick': 'linearClick',
        'showLabels': True,
        'showTooltip': True,
        'items': [],
    }

    if num_contig != 0:
        # Leave CONTIG_GAP bases between this contig and the previous one.
        # A cursor only exists once something has been placed; bumping a
        # missing one would raise a KeyError outside the try block below.
        if 'end' in cs:
            cs['end'] += CONTIG_GAP
        if 'end' in ref:
            ref['end'] += CONTIG_GAP

    try:
        # Reference item: the contig itself, placed right after the
        # previously placed contig (or at 0 when none has been placed yet).
        ref['id'] = num_contig
        ref['name'] = num_contig
        ref['strand'] = -1
        if 'end' in ref:
            ref['start'] = ref['end'] + 1
        else:
            ref['start'] = 0
        ref['end'] = ref['start'] + record['length']
        contig['items'].append(ref.copy())

        # Core sequences are laid end to end on their own running cursor.
        # NOTE(review): the 'position' attribute of the entries is never
        # read, so items are not placed relative to the contig start --
        # confirm this is the intended layout.
        for num_cs, core in enumerate(record['cs']):
            cs['id'] = core['name']
            cs['name'] = core['name']
            cs['strand'] = -2
            if 'end' in cs:
                cs['start'] = cs['end'] + 1
            else:
                cs['start'] = 0
            cs['end'] = cs['start'] + core['length']
            contig['items'].append(cs.copy())

            # Unique core sequences (optional key) are emitted once per
            # contig, right after its first core sequence, and start at
            # that core sequence's start coordinate.
            if num_cs == 0 and 'ucs' in record:
                for num_ucs, unique_core in enumerate(record['ucs']):
                    ucs['id'] = unique_core['name']
                    ucs['name'] = unique_core['name']
                    ucs['strand'] = -3
                    if num_ucs == 0:
                        ucs['start'] = cs['start']
                    else:
                        ucs['start'] = ucs['end'] + 1
                    ucs['end'] = ucs['start'] + unique_core['length']
                    contig['items'].append(ucs.copy())

    except KeyError as missing_key:
        # Best effort: a malformed record keeps whatever items were built
        # before the missing key was hit, but the problem is reported
        # instead of being silently ignored.
        print('Contig %d: missing key %s, keeping %d item(s) built so far'
              % (num_contig, missing_key, len(contig['items'])))

    contigs.append(contig)
    
# Companion track holding one span per contig track that has items.
gaptrack = {
    'trackName': 'gapTrack',
    'trackType': 'gap',
    'inner_radius': 25,
    'outer_radius': 235,
    'showTooltip': True,
    'items': [],
}

for i, contig in enumerate(contigs):
    track_items = contig['items']
    if not track_items:
        # A contig track without items has no coordinates to span.
        continue

    ordinal = i + 1  # ids and names are 1-based
    # The span runs from the start of the track's first item to the end
    # of its last item.
    gap = {
        'id': ordinal,
        'start': track_items[0]['start'],
        'end': track_items[-1]['end'],
        'name': 'contig_gap_' + str(ordinal),
    }
    gaptrack['items'].append(gap)

#contigs.append(gaptrack)
    

## Write data structure to a file

In [3]:
# Serialise the contig tracks to disk as JSON indented by 3 spaces.
with open('../data/contig.data.json', 'w') as out_file:
    out_file.write(json.dumps(contigs, indent=3))
    