In [1]:
import json

# Load the per-contig results produced upstream.
with open('../data/results2.json') as results_file:
    data = json.load(results_file)

# `data` is a list with one dict per contig. The attributes a contig may carry are:
#   'name', 'length', 'cs', 'ucs', 'depth', 'confidence'
#
#   name       - string containing the id of the contig
#   length     - integer, the length of the contig
#   cs         - the core sequences; each entry has the attributes
#                'name', 'position' and 'length'
#   ucs        - the unique core sequences; each entry has the attributes
#                'name', 'position' and 'length'
#   depth      - integer, the number of k-mers covering a given position in the contig
#   confidence - computed float estimating a confidence probability for a given
#                position in the contig
#
# Not every contig carries every attribute (the first contig printed below has no
# 'ucs', for example), so downstream code must not assume all keys are present.

# Sanity check: show which attributes the first contig actually has.
# print() with a single argument behaves the same on Python 2 and Python 3;
# list() keeps the output a plain list on Python 3 as well.
print(list(data[0].keys()))

[u'cs', u'length', u'depth', u'confidence', u'name']


## Create formatted data structure

In [2]:
# Total genome size used to estimate how much sequence lies between contigs.
# NOTE(review): presumably the size (in bases) of the genome behind
# results2.json - confirm before reusing this notebook on other data.
genomesize = 6264404

# Combined length of all contigs; contigs without a 'length' attribute
# contribute nothing to the total.
tot_len = sum(contig.get('length', 0) for contig in data)

# Spread the part of the genome not covered by any contig evenly over the
# contigs. Floor division keeps the gap an integer on both Python 2 and
# Python 3 (plain `/` returns a float on Python 3, which would then leak into
# every start/end coordinate computed from it). An empty result set gets a
# gap of 0 instead of a ZeroDivisionError.
# NOTE(review): the divisor counts every contig, including any that lack a
# 'length' - confirm that is the intended average.
gap_bet_contigs = (genomesize - tot_len) // len(data) if data else 0
print(gap_bet_contigs)

3009


In [3]:
# Build the three feature tracks (contigs, core sequences, unique core
# sequences) by laying the contigs out one after another on a single axis,
# separated by `gap_bet_contigs`.
reference = []
core_seq = []
unique_core_seq = []

# Running layout cursors: the coordinate at which the next contig starts on the
# reference track and on the core-sequence track. Keeping them as explicit
# variables (rather than reading back the previous feature's 'end') means the
# layout still works when the first contig in `data` has to be skipped.
ref_next_start = 0
cs_next_start = 0

# Indices of contigs that could not be laid out because a required key is missing.
skipped_contigs = []

for num_contig, contig in enumerate(data):

    # Features of this contig are collected locally and only committed to the
    # output tracks once the whole contig has been processed, so a contig with
    # a missing attribute never leaves partial entries behind.
    contig_cs = []
    contig_ucs = []

    try:
        contig_len = contig['length']

        ref_feature = {
            'id': num_contig,
            'name': num_contig,
            'strand': -1,
            'start': ref_next_start,
            'end': ref_next_start + contig_len,
        }

        # Unique core sequences are optional. When present they are laid out
        # back to back, starting where the contig's first core sequence starts.
        if 'ucs' in contig:
            ucs_start = cs_next_start
            for ucs_entry in contig['ucs']:
                ucs_end = ucs_start + ucs_entry['length']
                contig_ucs.append({
                    'contig': num_contig,
                    'id': ucs_entry['name'],
                    'name': ucs_entry['name'],
                    'strand': -3,
                    'start': ucs_start,
                    'end': ucs_end,
                })
                ucs_start = ucs_end + 1

        # Core sequences are laid out back to back, each starting one position
        # after the previous one ends.
        cs_start = cs_next_start
        cs_tot_len = 0
        for cs_entry in contig['cs']:
            cs_end = cs_start + cs_entry['length']
            contig_cs.append({
                'contig': num_contig,
                'id': cs_entry['name'],
                'name': cs_entry['name'],
                'strand': -2,
                'start': cs_start,
                'end': cs_end,
            })
            cs_tot_len += cs_entry['length']
            cs_start = cs_end + 1

    except KeyError:
        # Best effort: a contig lacking 'length'/'cs' (or an entry lacking
        # 'name'/'length') is left out of all tracks and the cursors are not
        # advanced, but it is recorded instead of being dropped silently.
        skipped_contigs.append(num_contig)
        continue

    # Commit the contig. Each core sequence is added exactly once.
    reference.append(ref_feature)
    core_seq.extend(contig_cs)
    unique_core_seq.extend(contig_ucs)

    # Advance the cursors to the start of the next contig: past the part of the
    # contig not covered by core sequences, plus the inter-contig gap.
    ref_next_start = ref_feature['end'] + gap_bet_contigs + 1
    cs_next_start = cs_start + (contig_len - cs_tot_len) + gap_bet_contigs

if skipped_contigs:
    print('Skipped %d contig(s) with missing attributes: %s'
          % (len(skipped_contigs), skipped_contigs))


In [4]:
# Names of the tracks and the feature lists that populate them, in matching order.
track_type = ['reference', 'core_sequence', 'unique_core_sequence']
total_tracks = [reference, core_seq, unique_core_seq]

# One track description per feature list; every track shares the same display
# settings and differs only in its name and its items.
contigs = []

for i, track_name in enumerate(track_type):

    contig = {
        'trackName': track_name,
        'trackType': 'stranded',
        'visible': True,
        'inner_radius': 120,
        'outer_radius': 160,
        'trackFeatures': 'complex',
        'featureThreshold': 7000000,
        'mouseclick': 'islandPopup',
        'mouseover_callback': 'islandPopup',
        'mouseout_callback': 'islandPopupClear',
        'linear_mouseclick': 'linearClick',
        'showLabels': True,
        'showTooltip': True,
        'items': total_tracks[i],
    }

    contigs.append(contig)

    

In [5]:
# Write the track descriptions to disk as indented JSON.
with open('../data/contig.data.json', 'w') as out_file:
    json.dump(contigs, out_file, indent=3)