In [109]:
from interop import py_interop_run_metrics, py_interop_run, py_interop_summary


In [110]:
def parse_interop_data(run_folder, num_reads, num_lanes):
    """
    Parses summary statistics out of interops data using the Illumina interops package
    """

    # make empty dict to store output
    interop_dict = {'read_summaries': {}}

    
    # taken from illumina interops package documentation, all of this is required, 
    # even though only the summary variable is used further on
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    run_folder = run_metrics.read(run_folder, valid_to_load)
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)
    
    
    for read in range(num_reads):
        
        new_read = read + 1
        
        if new_read not in interop_dict['read_summaries']:
            
            interop_dict['read_summaries'][new_read] = {}
        
            
        for lane in range(num_lanes):
            
            new_lane = lane + 1
                
            if new_lane not in interop_dict['read_summaries'][new_read]:
                    
                interop_dict['read_summaries'][new_read][new_lane] = {}
                    
            interop_dict['read_summaries'][read+1][lane+1]['percent_q30'] = summary.at(read).at(lane).percent_gt_q30()
            interop_dict['read_summaries'][read+1][lane+1]['density'] = summary.at(read).at(lane).density().mean()
            interop_dict['read_summaries'][read+1][lane+1]['density_pf'] = summary.at(read).at(lane).density_pf().mean()
            interop_dict['read_summaries'][read+1][lane+1]['cluster_count'] = summary.at(read).at(lane).density_pf().mean()
            interop_dict['read_summaries'][read+1][lane+1]['cluster_count_pf'] = summary.at(read).at(lane).cluster_count_pf().mean()
            interop_dict['read_summaries'][read+1][lane+1]['error_rate'] = summary.at(read).at(lane).error_rate().mean()
            interop_dict['read_summaries'][read+1][lane+1]['percent_aligned'] = summary.at(read).at(lane).percent_aligned().mean()
            interop_dict['read_summaries'][read+1][lane+1]['percent_pf'] = summary.at(read).at(lane).percent_pf().mean()
            interop_dict['read_summaries'][read+1][lane+1]['phasing'] = summary.at(read).at(lane).phasing().mean()
            interop_dict['read_summaries'][read+1][lane+1]['prephasing'] = summary.at(read).at(lane).prephasing().mean()
            interop_dict['read_summaries'][read+1][lane+1]['reads'] = summary.at(read).at(lane).reads()
            interop_dict['read_summaries'][read+1][lane+1]['reads_pf'] = summary.at(read).at(lane).reads_pf()
            interop_dict['read_summaries'][read+1][lane+1]['yield'] = summary.at(read).at(lane).yield_g()

    return interop_dict

In [111]:
parse_interop_data('/media/joseph/Storage/data/archive/nextseq/190913_NB551319_0026_AHT5G5AFXY/', 4, 4)

{'read_summaries': {1: {1: {'percent_q30': 92.2387924194336,
    'density': 244107.328125,
    'density_pf': 214203.265625,
    'cluster_count': 214203.265625,
    'cluster_count_pf': 643285.3125,
    'error_rate': 0.3278927803039551,
    'percent_aligned': 0.9878919124603271,
    'percent_pf': 87.7578353881836,
    'phasing': 0.21830081939697266,
    'prephasing': 0.11702568084001541,
    'reads': 52782588.0,
    'reads_pf': 46316548.0,
    'yield': 3.473161220550537},
   2: {'percent_q30': 92.34395599365234,
    'density': 234868.71875,
    'density_pf': 207354.796875,
    'cluster_count': 207354.796875,
    'cluster_count_pf': 622718.25,
    'error_rate': 0.4515427052974701,
    'percent_aligned': 1.002793312072754,
    'percent_pf': 88.27922058105469,
    'phasing': 0.2153674066066742,
    'prephasing': 0.1224762499332428,
    'reads': 50784988.0,
    'reads_pf': 44835700.0,
    'yield': 3.3620235919952393},
   3: {'percent_q30': 92.49681091308594,
    'density': 247218.6875,
    '