AGP -> TPF conversion script

Takes pre-curation TPF and post-curation AGP to create post-curation TPF

Coords are (best guess) converted from AGP to TPF and AGP is actioned in post-curation TPF where possible.


In [1]:
agp_file = '/nfs/treeoflife-01/teams/tola/users/dp24/rapid-pretext/input-data/idSyrVitr1_1.pretext.agp'
tpf_file = '/nfs/treeoflife-01/teams/tola/users/dp24/rapid-pretext/input-data/idSyrVitr1.20221125.decontaminated.fa.tpf'

In [2]:
import pandas as pd
import math
import time
import sys
from itertools import islice
pd.set_option('expand_frame_repr', False)


In [3]:
# Global Paramters

# Helps keep tpfs together enough to minimise error
netsize = 3

# How many texels smaller than the smallest contig we will search for 
lowcutoff = 1.5

# Sex Chromosome IDS
sex = ["X","Y","Z","W"]

prefix = 'R'

borderlen = 80

In [65]:
def read_agp (agp: str) -> pd.DataFrame:
    """
    Reads the input agp, skips the 3*# lines, filters for lines that arn't gap scaffs, then drops the end columns.
    """
    agp_df = pd.read_csv(agp,sep = '\t',skiprows=3, names=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l'])
    agp_df["Notes"] = agp_df['j'].astype(str) +"-"+ agp_df["k"].astype(str)+"-"+ agp_df["l"].astype(str)
    agp_df.drop(inplace=True, columns=['j', 'k', 'l'])

    return agp_df

def read_tpf (tpf: str) -> pd.DataFrame:
    """
    Reads the input tpf file
    """
    return pd.read_csv(tpf,sep = '\t', names=['a', 'b', 'c', 'd'])

def reformat_tpf_df (df: pd.DataFrame) -> pd.DataFrame:
    """
    Reformat the tpf dataframe so this: `scaffold_1:1-2795873` becomes `scaffold_1  1  2795873 {length of scaff}`
    e.g. 1 column becomes 4
    """
    temp_df = pd.DataFrame(columns = ['id', 'tpf_start', 'tpf_end', 'length'])
    
    no_gaps = df[df['a'] != 'GAP']
    temp_df['id'] = no_gaps['b']
            
    temp_df[['scaffold', 'coords']] = temp_df['id'].str.split(":", expand=True)
    temp_df[['tpf_start','tpf_end']] = temp_df['coords'].str.split("-", expand=True)
    temp_df[['tpf_start','tpf_end']].convert_dtypes().astype(int)
    
    temp_df['length'] = round(
        temp_df['tpf_end'].astype('int32') - temp_df['tpf_start'].astype('int32')
    )

    df = pd.concat([df, temp_df], axis=1, join='outer')
    df.columns = ['GAP','id', 'scaffold', 'strand', 'd', 'tpf_start', 'tpf_end', 'length', 'd','d']
    df.drop(inplace=True, columns=['d'])
    temp_df = [] # Deletes temp_df to save mem

    return df

def append_dict(k: str, v: int, d: dict) -> dict:
    if k not in d:
        d[k]=[v]
    else:
        d[k].append(v)
    return d

def dividers (agp_df: pd.DataFrame) -> dict:
    dividers = {}
    return [append_dict(v.f, v.h, dividers) for i, v in agp_df.iterrows()]

def texel_calc (tpf: pd.DataFrame) -> int:
    """
    Count the size of genome based on scaffold lengths
    Calculates the number of texels in pretextmap
    """
    size = tpf['length'].sum()
    
    return size, int(round(size/32768,0))

def filter_keepers (agp_df: pd.DataFrame, tpf_df: pd.DataFrame, texel_size: int):
    """
    Discard all small fragments
    keep all scaffs > 10 texels in length calculate whether to keep
    """
    delete_list = []
    haplotig_list = []
    sex_list = []
    scaff_dict = {}
    discards = {}
    for i, v in agp_df.iterrows():
        if v.e == 'W':
            tpf = tpf_df[tpf_df['scaffold'] == v.f]
            # if superscaff end - start > 10 * texel_size
            if (v.c - v.b) > (10 * texel_size):
                append_dict(v.a,[v.f,v.g,v.h,v.i,v.b,v.c],scaff_dict)
            # TODO: should this just be against the largest (first) scaff for a scaff?
            elif v.c - v.b > [vv.length for ii, vv in tpf.iterrows()][0] - lowcutoff*texel_size:
                # Showcases a need to rename columns
                append_dict(v.a,[v.f,v.g,v.h,v.i,v.b,v.c],scaff_dict)
            else:
                # everything else isn't needed
                append_dict(v.a, v.g, discards)

            # No haplotig data in dataset
            if not pd.isnull(v.Notes):
                if 'HAPLOTIG' in v.Notes:
                    haplotig.append(v.Notes)
                for xxx in sex_list:
                    if xxx in v.Notes:
                        print(f"It's a {xxx}!!")
                        sex_list.append(xxx)

    return delete_list, haplotig_list, sex_list, scaff_dict, discards

def agp_dividers(agpdict: dict) -> dict:
    """
    """
    tdivs={}
    result={}
    for k,v in agpdict.items():
        [append_dict(i[0],int(i[2]),tdivs) for i in v]

    for k, v in tdivs.items():
        result[k]=sorted(v)
    return result

def double_check (tpf_end: int, agp_end: int, scaff_id: str, dic: dict):
    if abs(tpf_end - agp_end) < abs(tpf_end-dic[scaff_id]):
        dic[scaff_id] = tpf_end
    return dic

def get_nearest (tpf: pd.DataFrame, dividers: dict, texel_size: int) -> dict:
    """
    Attempt conversion of AGP coords to TPF coords to get closest scaff to agp coord
    """
    closest = {}
    tpf = tpf[tpf['GAP'] == '?']
    for k, v in dividers.items():
        for scaff_end in v:
            tpf['tpf_end'] = tpf['tpf_end'].astype(int)
            for index, line in tpf[tpf['scaffold'] == k].iterrows():
                ids = f'{k}:{str(scaff_end)}'
                if abs(line.tpf_end - scaff_end) < netsize * texel_size:
                    if ids not in closest:
                        closest[ids]=line.tpf_end
                    else:
                        double_check(line.tpf_end, scaff_end, ids, closest)
    return closest

def tpf_needs_breaks (dic: dict, dividers: dict, discards: dict) -> dict:
    """
    ID which scaffolds need breaking
    """
    needs_breaking = []
    for k, v in dividers.items():
        for div in v:
            if f'{k}:{str(div)}' not in dic:
                needs_breaking.append(k+"\t"+str(div))
                if k in discards:
                    for d in discards[k]:
                        if div + 1 == int(d):
                            # Removed data already removed earlier
                            needs_breaking.remove(k+"\t"+str(div))
    return needs_breaking

def chunks(data: dict, size: int) -> dict:
    """
    split data (dictionary) into chunks of size.
    """
    it = iter(data)
    for i in range(0, len(data), size):
        yield {k:data[k] for k in islice(it, size)}
        
def tpf_components_to_break(scaffs: list, tpf_df: pd.DataFrame) -> dict:
    break_scaffs = {}
    for item in scaffs:
        info = [item.split('\t')[0], item.split('\t')[1]]
        no_gaps = tpf_df[tpf_df['GAP'] == '?']
        temp_df = no_gaps.loc[
                        ( no_gaps['scaffold'] == info[0] ) &
                        ( no_gaps['tpf_start'].astype('int32') <= int(info[1]) ) &
                        ( no_gaps['tpf_end'].astype('int32') >= int(info[1]) )
        ]
        break_scaffs[':'.join(info)] = [str(temp_df.loc[temp_df.index[0]]['id']), list(temp_df.index)]
    return break_scaffs

def new_components ( scaffs: dict ) -> dict:
    new_breaks = {}
    for k, v in scaffs.items():
        break_info = [k.split(':')[0], int(k.split(':')[1])]
        scaff_info = [v[0].split(':')[0], int(v[0].split(':')[1].split('-')[0]), int(v[0].split('-')[1])]
        
        pre_break = [f'{scaff_info[0]}:{scaff_info[1]}-{break_info[1]}']
        pst_break = [f'{scaff_info[0]}:{break_info[1]+1}-{scaff_info[2]}']
        new_breaks[k] = [ v[1], [v[0]], pre_break, pst_break]
    return new_breaks

def copy_agp_to_tpf(closest: dict, agp_df: pd.DataFrame, tpf_df: pd.DataFrame, texel_size: int) -> pd.DataFrame:

    tpf_df['CHROM'] = ''
    tpf_df['PAINT'] = ''
    tpf_df['AGP_SCAFF'] = ''
    tpf_df['AGP_START'] = 0
    tpf_df['AGP_END'] = 0
    tpf_nogaps = tpf_df[tpf_df['GAP'] == '?']
    tpf_ids = tpf_nogaps[['scaffold','id','tpf_start', 'tpf_end']]
    agp_nogaps = agp_df[agp_df['e'] == 'W']

    check_list = {}
    for k, v in closest.items():
        scaffold_id, agp_end_coord = k.split(':')[0], int(k.split(':')[1])

        # Subset AGP based on closest dict (Alan made this,
        # it calcs for the last coord per chunk of agp and tpf (the chunks that sit inside agp coords)
        agp_temp = agp_nogaps.loc[
                        (agp_nogaps['h'].astype(int) == agp_end_coord) & 
                        (agp_nogaps['f'] == scaffold_id)
                            ]

        # IF agp start value isn't 1 use agp_start_coord - (texel*1.5) to get results else use else: 1
        if int(agp_temp['g'].values[0]) >= 1:
            agp_chunk_start = int(agp_temp['g'].values[0]) - (texel_size * 1.6)
        else:
            agp_chunk_start = 1

        # Subset tpf_ids df on agp subset start values 
        temp_tmp = tpf_ids.loc[
            (tpf_ids['tpf_end'].astype(int) <= v) &
            (tpf_ids['tpf_start'].astype(int) >= agp_chunk_start) &
            (tpf_ids['scaffold'] == scaffold_id)
        ]

        if len(temp_tmp) == 0:
            print(f"STOP\t: SEEMS TO BE AN EMPTY SUBFRAME\n {scaffold_id} Exists \n BUT NOTHING AT AGP CO-ORDS {agp_chunk_start} - {agp_end_coord} IN TPF")

        # Copy over CHROM information from AGP to tpf along with paint info (include hap info if required)
        for i in list(temp_tmp.index):
            tpf_df.at[i, 'CHROM'] = agp_temp.a.values[0]
            tpf_df.at[i, 'PAINT'] = agp_temp.Notes.values[0]
            tpf_df.at[i, 'AGP_SCAFF'] = agp_temp.a.values[0]
            tpf_df.at[i, 'AGP_START'] = agp_temp.b.values[0]
            tpf_df.at[i, 'AGP_END'] = agp_temp.c.values[0]

    # If gap label as gap 
    tpf_df.loc[tpf_df['GAP'] == 'GAP', ['CHROM', 'PAINT']] = 'GAP', 'GAP'

    # If scaffold but CHROM empty, it was too small for pretext (pretext forgets it exists) so we add it back in.
    tpf_df.loc[tpf_df['CHROM'] == '', ['CHROM', 'PAINT']] = '', 'nan-nan-nan'
    tpf_df.loc[tpf_df['CHROM'] == '', 'CHROM'] = tpf_df['scaffold']
    tpf_df.loc[tpf_df['AGP_SCAFF'] == '', 'AGP_SCAFF'] = tpf_df['scaffold']

    print(f' Print unique values in "PAINT": {tpf_df["PAINT"].unique()}')
    return tpf_df

def reporter (scaffs_broken: dict):
    [print(f'Proposed break @ \t {k} \n\t This drops scaffold: \t {v[1]}\n\t New components are: \t {v[2]} \n\t\t\t\t {v[3]}') for k, v in scaffs_broken.items()]

In [99]:
def main():
    
    # In the vein of Alan, lets time the script
    start_time = time.time()

    # Load the files into mem
    agp_df = read_agp(agp_file)
    tpf_df = read_tpf(tpf_file)

    # Reformat the tpf
    reformatted_tpf = reformat_tpf_df(tpf_df)

    # Calculate texel size and genome size
    genome_size, texel_size = texel_calc(reformatted_tpf)

    # Filter data for keeps, deletion and/or modification
    delete_list, hap_list, sex_list, scaff_dict, discards = filter_keepers(agp_df, reformatted_tpf, texel_size)

    # something Alan did that works
    dividers = agp_dividers(scaff_dict)
    
    # Write out dividers

    # Calculate closest scaffs to each scaffs
    closest = get_nearest(reformatted_tpf, dividers, texel_size)

    #bind chromosome and paint status to tpf from AGP
    agp_tpf = copy_agp_to_tpf(closest, agp_df, reformatted_tpf, texel_size)

    # ID thre scaffs to break
    scaff_2_break = tpf_needs_breaks(closest, dividers, discards)

    # Get components to break
    # TODO: May only check whether entire scaff falls into an existing scaff
    break_scaffs = tpf_components_to_break( scaff_2_break, agp_tpf)
    
    # Generate information on where breaks should be introduced
    new_breaks = new_components( break_scaffs ) 
    #### ADAPT FOR AGP_TPF
    # Al the broken bits have no chromos
    
    # Now have a nested list of [index_no., old scaff, new_component_1, new_component_2]
    # Creates actions to be added to reformatted_tpf
    for k, v in new_breaks.items():
        index_1, old, new1, new2 = v[0][0], v[1][0], v[2][0], v[3][0]
        split_up1 = new1.split(':')[1].split('-')
        split_up2 = new2.split(':')[1].split('-')
        
        # Subset the agp on the break end-coord (ensures only one scaff is returned)
        agp_subset = agp_df.loc[(agp_df['f'] == new1.split(':')[0]) & (agp_df['h'] == split_up1[1])]
        # Action add breaks
        # Format is: GAP	id	scaffold	strand	tpf_start	tpf_end	length	CHROM	PAINT
        agp_tpf.loc[int(index_1)+0.1] = "?", new1, agp_tpf.loc[int(index_1)]["CHROM"], agp_tpf.loc[int(index_1)]["strand"], split_up1[0], split_up1[1], abs(int(split_up1[0]) - int(split_up1[1])), agp_subset['a'].values[0], agp_subset['Notes'].values[0], agp_subset['a'].values[0], agp_subset['b'].values[0], agp_subset['c'].values[0]
        agp_tpf.loc[int(index_1)+0.2] = "GAP", "TYPE-2", 200, '', '', '', '', '', '', '', 0, 0
        agp_tpf.loc[int(index_1)+0.3] = "?", new2, agp_tpf.loc[int(index_1)]["CHROM"], agp_tpf.loc[int(index_1)]["strand"], split_up1[0], split_up1[1], abs(int(split_up1[0]) - int(split_up1[1])), agp_subset['a'].values[0], agp_subset['Notes'].values[0], agp_subset['a'].values[0], agp_subset['b'].values[0], agp_subset['c'].values[0]
        agp_tpf.loc[int(index_1)+0.4] = "GAP", "TYPE-2", 200, '', '', '', '', '', '', '', 0, 0
        
        # Action remove original scaff
        agp_tpf.drop(index=int(index_1), inplace=True)
        
    agp_tpf['CHROM'].replace( { r"Scaffold_" : 'R' }, inplace= True, regex = True)

    # Fix scaffold indexing on new
    agp_tpf = agp_tpf.sort_index().reset_index()
    breaks = [v[0][0] for k, v in new_breaks.items()]
    report_breaks = len(breaks)

    reporter(new_breaks)
    
    def sort_df(agp_tpf: pd.DataFrame): 
    
    def split_haps():
        # Need to join last three columns (due to length mismatch) and then 
        # Make new column based on regex for hap + number
        # use df.hap.unique() to get unique list of values
        # i for i in unique_haplist: make df subset, print
        pass

    end_time = time.time()
    timer = end_time - start_time
    print(f'I took {round(timer,2)} seconds')
    
    return agp_tpf, agp_df, texel_size, new_breaks

In [100]:
agp_tpf, agp_df, texel_size, new_breaks = main()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tpf['tpf_end'] = tpf['tpf_end'].astype(int)


 Print unique values in "PAINT": ['Painted-nan-nan' 'GAP' 'nan-nan-nan' 'Painted-X-nan' 'Painted-Y-nan']
Proposed break @ 	 scaffold_1:82516213 
	 This drops scaffold: 	 ['scaffold_1:81943984-83123586']
	 New components are: 	 ['scaffold_1:81943984-82516213'] 
				 ['scaffold_1:82516214-83123586']
Proposed break @ 	 scaffold_12:404258 
	 This drops scaffold: 	 ['scaffold_12:1-709395']
	 New components are: 	 ['scaffold_12:1-404258'] 
				 ['scaffold_12:404259-709395']
Proposed break @ 	 scaffold_8:1070094 
	 This drops scaffold: 	 ['scaffold_8:967644-1232526']
	 New components are: 	 ['scaffold_8:967644-1070094'] 
				 ['scaffold_8:1070095-1232526']
Proposed break @ 	 scaffold_3:37488994 
	 This drops scaffold: 	 ['scaffold_3:36676041-37788797']
	 New components are: 	 ['scaffold_3:36676041-37488994'] 
				 ['scaffold_3:37488995-37788797']
Proposed break @ 	 scaffold_6:7336095 
	 This drops scaffold: 	 ['scaffold_6:7239825-7587257']
	 New components are: 	 ['scaffold_6:7239825-7336095'

# Notes
REMOVED should be renames as their scaffold and kept

OUTPUT: 1 column csv of R-number
        for x in hap_list, create subset df and -> tsv ; tetraploid  would be a good aim, octo is a possibility

GenomeArk needs some special cases for shrap/hap handling
Needs test case for that plus multi-hap cases


In [111]:
def remap_unpainted(agp: pd.DataFrame, agp_tpf: pd.DataFrame, texel_size: int): # How to do multiple output type hints?
    """
    Function takes 
    """
    agp_tpf_nogap = agp_tpf.loc[(agp_tpf['GAP'] == '?' )]
    subset_unpainted = agp_tpf_nogap.loc[(agp_tpf_nogap['length'].astype(int) > texel_size) & agp_tpf_nogap['CHROM'].str.contains('scaffold')]
    agp_nogaps = agp_df.loc[(agp_df['e'] == 'W' )]

    unpainted_unknown = []
    unmatched_chrom = []
    # Seems like alot of pieces > 10 texels get no painting for what ever reason
    # This ID's scaffolds which fall into an agp chunk and 'repaints' them

    for i in subset_unpainted.iterrows():
        print(f' ----- {i[1].id} -----{i[1].CHROM}---{i[1].PAINT}')
        subset = agp_nogaps.loc[
                    (agp_nogaps['f'] == i[1].scaffold) &
                    (int(i[1].tpf_start) >= (agp_nogaps['g'].astype('int') - (texel_size *1.5))) &
                    (int(i[1].tpf_end) <= (agp_nogaps['h'].astype('int') + (texel_size*1.5)))]
        if len(subset) > 0 and len(subset) < 2:
        # RE-PAINT!
            agp_tpf.at[i[0], 'CHROM'] = subset.a.values[0]
            agp_tpf.at[i[0], 'AGP_SCAFF'] = subset.a.values[0]
            agp_tpf.at[i[0], 'AGP_START'] = subset.b.values[0]
            agp_tpf.at[i[0], 'AGP_END'] = subset.c.values[0]
        # OR DIE!
        #else:
         #   print(f'Multiple or 0 results found when comparing agp+tpf to agp_df, should be investigated')
          #  print(agp_nogaps[agp_nogaps['f'] == i[1].CHROM])
           # print(i)
            #print(subset)
            #sys.exit(1)

    # For peices that the above doesn't catch...
    # Subset them and PAINT!!!

    df_of_unpainted = agp_tpf_nogap.loc[
                        (agp_tpf_nogap['scaffold'].str.startswith('scaffold')) &
                        (agp_tpf_nogap['CHROM'].str.startswith('scaffold')) &
                        (agp_tpf_nogap['length'] > texel_size)]

    for i in df_of_unpainted.index.tolist():

        # If prior scaffolds end coord + gap + 1 == scaffold of interest start this means they are continuous and together
        # If this is further confirmed by the next scaffold following off the current scaffold
        # I assume it is part of the same chromosome assigned by AGP
        # else add index to a list, and rewrite to ensure there's a unique name ( UN1, UN2... )
        print(agp_tpf.loc[int(i)]['tpf_end'])
        
        end_before = (int(agp_tpf.loc[int(i)-2]['tpf_end']) + 201)
        current_start = int(agp_tpf.loc[int(i)]['tpf_start'])
        current_end = int(agp_tpf.loc[int(i)]['tpf_end'])
        after_start = (int(agp_tpf.loc[int(i)+2]['tpf_start']) - 201)

        if ( end_before == current_start ) and ( current_end == after_start ):
            print(i)
            if (agp_tpf.loc[int(i)-2]['CHROM']) == (agp_tpf.loc[int(i)+2]['CHROM']):
                agp_tpf.loc[i, 'CHROM'] = agp_tpf.loc[int(i)-2]['CHROM']
                agp_tpf.loc[i, 'AGP_SCAFF'] = agp_tpf.loc[int(i)-2]['AGP_SCAFF']
                agp_tpf.loc[i, 'AGP_START'] = agp_tpf.loc[int(i)-2]['AGP_START']
                agp_tpf.loc[i, 'AGP_END'] = agp_tpf.loc[int(i)-2]['AGP_END']
            else:
                unmatched_chrom.append(i)
        else:
            unpainted_unknown.append(i)
    return agp_tpf, unmatched_chrom, unpainted_unknown

In [112]:
agp_tpf, unmatched_chrom, unpainted_unknown = remap_unpainted(agp_df, agp_tpf, texel_size)

In [119]:
agp_tpf.loc[(agp_tpf['scaffold'] == 'scaffold_2')].head(50)

Unnamed: 0,index,GAP,id,scaffold,strand,tpf_start,tpf_end,length,CHROM,PAINT,AGP_SCAFF,AGP_START,AGP_END
214,211.0,?,scaffold_2:1-152107,scaffold_2,PLUS,1,152107,152106.0,R2,Painted-nan-nan,Scaffold_2,49559280,51485449
216,213.0,?,scaffold_2:152308-611593,scaffold_2,PLUS,152308,611593,459285.0,R2,Painted-nan-nan,Scaffold_2,49559280,51485449
218,215.0,?,scaffold_2:611794-1057784,scaffold_2,PLUS,611794,1057784,445990.0,R2,Painted-nan-nan,Scaffold_2,49559280,51485449
220,217.0,?,scaffold_2:1057985-1902362,scaffold_2,PLUS,1057985,1902362,844377.0,R2,Painted-nan-nan,Scaffold_2,49559280,51485449
222,219.0,?,scaffold_2:1902563-2090932,scaffold_2,PLUS,1902563,2090932,188369.0,R2,nan-nan-nan,Scaffold_2,49559280,51485449
224,221.0,?,scaffold_2:2091133-2607808,scaffold_2,PLUS,2091133,2607808,516675.0,R2,Painted-nan-nan,Scaffold_2,48393965,49559179
226,223.0,?,scaffold_2:2608009-2775289,scaffold_2,PLUS,2608009,2775289,167280.0,R2,Painted-nan-nan,Scaffold_2,48393965,49559179
228,225.0,?,scaffold_2:2775490-2832131,scaffold_2,PLUS,2775490,2832131,56641.0,R2,Painted-nan-nan,Scaffold_2,48393965,49559179
230,227.0,?,scaffold_2:2832332-3062477,scaffold_2,PLUS,2832332,3062477,230145.0,R2,Painted-nan-nan,Scaffold_2,48393965,49559179
232,229.0,?,scaffold_2:3062678-3226665,scaffold_2,PLUS,3062678,3226665,163987.0,R2,nan-nan-nan,Scaffold_2,48393965,49559179


In [120]:
agp_tpf.loc[(agp_tpf['scaffold'] == 'scaffold_2')].tail(20)

Unnamed: 0,index,GAP,id,scaffold,strand,tpf_start,tpf_end,length,CHROM,PAINT,AGP_SCAFF,AGP_START,AGP_END
304,301.0,?,scaffold_2:22150898-22306334,scaffold_2,PLUS,22150898,22306334,155436.0,R2,Painted-nan-nan,Scaffold_2,55279438,79178225
306,303.0,?,scaffold_2:22306535-23578112,scaffold_2,PLUS,22306535,23578112,1271577.0,R2,Painted-nan-nan,Scaffold_2,55279438,79178225
308,305.0,?,scaffold_2:23578313-26033547,scaffold_2,PLUS,23578313,26033547,2455234.0,R2,Painted-nan-nan,Scaffold_2,55279438,79178225
310,307.0,?,scaffold_2:26033748-28280695,scaffold_2,PLUS,26033748,28280695,2246947.0,R2,Painted-nan-nan,Scaffold_2,55279438,79178225
312,309.0,?,scaffold_2:28280896-30349145,scaffold_2,PLUS,28280896,30349145,2068249.0,R2,Painted-nan-nan,Scaffold_2,79261655,89070858
314,311.0,?,scaffold_2:30349346-35558489,scaffold_2,PLUS,30349346,35558489,5209143.0,R2,Painted-nan-nan,Scaffold_2,79261655,89070858
316,313.0,?,scaffold_2:35558690-36315047,scaffold_2,PLUS,35558690,36315047,756357.0,R2,Painted-nan-nan,Scaffold_2,79261655,89070858
318,315.0,?,scaffold_2:36315248-36858247,scaffold_2,PLUS,36315248,36858247,542999.0,R2,Painted-nan-nan,Scaffold_2,79261655,89070858
320,317.0,?,scaffold_2:36858448-37050059,scaffold_2,PLUS,36858448,37050059,191611.0,R2,Painted-nan-nan,Scaffold_2,79261655,89070858
322,319.0,?,scaffold_2:37050260-38094920,scaffold_2,PLUS,37050260,38094920,1044660.0,R2,Painted-nan-nan,Scaffold_2,79261655,89070858
