## 设计TE的探针，不再进行过滤，而是进行打分
## 只用全长进行比对，即62bp；
## 首先和基因组进行比对，使用BLASTN，然后和rmsk进行比对，使用Bowtie2。
## 接受多条输入

In [1]:
TE_INPUT = {
   'Lx': [
        'chr1|129376168|129376548|+'
   ],
    
    'IAPLTR3-int': [
        'chr2|85788757|85789706|+'
   ],
    
    'Lx2B': [
        'chr6|21242260|21243219|-',
        'chr8|33101938|33102145|+'
   ],
    
    'L1Lx_II': [
        'chr1|32424978|32425767|-',
   ],
    
    'RLTR4_Mm': [
        'chr4|133886950|133887248|+',
        'chr4|133891467|133891737|+'
   ],
    
    'RLTR44-int': [
        'chr11|34541427|34541644|+',
        'chr11|34551131|34551356|+'
   ],
}

In [2]:
len(TE_INPUT)

6

In [3]:
TE_INPUT.keys()

dict_keys(['Lx', 'IAPLTR3-int', 'Lx2B', 'L1Lx_II', 'RLTR4_Mm', 'RLTR44-int'])

### 1.生成candidates

In [133]:
key = 'IAPLTR3-int'
bridge_id = 'Pad_1'

In [134]:
from pyfaidx import Fasta
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq

def get_genome_seq(chr_name:str, start:int, end:int, strand: str) -> SeqRecord:
    
    mouse_fasta_path = "/media/duan/sda2/Reference_DataBase/mm39/GRCm39.genome.fa"
    
    fasta_ref = Fasta(mouse_fasta_path)

    target = fasta_ref[chr_name][start:end]
    
    if strand == "-":
        target = target.complement
    
    return SeqRecord(seq = Seq(target.seq), id = f"{chr_name}|{start}|{end}", description=strand)

In [135]:
sequences = []
for location in TE_INPUT[key]:
    chrom, start, end, strand = location.split("|")
    sequences.append(get_genome_seq(chrom, int(start), int(end), strand))

In [136]:
len(sequences)

1

In [137]:
sequences

[SeqRecord(seq=Seq('CTGGTGCCGAAGATCCCGGGAATTCTACAACATCGCCGGCATGGAGGAGGTCCC...GAG'), id='chr2|85788757|85789706', name='<unknown name>', description='+', dbxrefs=[])]

In [138]:
import pandas as pd
import numpy as np
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio.Seq import reverse_complement
from collections import Counter

def findAllCandidates(seqs: list):
    
    All_probes = []
    ALL_probes_count = Counter()
    
    for target in seqs:
        limit = len(target)
        for i in range(0, limit-62):
            
            start = i
            end = i + 62
            target_region = target.seq[start:end].upper()
            
            if target.description == '-':
                
                probe = SeqRecord(target_region, id = f"{target.id}_{start}_plus", description=str(start))
                before_len = len(ALL_probes_count)
                ALL_probes_count[str(target_region)] += 1
                after_len = len(ALL_probes_count)
                if before_len != after_len:
                    All_probes.append(probe)
            
            if target.description == '+':
                
                target_region = target_region.reverse_complement()
                probe = SeqRecord(target_region, id = f"{target.id}_{start}_minus", description=str(start))
                before_len = len(ALL_probes_count)
                ALL_probes_count[str(target_region)] += 1
                after_len = len(ALL_probes_count)
                if before_len != after_len:
                    All_probes.append(probe)
                
 
    prbs1 = []
    prbs2 = []
    
    for probe in All_probes:
        
        p1_rec = probe[32:62]
        p2_rec = probe[0:30]
        
        p1_rec.id = probe.id + '_p1'
        p2_rec.id = probe.id + '_p2'
        
        prbs1.append(p1_rec)
        prbs2.append(p2_rec)
    
    return pd.DataFrame({"Region": All_probes, "Probe1":prbs1, "Probe2":prbs2}), ALL_probes_count

In [139]:
Probes, probes_counts = findAllCandidates(seqs = sequences)

In [140]:
len(Probes)

887

### 2.进行Basic过滤

In [141]:
from pandarallel import pandarallel
pandarallel.initialize(progress_bar = False, verbose=0, nb_workers = 16)

from Bio.SeqUtils import MeltingTemp
from Bio.Seq import Seq
import pandas as pd
import numpy as np
import math
from collections import Counter
import pysam as ps

In [142]:
def get_tm(seq):
    
    tmp = MeltingTemp.Tm_NN(seq, nn_table=MeltingTemp.R_DNA_NN1, dnac1 = 20, 
                            selfcomp = True, Na = 390, Mg = 0)
    
    GC = (seq.count("G") + seq.count("C"))/len(seq) * 100
    
    return MeltingTemp.chem_correction(tmp, fmd=30 * 0.2222, fmdmethod=2, GC=GC)

In [143]:
def basicFilter(probe):
    
    probe = str(probe.seq)
    
    # filter Ns
    if "N" in probe:
        return "FAILED"

    base_count = Counter(probe)
    total_bases = len(probe)
    
    # Filter Complexity
    # Shannon entropy
    frequencies = {base: count / total_bases for base, count in base_count.items()}
    entropy = -sum(freq * math.log2(freq) for freq in frequencies.values() if freq > 0)
    
    if entropy < 1:
        return "FAILED"
    
    # filter base percentage
    GC = (base_count['G'] + base_count['C'])/total_bases * 100
    if GC < 20 or GC > 80:
        return "FAILED"
    
    # filter repeats
    REPEATS_LIST = [
        "GGGGG", "CCCCC"
    ]
    
    repeats_count = 0
    for BL in REPEATS_LIST:
        if probe.count(BL) > 0:
            return "FAILED"
    
    # filter tm
    tm = get_tm(probe)
    if tm < 42:
        return "FAILED"
    
            
    return "PASS"

In [144]:
Probes["basicFilter1"] = Probes["Probe1"].parallel_apply(lambda x:basicFilter(x))
Probes["basicFilter2"] = Probes["Probe2"].parallel_apply(lambda x:basicFilter(x))
Probes = Probes[(Probes["basicFilter1"] == "PASS") & (Probes["basicFilter2"] == "PASS")].copy()

In [145]:
len(Probes)

551

In [146]:
Probes

Unnamed: 0,Region,Probe1,Probe2,basicFilter1,basicFilter2
20,"(T, G, C, A, G, T, T, C, T, G, A, A, C, C, C, ...","(C, C, T, C, C, T, C, C, A, T, G, C, C, G, G, ...","(T, G, C, A, G, T, T, C, T, G, A, A, C, C, C, ...",PASS,PASS
21,"(C, T, G, C, A, G, T, T, C, T, G, A, A, C, C, ...","(A, C, C, T, C, C, T, C, C, A, T, G, C, C, G, ...","(C, T, G, C, A, G, T, T, C, T, G, A, A, C, C, ...",PASS,PASS
22,"(A, C, T, G, C, A, G, T, T, C, T, G, A, A, C, ...","(G, A, C, C, T, C, C, T, C, C, A, T, G, C, C, ...","(A, C, T, G, C, A, G, T, T, C, T, G, A, A, C, ...",PASS,PASS
23,"(C, A, C, T, G, C, A, G, T, T, C, T, G, A, A, ...","(G, G, A, C, C, T, C, C, T, C, C, A, T, G, C, ...","(C, A, C, T, G, C, A, G, T, T, C, T, G, A, A, ...",PASS,PASS
24,"(C, C, A, C, T, G, C, A, G, T, T, C, T, G, A, ...","(G, G, G, A, C, C, T, C, C, T, C, C, A, T, G, ...","(C, C, A, C, T, G, C, A, G, T, T, C, T, G, A, ...",PASS,PASS
...,...,...,...,...,...
809,"(A, T, G, A, G, C, T, C, C, T, T, A, T, C, T, ...","(C, T, G, G, C, A, T, G, C, T, G, T, C, T, T, ...","(A, T, G, A, G, C, T, C, C, T, T, A, T, C, T, ...",PASS,PASS
810,"(T, A, T, G, A, G, C, T, C, C, T, T, A, T, C, ...","(T, C, T, G, G, C, A, T, G, C, T, G, T, C, T, ...","(T, A, T, G, A, G, C, T, C, C, T, T, A, T, C, ...",PASS,PASS
811,"(T, T, A, T, G, A, G, C, T, C, C, T, T, A, T, ...","(T, T, C, T, G, G, C, A, T, G, C, T, G, T, C, ...","(T, T, A, T, G, A, G, C, T, C, C, T, T, A, T, ...",PASS,PASS
812,"(T, T, T, A, T, G, A, G, C, T, C, C, T, T, A, ...","(T, T, T, C, T, G, G, C, A, T, G, C, T, G, T, ...","(T, T, T, A, T, G, A, G, C, T, C, C, T, T, A, ...",PASS,PASS


### 3.和Ref_seq进行比对

In [147]:
def write_to_fasta(Probes, fastafile):
    
    with open(fastafile, "w") as handle:
        for srec in Probes:
            handle.write(f">{srec.id}\n")
            handle.write(f"{str(srec.seq)}\n")

In [148]:
fastafile = "./TMP/region_candidates.fasta"
alignfile = "./TMP/region_candidates_alignment.tsv"

write_to_fasta(Probes["Region"].to_list(), fastafile)

In [149]:
import subprocess
    
def blastn(fastafile, result_path, evalue = 5, taxids = "10090", strand = 'minus'):
    """
    runs blastn to create alignment results from a fasta file
    """
    call = [
        '~/software/ncbi-blast-2.15.0+/bin/blastn',
        "-query", fastafile,
        "-db", '../blastndb/refseq_rna',
        "-task", "blastn-short",
        "-evalue", str(evalue),
        "-strand", strand,
        "-out", result_path,
        "-outfmt", r'"6 qseqid sacc pident length mismatch qcovs evalue qseq sseq stitle"',
        "-taxids", taxids,
        "-num_threads", '16'
        ]
    
    subprocess.run(" ".join(call), shell = True, check = True)

In [150]:
blastn(fastafile = fastafile, result_path = alignfile, taxids = '10090', evalue = 100, strand = 'minus')
alignedProbe = pd.read_csv(alignfile, sep = "\t",
                       names=["qseqid","sacc","pident","length","mismatch","qcovs","evalue", "qseq", "sseq","stitle"],
                           index_col = 0)

In [151]:
def non_specific_check(probe, alignedProbe):
        
    pid = probe.id
    
    try:
        sub_df = alignedProbe.loc[pid,:].copy()
    except:
        return 100
        
    if isinstance(sub_df,pd.core.series.Series):
        
        return sub_df['evalue']
                
    else:
            
        return sub_df['evalue'].mean()

In [152]:
Probes["BLASTn_score"] = Probes["Region"].parallel_apply(lambda x:non_specific_check(x, alignedProbe = alignedProbe))

In [153]:
Probes.sort_values('BLASTn_score')

Unnamed: 0,Region,Probe1,Probe2,basicFilter1,basicFilter2,BLASTn_score
781,"(G, T, T, T, C, T, G, G, C, A, T, G, C, T, G, ...","(C, G, T, T, T, C, T, T, G, T, C, C, C, T, C, ...","(G, T, T, T, C, T, G, G, C, A, T, G, C, T, G, ...",PASS,PASS,48.541358
779,"(T, T, C, T, G, G, C, A, T, G, C, T, G, T, C, ...","(T, T, T, C, T, T, G, T, C, C, C, T, C, A, A, ...","(T, T, C, T, G, G, C, A, T, G, C, T, G, T, C, ...",PASS,PASS,48.619527
780,"(T, T, T, C, T, G, G, C, A, T, G, C, T, G, T, ...","(G, T, T, T, C, T, T, G, T, C, C, C, T, C, A, ...","(T, T, T, C, T, G, G, C, A, T, G, C, T, G, T, ...",PASS,PASS,48.697546
777,"(C, T, G, G, C, A, T, G, C, T, G, T, C, T, T, ...","(T, C, T, T, G, T, C, C, C, T, C, A, A, C, C, ...","(C, T, G, G, C, A, T, G, C, T, G, T, C, T, T, ...",PASS,PASS,48.851250
782,"(A, G, T, T, T, C, T, G, G, C, A, T, G, C, T, ...","(A, C, G, T, T, T, C, T, T, G, T, C, C, C, T, ...","(A, G, T, T, T, C, T, G, G, C, A, T, G, C, T, ...",PASS,PASS,48.851829
...,...,...,...,...,...,...
314,"(A, G, C, C, T, G, C, T, C, C, C, G, G, A, G, ...","(T, G, C, C, A, T, G, C, T, C, C, A, C, T, C, ...","(A, G, C, C, T, G, C, T, C, C, C, G, G, A, G, ...",PASS,PASS,72.724370
313,"(G, C, C, T, G, C, T, C, C, C, G, G, A, G, C, ...","(G, C, C, A, T, G, C, T, C, C, A, C, T, C, T, ...","(G, C, C, T, G, C, T, C, C, C, G, G, A, G, C, ...",PASS,PASS,72.724370
315,"(A, A, G, C, C, T, G, C, T, C, C, C, G, G, A, ...","(C, T, G, C, C, A, T, G, C, T, C, C, A, C, T, ...","(A, A, G, C, C, T, G, C, T, C, C, C, G, G, A, ...",PASS,PASS,72.729707
316,"(G, A, A, G, C, C, T, G, C, T, C, C, C, G, G, ...","(G, C, T, G, C, C, A, T, G, C, T, C, C, A, C, ...","(G, A, A, G, C, C, T, G, C, T, C, C, C, G, G, ...",PASS,PASS,72.740249


### 4.和rmsk进行比对

In [154]:
bowtie2file = "./TMP/region_candidates_alignment.sam"

In [155]:
def bowtie2(fastafile:str, result_path:str):
 
    db = '/media/duan/sda2/Reference_DataBase/mm39/bowtie2index/GRCm39'
           
    call = [
        'bowtie2',
        '-x', db,
        '-U', fastafile,
        '-S', result_path,
        '-f',
        '--local',
        '-D 20 -R 3 -N 1 -L 20 -i S,1,0.50',
        '-a',
        '--reorder',
        '-p', '16'
    ]
    
    cp = subprocess.run(" ".join(call), shell = True, check = True, capture_output=True)

In [156]:
bowtie2(fastafile = fastafile, result_path = bowtie2file)

In [157]:
def bedtools_intersect_filter(bowtie2file, TE_INPUT, key):
    
    import subprocess
    
    # 先将sam文件压缩成bam
    
    command = f'samtools view -Sb {bowtie2file} > ./TMP/region_candidates_alignment.bam'
    cp = subprocess.run(command, shell = True, check = True, capture_output = True)
    
    # 然后进行intersect
    
    rmsk_bed = '/media/duan/sda2/Reference_DataBase/mm39/GRCm39_GENCODE_rmsk_TE.bed'
    command = f"bedtools intersect -a ./TMP/region_candidates_alignment.bam -b {rmsk_bed} -wa -wb -bed > ./TMP/region_candidates_alignment.bed"
    cp = subprocess.run(command, shell = True, check = True, capture_output = True)
    
    Bowtie2_On_score = Counter()
    Bowtie2_Off_score = Counter()
    
    with open("./TMP/region_candidates_alignment.bed", 'r') as handle:
        for line in handle.readlines():
            parsed_line = line.strip().split("\t")
            subject = "|".join(parsed_line[0:3]) + f"|{parsed_line[5]}"
            if subject in TE_INPUT[key]:
                Bowtie2_On_score[parsed_line[3]] += 1
            else:
                Bowtie2_Off_score[parsed_line[3]] += 1
                
    return Bowtie2_On_score, Bowtie2_Off_score

In [158]:
Bowtie2_On_score, Bowtie2_Off_score = bedtools_intersect_filter(bowtie2file, TE_INPUT, key)

In [159]:
Off_list = []
On_list = []
for x in Probes['Region']:
    try:
        tmp = Bowtie2_Off_score[x.id]
    except KeyError:
        Off_list.append(0)
        
    Off_list.append(tmp)
    
    try:
        tmp = Bowtie2_On_score[x.id]
    except KeyError:
        On_list.append(0)
    
    On_list.append(tmp)

In [160]:
Probes['Bowtie2_On_Score'] = On_list
Probes['Bowtie2_Off_Score'] = Off_list

In [161]:
Probes

Unnamed: 0,Region,Probe1,Probe2,basicFilter1,basicFilter2,BLASTn_score,Bowtie2_On_Score,Bowtie2_Off_Score
20,"(T, G, C, A, G, T, T, C, T, G, A, A, C, C, C, ...","(C, C, T, C, C, T, C, C, A, T, G, C, C, G, G, ...","(T, G, C, A, G, T, T, C, T, G, A, A, C, C, C, ...",PASS,PASS,65.288772,0,27
21,"(C, T, G, C, A, G, T, T, C, T, G, A, A, C, C, ...","(A, C, C, T, C, C, T, C, C, A, T, G, C, C, G, ...","(C, T, G, C, A, G, T, T, C, T, G, A, A, C, C, ...",PASS,PASS,65.494302,0,29
22,"(A, C, T, G, C, A, G, T, T, C, T, G, A, A, C, ...","(G, A, C, C, T, C, C, T, C, C, A, T, G, C, C, ...","(A, C, T, G, C, A, G, T, T, C, T, G, A, A, C, ...",PASS,PASS,66.002075,0,29
23,"(C, A, C, T, G, C, A, G, T, T, C, T, G, A, A, ...","(G, G, A, C, C, T, C, C, T, C, C, A, T, G, C, ...","(C, A, C, T, G, C, A, G, T, T, C, T, G, A, A, ...",PASS,PASS,65.181174,0,29
24,"(C, C, A, C, T, G, C, A, G, T, T, C, T, G, A, ...","(G, G, G, A, C, C, T, C, C, T, C, C, A, T, G, ...","(C, C, A, C, T, G, C, A, G, T, T, C, T, G, A, ...",PASS,PASS,65.793591,0,29
...,...,...,...,...,...,...,...,...
809,"(A, T, G, A, G, C, T, C, C, T, T, A, T, C, T, ...","(C, T, G, G, C, A, T, G, C, T, G, T, C, T, T, ...","(A, T, G, A, G, C, T, C, C, T, T, A, T, C, T, ...",PASS,PASS,59.045402,0,52
810,"(T, A, T, G, A, G, C, T, C, C, T, T, A, T, C, ...","(T, C, T, G, G, C, A, T, G, C, T, G, T, C, T, ...","(T, A, T, G, A, G, C, T, C, C, T, T, A, T, C, ...",PASS,PASS,59.110857,0,59
811,"(T, T, A, T, G, A, G, C, T, C, C, T, T, A, T, ...","(T, T, C, T, G, G, C, A, T, G, C, T, G, T, C, ...","(T, T, A, T, G, A, G, C, T, C, C, T, T, A, T, ...",PASS,PASS,59.739429,0,60
812,"(T, T, T, A, T, G, A, G, C, T, C, C, T, T, A, ...","(T, T, T, C, T, G, G, C, A, T, G, C, T, G, T, ...","(T, T, T, A, T, G, A, G, C, T, C, C, T, T, A, ...",PASS,PASS,60.211299,0,58


### 5.拼接成完整序列

In [162]:
def get_bridge_seq(bridge_id):
        
    bseqs = pd.read_csv('./BridgeSequence/Bridges_used_for_assemble_20240704.tsv', sep="\t",index_col=0)
    return bseqs.loc[bridge_id,"Seq"]

bridge_seq = get_bridge_seq(bridge_id)

In [163]:
def get_full_probes1(p1, bridge_seq):
    
    """
    该函数用于拼接完整的p1序列
    """
    return bridge_seq[0:14] + "ta" + p1

def get_full_probes2(p2, bridge_seq):
    
    """
    该函数用于拼接完整的p2序列
    """
    return p2 + "ta" + bridge_seq[-14:]

In [164]:
Probes["Full_Probe1"] = Probes["Probe1"].parallel_apply(lambda x:get_full_probes1(x, bridge_seq = bridge_seq))
Probes["Full_Probe2"] = Probes["Probe2"].parallel_apply(lambda x:get_full_probes2(x, bridge_seq = bridge_seq))

### 6.进行打分

In [165]:
# Probes = Probes[Probes['Bowtie2_On_Score'] > 0].copy()

In [166]:
import numpy as np

In [167]:
def get_evalue(row):
    

    part1 = 4 / (row['BLASTn_score'] + 1)
    
    part2 = 8 / max([row['Bowtie2_On_Score'], 1]) # 0 - 8

    part3 = min([np.log10(row['Bowtie2_Off_Score'] + 1), 4]) # 0 - 4
    
    return part1 + part2 + part3

In [168]:
Probes['Evaluation'] = Probes.parallel_apply(lambda row: get_evalue(row), axis = 1)

In [169]:
Probes = Probes.sort_values('Evaluation')

In [170]:
result = pd.DataFrame({
        'name': key,
        'probe_name': [x.id for x in Probes["Region"]],
        'position':[x.description for x in Probes["Region"]],
        'probe A':[str(x.seq) for x in Probes["Full_Probe1"]],
        'probe B':[str(x.seq) for x in Probes["Full_Probe2"]],
        "Evaluation": Probes['Evaluation'],
        "BLASTn_score": Probes['BLASTn_score'],
        "Bowtie2_On_Score": Probes['Bowtie2_On_Score'],
        "Bowtie2_Off_Score": Probes['Bowtie2_Off_Score'],
        'bridge_id': bridge_id,
        'probe_strand': [x.id.split("_")[-1] for x in Probes["Region"]]})

In [171]:
result = result.reset_index(drop = True)

In [172]:
result.to_csv(f"{key}_result.csv")

In [173]:
KEEP_INDICES = [
    10,46,49,137,
] # input index of selected probes manually
result = result.loc[KEEP_INDICES, :].copy()
result = result.reset_index(drop = True)

In [174]:
def generate_sangong(data):
    # This Block is used to generate name + sequence sangong accepted.
    # The name format is "[GENE]_[ProbePosition]_[ProbeNumber][ProbeLength]_[PadNumber]", such as H2-Ab1_506_1A30_7196.
    final_name = []
    final_primer = []
    gene_check = []
    Final_gene = []
    for i in range(len(data)):
        pos1 = data.loc[i,"name"]
        gene_check.append(pos1)
        number_ = [j for j in gene_check if j == pos1]
        number = len(number_)
        
        bridge_id = data.loc[i,"bridge_id"]
        if bridge_id.startswith("Pad"):
            pos5 = "P" + bridge_id.split("_")[-1]
        else:
            try:
                pos5 = bridge_id.split("seq")[1]
            except:
                pos5 = bridge_id.split("bridge")[1]
            
        pos2 = str(int(data.loc[i,"position"]))
        pos3 = data.loc[i,"probe A"]
        pos4 = data.loc[i,"probe B"]
        final1 = pos1+"_"+pos2+"_" + str(number) +"A" + "_" + pos5
        final2 = pos1+"_"+pos2+"_" + str(number) +"B" + "_" + pos5

        final_name.append(final1)
        final_name.append(final2)
        final_primer.append(pos3)
        final_primer.append(pos4)
        Final_gene.append(pos1)
        Final_gene.append(pos1)

    print("There are {} sequences in total.".format(str(len(final_name))))
    final_df = pd.DataFrame({"name":final_name, "seq":final_primer,"gene":Final_gene})
    
    return final_df

In [175]:
final_df = generate_sangong(result)
final_df.to_csv(f"{key}_sangon.csv", sep=",", index = False)
final_df

There are 8 sequences in total.


Unnamed: 0,name,seq,gene
0,IAPLTR3-int_358_1A_P30330,CAGTAACGTTCGCAtaTCCCAAAACAGAAGCCTGCTCCCGGAGCAG,IAPLTR3-int
1,IAPLTR3-int_358_1B_P30330,AGAAAAAGTCTGTTCTCAAAACAAAAGCCTtaATCAGGGTTACAAT,IAPLTR3-int
2,IAPLTR3-int_251_2A_P30330,CAGTAACGTTCGCAtaACGTAGGACTGACCGCACATCAAGGTCACA,IAPLTR3-int
3,IAPLTR3-int_251_2B_P30330,CTGACTGAGCTTGTTAGTGCTGCCGTTTATtaATCAGGGTTACAAT,IAPLTR3-int
4,IAPLTR3-int_67_3A_P30330,CAGTAACGTTCGCAtaACTTACGATTTCCACTGCAGTTCTGAACCC,IAPLTR3-int
5,IAPLTR3-int_67_3B_P30330,CAACAGGCTCACAGTTAAGCATGTCTCTCTtaATCAGGGTTACAAT,IAPLTR3-int
6,IAPLTR3-int_635_4A_P30330,CAGTAACGTTCGCAtaCTGTGGTCAAGGAACCTGAGCAAGCATACC,IAPLTR3-int
7,IAPLTR3-int_635_4B_P30330,ATCTACGTCTCTTCCTAACTTGTTCCACGAtaATCAGGGTTACAAT,IAPLTR3-int
