### TODO 基于PAV的断点，我们对syntenic的区间进行划分
> syntenic区间减去PAVs后的区间
> 由于A2和At的syntenic区域存在一对多的情况，在将PAV和Divergence区域进行合并时，会存在问题
> 
> 这里采用的方法是：对每个共线性区域减去PAV区域，从而获得相对应的syntenic_noPAV和syntenic_PAV区域

In [None]:
import pandas as pd

In [None]:
# Tab-separated table whose first line is the header; the columns named in
# the later groupby (J85_Synteny_*, At_Synteny_*, Stand) come from this file.
syntenic_intersected_PAV = pd.read_csv(
    "./A2_vs_At_PAV_synteny.txt", sep="\t", header=0, index_col=None
)

In [None]:
def getSyntenicMatchRegion(syntenicRegion,syntenicPAV):
    """Split one syntenic block into alternating PAV-free and PAV segments.

    ``syntenicRegion`` is read positionally: items 1-2 are the A2 (J85)
    start/end of the syntenic block, items 4-5 the At start/end, and the
    last item is the strand label ("++" keeps both coordinate lists in
    ascending order; anything else reverses the At list).
    Each row of ``syntenicPAV.values`` describes one PAV: columns 8-9 hold
    its A2 start/end and columns 11-12 its At start/end.

    @return pd.DataFrame with one row per segment: items 0-6 of
        ``syntenicRegion``, the A2 segment start/end, the At segment
        start/end (always low, high) and the label "syntenic_noPAV" or
        "syntenic_PAV".
    """
    a2_lo, a2_hi = syntenicRegion[1], syntenicRegion[2]
    at_lo, at_hi = syntenicRegion[4], syntenicRegion[5]

    # Breakpoint lists start with the block boundaries themselves.
    a2_points = [a2_lo, a2_hi]
    at_points = [at_lo, at_hi]
    for pav in syntenicPAV.values:
        # PAV breakpoints may reach beyond the syntenic block, so each one is
        # clipped to the corresponding block boundary before being recorded.
        a2_points.append(a2_lo if pav[8] <= a2_lo else pav[8])
        a2_points.append(a2_hi if pav[9] >= a2_hi else pav[9])
        at_points.append(at_lo if pav[11] <= at_lo else pav[11])
        at_points.append(at_hi if pav[12] >= at_hi else pav[12])

    # Order the breakpoints according to strand: A2 always ascending, At
    # descending unless the strand label is "++".
    a2_points.sort()
    at_points.sort(reverse=syntenicRegion[-1] != "++")

    segments = []
    a2_pairs = zip(a2_points, a2_points[1:])
    at_pairs = zip(at_points, at_points[1:])
    for idx, ((a2_left, a2_right), (at_left, at_right)) in enumerate(zip(a2_pairs, at_pairs)):
        if a2_left == a2_right and at_left == at_right:
            # Zero-length on both genomes: two consecutive PAV breakpoints.
            continue
        # Even gaps lie between PAVs (or between a block edge and a PAV);
        # odd gaps are the PAV intervals themselves.
        label = "syntenic_PAV" if idx % 2 else "syntenic_noPAV"
        block_fields = tuple(syntenicRegion[k] for k in range(7))
        segments.append(
            block_fields
            + (
                a2_left, a2_right,
                min(at_left, at_right), max(at_left, at_right),
                label,
            )
        )
    return pd.DataFrame(segments)

In [None]:
# Build the segment table of every syntenic block (one groupby group per
# block), then concatenate once. Calling pd.concat inside the loop re-copies
# the whole accumulated frame on every iteration, which is quadratic in the
# number of blocks.
syntenicMatchFrames=[]
for syntenicRegion,syntenicPAV in syntenic_intersected_PAV.groupby([
        'J85_Synteny_Chr', 'J85_Synteny_Start', 'J85_Synteny_End', 'At_Synteny_Chr', 'At_Synteny_Start', 'At_Synteny_End','Stand']):
    syntenicMatch=getSyntenicMatchRegion(
        syntenicRegion,syntenicPAV
    )
    syntenicMatchFrames.append(syntenicMatch)
# pd.concat raises ValueError on an empty list; keep the previous result for
# an input without any group, which was an empty DataFrame.
if syntenicMatchFrames:
    MatchRegion=pd.concat(syntenicMatchFrames,axis=0)
else:
    MatchRegion=pd.DataFrame()



In [None]:
# Load the remaining region tables; all three are tab-separated files read
# without a header row:
#   - divergence regions listed for J85 (A2) and for At,
#   - the rows of the noPAV synteny file.
A2_divergence = pd.read_csv(
    "A2_vs_At_divergenceRegion_J85.txt", sep="\t", header=None, index_col=None
)
At_divergence = pd.read_csv(
    "A2_vs_At_divergenceRegion_At.txt", sep="\t", header=None, index_col=None
)
syntenic_noPAv = pd.read_csv(
    "./A2_vs_At_noPAV_synteny.txt", sep="\t", header=None, index_col=None
)

# Every tuple uses the same 12-field layout as the rows built in
# getSyntenicMatchRegion: 7 syntenic-block fields, A2 segment start/end,
# At segment start/end, label. "." and -1 fill the fields of the genome that
# has no coordinates in a divergence row.

# Divergence regions on the A2 side: only the A2 fields are populated.
windowAnnotate = [
    (row[0], row[1], row[2], ".", -1, -1, ".", row[1], row[2], -1, -1, 'divergence')
    for row in A2_divergence.values
]
# Divergence regions on the At side: only the At fields are populated.
windowAnnotate.extend(
    (".", -1, -1, row[0], row[1], row[2], ".", -1, -1, row[1], row[2], 'divergence')
    for row in At_divergence.values
)
# Syntenic blocks from the noPAV file: the segment is the whole block.
windowAnnotate.extend(
    (row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[1], row[2], row[4], row[5], 'syntenic_noPAV')
    for row in syntenic_noPAv.values
)


In [None]:
# Turn the collected tuples into a frame, stack them under the per-block
# segments, and write the combined table as tab-separated text with neither
# a header row nor the index column.
windowAnnotate = pd.DataFrame(windowAnnotate)
All_windowAnnotate = pd.concat([MatchRegion, windowAnnotate], axis=0)
All_windowAnnotate.to_csv(
    "./A2_At_AllAnnotate.txt", sep="\t", index=False, header=False
)