In [117]:
import subprocess
import os
import glob
import toytree
import tqdm
import pandas as pd
from subprocess import PIPE, Popen
import shlex
from Bio import SeqIO
import random
import itertools
import numpy as np


def remove_seeds( alnfile):
    """
    this function removes seeds from an alignment
    """
    aln = SeqIO.parse(alnfile, 'fasta')
    sequences = []
    for s in aln:
        sequences.append(s)
    #write new aln
    
    with open(alnfile, 'w') as f:
        for s in sequences:
            f.write('>' + str(s.id).replace('seed','').replace('_' , '') + '\n')
            f.write(str(s.seq) + '\n')
    
    return alnfile

def Fident(str1,str2 , verbose = False):
    #minlen= min( (len(str1),len(str2))  )
    #str1 = str1[:minlen]
    #str2 = str2[:minlen]
    str1 = np.array(list(str1))
    str2 = np.array(list(str2))            
    return len(np.where( (str1 == str2 ) & (str1 != '-' ) & (str2 != '-')  )[0]) / len(str1)

def copyaln( aln, seq):
    seqiter = iter(seq)
    newaln = ''
    for i,char in enumerate(aln):
        if char == '-':
            newaln += '-'
        else:
            newaln+=next(seqiter)
    return newaln

def read_dbfiles3di(  AADB , threeDidb):
    #find positions 
    threeDiseq = [ l.strip().replace('\x00','') for l in open(threeDidb)]
    lookup = AADB+'.lookup'
    ids = [ l.split()[1].strip() for l in open(lookup)]
    AAs = [ l.strip().replace('\x00','') for l in open(AADB)]

    mapper3di = dict(zip(ids,threeDiseq))
    mapperAA = dict(zip(ids,AAs))
    
    return mapper3di, mapperAA

def calc_fident_crossaln(row , verbose = False):
    #amino acid representations of alns using AAand3di or just 3di
    qaln_2, taln_2 = row.qaln , row.taln
    #start and stop of aln
    
    qstart_2, qend_2, tstart_2 , tend_2 = row.qstart, row.qend , row.tstart , row.tend
    #indexing starts at 1...
    
    #3di of the query and target
    structQ, structT = row['3diq'], row['3dit']
    AAq, AAt = row['AAq'], row['AAt']

    #add gaps
    t3diAA_newgaps = copyaln(taln_2, structT[tstart_2-1:tend_2]) 
    q3diAA_newgaps = copyaln(qaln_2, structQ[qstart_2-1:qend_2])
    row = pd.Series( { '3di_qaln_mode2':q3diAA_newgaps , '3di_taln_mode2':t3diAA_newgaps })
    #return columns
    return row

def get_leafset( treenode ):
    """
    this function returns the leafset of a node
    """
    if treenode.is_leaf():
        return [treenode.name]
    else:
        return treenode.get_leaf_names()


def mafft_profile(aln1,aln2, outprofile , submat = None):
    """
    this function aligns two alignments using MAFFT
    """
    #make profile
    if submat:
        cmd = 'mafft --textmatrix {} --seed {} {} > {}'.format(submat, aln1,aln2, outprofile)
    else:
        cmd = 'mafft --seed {} {} > {}'.format(aln1,aln2, outprofile)

    print(cmd)
    subprocess.run(cmd , shell=True)
    return outprofile

def mafft_addfull(aln1,aln2, outprofile , submat = None):
    """
    this function aligns two alignments using MAFFT
    """
    #make profile
    profile = aln1 + '.profile'
    if submat:
        cmd = 'mafft --textmatrix {} --addfull {} --keeplength {} > {}'.format(submat, aln1,aln2, outprofile)
    else:
        cmd = 'mafft --addfull  {} --keeplength {} > {}'.format(aln1,aln2, outprofile)
    print(cmd)

    subprocess.run(cmd , shell = True)
    return outprofile


def sub2fasta( sub, outfile , fastacol1='qaln' , fastacol2='taln' ):
    with open(outfile, 'w') as f:
        f.write('>' + sub['query'] + '\n')
        f.write(sub[fastacol1] + '\n')
        f.write('>' + sub['target'] + '\n')
        f.write(sub[fastacol2] + '\n')    
    return outfile

def retalns(allvall, leafname1,leafname2):
    sub = allvall[allvall['query'].isin( leafname1)]
    sub = sub[sub['target'].isin(leafname2)]
    sub = sub[sub['query'] != sub['target']]
    #get max prot lenght aligned
    sub['alnlen'] = sub.apply(lambda x: max(x['qend'] - x['qstart'] , x['tend'] - x['tstart']) , axis = 1)
    sub = sub[sub['alnlen'] == sub['alnlen'].max()]
    if len(sub)==0:
        print(leafname1, leafname2)
        raise Exception('no sub')
    return sub.iloc[0]

def get_fasta_leafset(fasta):
    """
    this function returns the leafset of a fasta file
    """
    aln = SeqIO.parse(fasta, 'fasta')
    leafset = []
    for s in aln:
        leafset.append(s.id)
    return leafset

#traverse tree from root to leaves recursively
def traverse_tree_merge_mafft( treenode, topleafset, allvall , alnfolder , submat = None , verbose = False):
    """
    this function traverses a tree from root to leaves recursively
    it returns a dictionary with the iteratively built alignment
    """
    if verbose == True:
        print('traverse', treenode.name , treenode.is_leaf() , treenode.leafset)
    
    if treenode.is_leaf():
        print(treenode, treenode.name)
        topleafset.remove(treenode.name)
        #if the node is a leaf, then we need to add it to the alignment with one of the pivots in the current leafset
        #select the alignment of the leaf with itself
        sub = allvall[allvall['query'].isin( [treenode.name] )]
        sub = sub.iloc[0]
        
        assert len(sub) > 0


        with open(alnfolder + treenode.name + '_inter.fasta', 'w') as f:
            f.write('>' + sub['query'] + '\n')
            f.write(sub['AAq'] + '\n')
        with open(alnfolder + treenode.name + '_inter.3di.fasta', 'w') as f:
            f.write('>' + sub['query'] + '\n')
            f.write(sub['3diq'] + '\n')
        treenode.aln = alnfolder + treenode.name + '_inter.fasta'
        treenode.aln3di = alnfolder + treenode.name + '_inter.3di.fasta'
        return treenode.aln, treenode.aln3di
    
    else:
        childalns3di = {}
        childalnsAA = {}
       
        #treenode.leafset = get_leafset(treenode)
        #get the intersection of the child leafsets
        treenode.leafset = get_leafset(treenode)
        children = treenode.get_children()
        
        if len(children) == 2 and children[0].is_leaf() and children[1].is_leaf():
            #treat the case of a cherry
            print('cherry', children[0].name , children[1].name)
            treenode.aln = sub2fasta( retalns(allvall, [children[0].name] , [children[1].name]) , alnfolder + treenode.name + '_inter.fasta')
            treenode.aln3di = sub2fasta( retalns(allvall, [children[0].name] , [children[1].name]) , alnfolder + treenode.name + '_inter.3di.fasta' , fastacol1='3di_qaln_mode2' , fastacol2='3di_taln_mode2')
            return treenode.aln, treenode.aln3di
        
        else:
            #not a cherry. one or both sides is a subtree
            print('not cherry', treenode.name  )
            print( 'children', [c.name for c in children])
            for c in treenode.get_children():
                #make sub aln for each child
                if verbose == True:
                    print('traverse', c.name , c.is_leaf() , c.leafset)
                if not c.aln:
                    c.aln,c.aln3di = traverse_tree_merge_mafft(c , treenode.leafset , allvall, alnfolder , verbose = verbose)
                childalnsAA[c] = { 'fasta': c.aln  }
                childalns3di[c] = { 'fasta': c.aln3di  }
            
                

            if len(children) == 2:
                c1,c2 = children
                
                if c1.is_leaf():
                    treenode.aln = mafft_addfull(childalnsAA[c1]['fasta'], childalnsAA[c2]['fasta'], alnfolder + treenode.name + '_inter.fasta' )
                    treenode.aln3di = mafft_addfull(childalns3di[c1]['fasta'], childalns3di[c2]['fasta'], alnfolder + treenode.name + '_inter3di.fasta' , submat =submat )

                elif c2.is_leaf():
                    treenode.aln = mafft_addfull(childalnsAA[c2]['fasta'], childalnsAA[c1]['fasta'], alnfolder + treenode.name + '_inter.fasta' )
                    treenode.aln3di = mafft_addfull(childalns3di[c1]['fasta'], childalns3di[c2]['fasta'], alnfolder + treenode.name + '_inter3di.fasta' , submat = submat)

                else:
                    with open(childalnsAA[c2]['fasta'], 'r') as f:
                        c2seqs = f.read().count('>')
                    with open(childalnsAA[c1]['fasta'], 'r') as f:
                        c1seqs = f.read().count('>')
                    
                    if c1seqs > c2seqs:
                        treenode.aln = mafft_profile(childalnsAA[c1]['fasta'], childalnsAA[c2]['fasta'], alnfolder + treenode.name + '_inter.fasta' )
                        treenode.aln3di = mafft_profile(childalns3di[c1]['fasta'], childalns3di[c2]['fasta'], alnfolder + treenode.name + '_inter3di.fasta' , submat =submat )
                    else:
                        treenode.aln = mafft_profile(childalnsAA[c2]['fasta'], childalnsAA[c1]['fasta'], alnfolder + treenode.name + '_inter.fasta' )
                        treenode.aln3di = mafft_profile(childalns3di[c2]['fasta'], childalns3di[c1]['fasta'], alnfolder + treenode.name + '_inter3di.fasta' , submat =submat )
            
            elif len(children) > 2 and treenode.up == None:
                print('final aln')
                print('childalnsAA', childalnsAA)
                print('childalns3di', childalns3di)
                

                print([(c.aln,c.aln3di,type(c.aln), type(c.aln3di)) for c in children])
                
                children = [c for c in treenode.get_children() if c.aln and c.aln3di]
                
                for c in children:
                    #print alns
                    print('aln' + c.name)
                    with open(childalnsAA[c]['fasta'], 'r') as f:
                        print(f.read())
                    print('aln3di' + c.name )
                    with open(childalns3di[c]['fasta'], 'r') as f:
                        print(f.read())

                c1,c2 = children[0],children[1]
                if c1.is_leaf():
                    rootfasta = mafft_addfull(childalnsAA[c1]['fasta'], childalnsAA[c2]['fasta'], alnfolder + treenode.name + '_root.fasta' )
                    rootfasta3di = mafft_addfull(childalns3di[c1]['fasta'], childalns3di[c2]['fasta'], alnfolder + treenode.name + '_inter3di.fasta' , submat =submat )
                elif c2.is_leaf():
                    rootfasta = mafft_addfull(childalnsAA[c2]['fasta'], childalnsAA[c1]['fasta'], alnfolder + treenode.name + '_root.fasta' )
                    rootfasta3di = mafft_addfull(childalns3di[c1]['fasta'], childalns3di[c2]['fasta'], alnfolder + treenode.name + '_inter3di.fasta' , submat = submat)
                else:                
                    rootfasta = mafft_profile(childalnsAA[c1]['fasta'], childalnsAA[c2]['fasta'], alnfolder + treenode.name + '_root.fasta' )
                    rootfasta3di = mafft_profile(childalns3di[c1]['fasta'], childalns3di[c2]['fasta'], alnfolder + treenode.name + '_inter3di.fasta' , submat =  submat)
                
                print('aln1')
                with open(rootfasta , 'r') as f:
                    print(f.read())
                with open(rootfasta3di , 'r') as f:
                    print(f.read())
                
                for i,c in enumerate(children[1:]):
                    if c.is_leaf():
                        rootfasta = mafft_addfull( childalnsAA[c]['fasta'] , rootfasta, rootfasta +'.iter' )
                        rootfasta3di = mafft_addfull( childalns3di[c]['fasta'], rootfasta3di , rootfasta3di+'.iter'  , submat =submat )
                    else:
                        rootfasta = mafft_profile(rootfasta, childalnsAA[c]['fasta'], rootfasta+'.iter' )
                        rootfasta3di = mafft_profile(rootfasta3di, childalns3di[c]['fasta'], rootfasta3di+'.iter'  , submat =submat )
                    print('aln'+str(i))
                    with open(rootfasta , 'r') as f:
                        print(f.read())
                    with open(rootfasta3di , 'r') as f:
                        print(f.read())
                

                treenode.aln = rootfasta
                treenode.aln3di = rootfasta3di
                
                with open(treenode.aln , 'r') as f:
                    print(f.read())
                with open(treenode.aln3di , 'r') as f:
                    print(f.read())
                
            if verbose == True:
                #check if node is root  
                if treenode.up == None:
                    print('final aln')
                    print('childalnsAA', childalnsAA)
                    print('childalns3di', childalns3di)
            return treenode.aln, treenode.aln3di


def remove_redundant( alignment ):
    """
    this function removes redundant sequences from an alignment
    """
    aln = SeqIO.parse(alignment, 'fasta')
    seqs = []
    ids = []
    for s in aln:
        if s.id not in ids:
            seqs.append(s)
            ids.append(s.id)
    
    with open(alignment, 'w') as f:
        for s in seqs:
            f.write('>' + s.id + '\n')
            f.write(str(s.seq) + '\n')
    return alignment

#remove all alns except the final merged one
def cleanup( filedir ):
    """
    this function removes all alns except the final merged one
    """
    for f in glob.glob(filedir + '*inter.fasta'):
        os.remove(f)

In [106]:

#traverse tree from root to leaves recursively
def traverse_tree_merge( treenode, topleafset, allvall , alnfolder , verbose = False):
    """
    this function traverses a tree from root to leaves recursively
    it returns a dictionary with the iteratively built alignment
    """
    if verbose == True:
        print('traverse', treenode.name , treenode.is_leaf() , treenode.leafset)
    
    if treenode.is_leaf():
        topleafset.remove(treenode.name)
        #if the node is a leaf, then we need to add it to the alignment with one of the pivots in the current leafset
        sub = retalns(allvall, [treenode.name] , topleafset)  
        treenode.aln = sub2fasta(sub, alnfolder + treenode.name + '_inter.fasta')
        treenode.aln3di = sub2fasta(sub, alnfolder + treenode.name + '_inter.3di.fasta' , fastacol1='3di_qaln_mode2' , fastacol2='3di_taln_mode2')
        return treenode.aln, treenode.aln3di
    
    else:
        childalns3di = {}
        childalnsAA = {}
        bridges3di = {}
        bridgesAA = {}
        #treenode.leafset = get_leafset(treenode)
        #get the intersection of the child leafsets
        treenode.leafset = get_leafset(treenode)
        children = treenode.get_children()
        
        if len(children) == 2 and children[0].is_leaf() and children[1].is_leaf():
            #treat the case of a cherry
            print('cherry', children[0].name , children[1].name)
            treenode.aln = sub2fasta( retalns(allvall, [children[0].name] , [children[1].name]) , alnfolder + treenode.name + '_inter.fasta')
            treenode.aln3di = sub2fasta( retalns(allvall, [children[0].name] , [children[1].name]) , alnfolder + treenode.name + '_inter.3di.fasta' , fastacol1='3di_qaln_mode2' , fastacol2='3di_taln_mode2')
            return treenode.aln, treenode.aln3di
        
        else:
            #not a cherry. one or both sides is a subtree
            print('not cherry', treenode.name  )
            print( 'children', [c.name for c in children])
            for c in treenode.get_children():
                #make sub aln for each child
                if verbose == True:
                    print('traverse', c.name , c.is_leaf() , c.leafset)
                if not c.aln:
                    c.aln,c.aln3di = traverse_tree_merge(c , treenode.leafset , allvall, alnfolder , verbose = verbose)
                childalnsAA[c] = { 'fasta': c.aln , 'protset':set(get_fasta_leafset(c.aln) ) }
                childalns3di[c] = { 'fasta': c.aln3di , 'protset':set(get_fasta_leafset(c.aln3di) ) }
            
            """
            for c1,c2 in itertools.combinations(treenode.get_children(),2):
                bridge = retalns(allvall, childalnsAA[c1]['protset'] , childalnsAA[c2]['protset'] )
                bridgesAA[(c1,c2)] = { 'fasta': sub2fasta(bridge, alnfolder + treenode.name + '_bridge.fasta') , 'protset':set([bridge.query , bridge.target]) }
                bridges3di[(c1,c2)] = { 'fasta' : sub2fasta(bridge, alnfolder + treenode.name + '_bridge.3di.fasta' , fastacol1='3di_qaln_mode2' , fastacol2='3di_taln_mode2') , 'protset':set([bridge.query, bridge.target]) }
            
            #successively merge the alignments of the children
            for i, c in enumerate(itertools.combinations(treenode.get_children(),2)):
                c1,c2 = c
                if verbose == True:
                    print('merge', c1.name , c2.name)
                if i == 0:
                    #first merge
                    try:
                        print('first merge')
                        
                        alnAA = mergealns( childalnsAA[c1]['fasta'], bridgesAA[(c1,c2)]['fasta'] ,alnfolder + treenode.name + '_inter.fasta' , verbose=verbose)
                        aln3di = mergealns( childalns3di[c1]['fasta'], bridges3di[(c1,c2)]['fasta'] ,alnfolder + treenode.name + '_inter3di.fasta', verbose=verbose)

                        print('2 merge')

                        alnAA = mergealns( childalnsAA[c2]['fasta'], alnAA , alnfolder + treenode.name + '_inter.fasta' , verbose=verbose)
                        aln3di = mergealns( childalns3di[c2]['fasta'], aln3di ,alnfolder + treenode.name + '_inter3di.fasta', verbose=verbose)

                    except:
                        print( treenode , childalnsAA , childalns3di , bridgesAA , bridges3di)
                        raise Exception('merge error 1')
                else:
                    try:
                        print('3 merge')

                        alnAA = mergealns( childalnsAA[c1]['fasta'], bridgesAA[(c1,c2)]['fasta'] , alnfolder + treenode.name + '_inter.fasta' , verbose=verbose)
                        alnAA = mergealns( childalnsAA[c1]['fasta'], alnAA , alnfolder + treenode.name + '_inter.fasta' , verbose=verbose)

                        print('4 merge')

                        aln3di = mergealns( childalns3di[c2]['fasta'], bridges3di[(c1,c2)]['fasta'] , alnfolder + treenode.name + '_inter3di.fasta'   , verbose=verbose)
                        aln3di = mergealns( childalns3di[c2]['fasta'], aln3di , alnfolder + treenode.name + '_inter3di.fasta' , verbose=verbose)

                        
                    except:
                        print( treenode )
                        print( childalnsAA , childalns3di , bridgesAA , bridges3di)
                        raise Exception('merge error 2') 
            """

            treenode.aln = alnAA
            treenode.aln3di = aln3di
            if verbose == True:
                #check if node is root  
                if treenode.up == None:
                    print('final aln')
                    print('childalnsAA', childalnsAA)
                    print('childalns3di', childalns3di)
            return treenode.aln, treenode.aln3di


In [60]:

def aln_mapping( s1 , s2, maxaln = 0 , coidx1= 0 , coidx2 = 0 , start1 = 0 , start2=0 ,  verbose = False):
    #build a dictionary of the positions of the characters in the string
    #convolve the strings
    maxaln, maxcount = convolve_strings(s1,s2)

    if start1 != 0 or start2 != 0:
        print('start', start1, start2)
    if verbose == True:
        print('maxaln', maxaln, maxcount)
    
    #find starting points
    if len(s1) < len(s2):
        if maxaln < 0:
            coidx1 = np.abs(maxaln)
            coidx2 = 0
        else:
            coidx1 = 0
            coidx2 = maxaln
    else:
        if maxaln < 0:
            coidx1 = 0
            coidx2 = np.abs(maxaln)
        else:
            coidx1 = maxaln
            coidx2 = 0
    
    substr1 = s1[coidx1:]
    substr2 = s2[coidx2:]

    print('substr1', substr1)
    print('substr2', substr2)
    print('coidx1', coidx1)
    print('coidx2', coidx2)
    
    #find equivalent positions in the strings
    maps1 = {}
    maps2 = {}

    oppositemap1 = {}
    oppositemap2 = {}

    for i, char in enumerate(substr1):
        if substr2[i] == char:
            maps1[i+coidx1] = i + start1
            maps2[i+coidx2] = i + start2
            
            oppositemap1[i+coidx1] = i + start2
            oppositemap2[i+coidx2] = i + start1
        else:
            #if there is mismatch convolve the remaining strings
            print('mismatch')
            sub1,sub2 , om1 , om2 = aln_mapping( substr1[i:] , substr2[i:], maxaln = 0 , start1= start1+coidx1+i , start2 = start2+coidx2+i , verbose = False)
            maps1.update( sub1 )
            maps2.update( sub2 )
            oppositemap1.update( om1 )
            oppositemap2.update( om2 )
            break
    
    return maps1, maps2 , oppositemap1, oppositemap2

def aln_mapping_full( s1 , s2, maxaln = 0 , coidx1= 0 , coidx2 = 0 , verbose = False):
    maps1, maps2 , om1 , om2 = aln_mapping( s1 , s2, maxaln = 0 , coidx1= 0 , coidx2 = 0 , verbose = False)
    #add the reverse mapping
    revmap1 = { v:k for k,v in maps1.items()}
    revmap2 = { v:k for k,v in maps2.items()}

    revmapom1 = { v:k for k,v in om1.items()}   
    revmapom2 = { v:k for k,v in om2.items()}
    
    return maps1, maps2, om1, om2 , revmap1, revmap2 , revmapom1, revmapom2

In [61]:


def convolve_strings(str1, str2):
    # Determine the lengths of the strings
    len1, len2 = len(str1), len(str2)

    if len(str1) < len(str2):
        str1, str2 = str2, str1
        len1, len2 = len2, len1
    max_alignment = 0
    max_count = 0
    # Slide str2 over str1, starting with one character overlap
    # and continue until str2 is again overlapping by just one character
    for i in range(-len2 + 1, len1):
        count = 0
        for j in range(len2):
            if 0 <= i + j < len1 and str1[i + j] == str2[j]:
                count += 1
        if count > max_count:
            max_count = count
            max_alignment = i
    return max_alignment, max_count


def alnchop(s1,s2,rawaln1,rawaln2,aln1,aln2,maxaln = 0):

    #align the two sequences
    coidx1 = 0
    coidx2 = 0

    if len(s1) < len(s2):
        if maxaln < 0:
            coidx1 = np.abs(maxaln)
            coidx2 = 0
        else:
            coidx1 = 0
            coidx2 = maxaln
    else:
        if maxaln < 0:
            coidx1 = 0
            coidx2 = np.abs(maxaln)
        else:
            coidx1 = maxaln
            coidx2 = 0
    discardcount = 0 
    rawaln1 = iter(rawaln1)
    while coidx1 > 0:
        rchar1 = next(rawaln1)
        discardcount += 1
        if rchar1 != '-':
            coidx1 -= 1
    aln1 = aln1[discardcount:]


    discardcount = 0 
    rawaln2 = iter(rawaln2)
    while coidx1 > 0:
        rchar2 = next(rawaln2)
        discardcount += 1
        if rchar2 != '-':
            coidx2 -= 1
    aln2 = aln2[discardcount:]


    rawaln1 = ''.join([ s for s in iter(rawaln1)])
    rawaln2 = ''.join([ s for s in iter(rawaln2)])

    return aln1, aln2, rawaln1, rawaln2


def mergealns( aln1f, aln2f, outfile , verbose = False):
    if set(get_fasta_leafset(aln1f)) == set(get_fasta_leafset(aln2f)):
        print('identical')
        return aln1f

    #find sequences in common between the two alignments
    aln1 = SeqIO.parse(aln1f, 'fasta')
    aln2 = SeqIO.parse(aln2f, 'fasta')
    ids1 = {s.id:str(s.seq) for s in aln1}
    ids2 = {s.id:str(s.seq) for s in aln2}
    aln1 = SeqIO.parse(aln1f, 'fasta')
    aln2 = SeqIO.parse(aln2f, 'fasta')
    idlist = [ s.id for s in aln1] + [ s.id for s in aln2]
    commonids = set(ids1.keys()).intersection(set(ids2.keys()))
    try:
        assert len(commonids) > 0
    except:
        print('no common ids')
        print('ids1', ids1)
        print('ids2', ids2)
        raise Exception('no common ids')
    #transform both alignments into numpy matrices
    aln1 = SeqIO.parse(aln1f, 'fasta')
    aln2 = SeqIO.parse(aln2f, 'fasta')
    aln1 = np.array([ list(str(s.seq)) for s in aln1])
    aln2 = np.array([ list(str(s.seq)) for s in aln2])
    nrows1 = aln1.shape[0]
    nrows2 = aln2.shape[0]

    #generate a list of column arrays
    aln1 = [ aln1[:,i] for i in range(aln1.shape[1])]
    aln2 = [ aln2[:,i] for i in range(aln2.shape[1])]
    #find the best common sequence
    maxconv = 0
    maxaln = 0

    print(ids1)
    print(ids2)
    
    for commonid in commonids:
        s1t = ids1[commonid]
        s2t = ids2[commonid]
        s1t = s1t.replace('-','')
        s2t = s2t.replace('-','')
        #if the common subsequence is not found start by removing the first character of the common sequence
        #convolution of the two sequences
        aln, count = convolve_strings(s1t,s2t)
        if count > maxconv:
            maxconv = count
            maxaln = aln
            ID = commonid
            s1 = s1t
            s2 = s2t

    rawaln1 = ids1[ID]
    rawaln2 = ids2[ID]
    print('pivot' , ID)
    print('s1',s1)
    print('s2',s2)

    print('rawaln1', rawaln1)
    print('rawaln2', rawaln2)
    
    print('maxaln', maxaln)
    #use the sequence convolution to align the two alignment arrays
    if len(s1) < len(s2):
        if maxaln < 0:
            coidx1 = np.abs(maxaln)
            coidx2 = 0
        else:
            coidx1 = 0
            coidx2 = maxaln
    else:
        if maxaln < 0:
            coidx1 = 0
            coidx2 = np.abs(maxaln)
        else:
            coidx1 = maxaln
            coidx2 = 0
    print('coidx1', coidx1)
    print('coidx2', coidx2)


    #remove the leading gaps
    for i in range(len(rawaln1)):
        if rawaln1 != '-':
            break
    aln1 = aln1[i:]
    rawaln1 = rawaln1[i:]
    for i in range(len(rawaln1)):
        if rawaln1 != '-':
            break
    aln2 = aln2[i:]
    rawaln2 = rawaln2[i:]

    #remove the trailing gaps
    for i in range(len(rawaln1)):
        if rawaln1[-i] != '-':
            break
    aln1 = aln1[:len(rawaln1)-i]
    rawaln1 = rawaln1[:len(rawaln1)-i]
    for i in range(len(rawaln2)):
        if rawaln2[-i] != '-':
            break
    aln2 = aln2[:len(rawaln2)-i]
    rawaln2 = rawaln2[:len(rawaln2)-i]
    

    #construct alignment with common sequence

    rawaln1 = iter(rawaln1)
    rawaln2 = iter(rawaln2)    
    char1 = next(rawaln1)
    char2 = next(rawaln2)
    i = 1
    j = 1 
    pchar1 = char1
    pchar2 = char2
    newaln1 = []
    newaln2 = []
    convolved = False
    while True:
        try:
            if pchar1 == '-' and char1 != '-':
                print('end insertion1')

            if pchar2 == '-' and char2 != '-':
                print('end insertion2')

            if char1 == '-' and char2 != '-':
                print('insertion1')
                newaln2.append(['-']*nrows2)
                newaln1.append(aln1[i])
                pchar1 = char1
                char1 = next(rawaln1)
                i +=1

            elif char2 == '-' and char1 != '-':
                print('insertion2')
                newaln2.append(aln2[j])
                newaln1.append(['-']*nrows1)
                pchar2 = char2
                char2 = next(rawaln2)
                j +=1

            elif char1 == char2 and char1 != '-' and char2 != '-':
                char1 = next(rawaln1)
                char2 = next(rawaln2)
                newaln2.append(aln2[j])
                newaln1.append(aln1[i])
                pchar1 = char1
                pchar2 = char2
                j+= 1
                i+= 1

            elif char1 != '-' and char2 != '-' and char1 != char2:
                convolved = True
                print('mismatch')
                #mismatch reconvolve remaining strings
                rawaln1 = ''.join([ s for s in iter(rawaln1)])
                rawaln2 = ''.join([ s for s in iter(rawaln2)])
                s1 = rawaln1.replace('-','')
                s2 = rawaln2.replace('-','')

                maxaln, count = convolve_strings(s1,s2)
                if len(s1) < len(s2):
                    if maxaln < 0:
                        coidx1 = np.abs(maxaln)
                        coidx2 = 0
                    else:
                        coidx1 = 0
                        coidx2 = maxaln
                else:
                    if maxaln < 0:
                        coidx1 = 0
                        coidx2 = np.abs(maxaln)
                    else:
                        coidx1 = maxaln
                        coidx2 = 0
                
                rawaln1 = iter(rawaln1)
                discardcount1 = 0
                count1 = 0
                while count1 < coidx1:
                    discardcount1 += 1
                    char1 = next(rawaln1)
                    if char1 != '-':
                        count1 += 1
                aln1 = aln1[discardcount1:]
                rawaln2 = iter(rawaln2) 
                discardcount2 = 0
                count2 = 0
                while count2 < coidx2:
                    discardcount2 += 1
                    char2 = next(rawaln2)
                    if char2 != '-':
                        count2 += 1
                aln2 = aln2[discardcount2:]

                char1 = next(rawaln1)
                char2 = next(rawaln2)
                

                i = 0 
                j = 0
                print('char1', char1)
                print('char2', char2)

                print( 'newaln1' , newaln1)

                print( 'newaln2' , newaln2)
                
            else:
                print('end')
                break
        
        except StopIteration:
            break
    try:
        newaln1 = np.vstack(newaln1).T
        newaln2 = np.vstack(newaln2).T
        newaln = np.concatenate((newaln1, newaln2), axis = 0)
    except:
        print( 'aln err')
        print( 'char1', char1 )
        print( 'char2' , char2 )
        print( 'aln1', aln1 ) 
        print( 'aln2' , aln2 )
        print( 'maxaln'  , maxaln)
        raise Exception('aln err: newaln not created')
    #write out the new alignment
    with open(outfile, 'w') as f:
        for i in range(newaln.shape[0]):
            #print('>' + idlist[i] + '\n' + ''.join(list(newaln[i,:])) + '\n')
            f.write('>' + idlist[i] + '\n')
            f.write(''.join(list(newaln[i,:])) + '\n')
    return outfile  

In [107]:

import toytree
import os
import pandas as pd
import glob

infolder = '../../testcat/4.10.530/'
alndf = pd.read_table(infolder + 'allvall_1.csv', header = None)

mapper3di, mapperAA = read_dbfiles3di( infolder+"outdb" , infolder+"outdb_ss")
#add the 3di alignment to the dataframe
columns = 'query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,lddt,qaln,taln,cigar,lntmscore'.split(',')
alndf.columns = columns

alndf['3diq']= alndf['query'].map(mapper3di)
alndf['3dit']= alndf['target'].map(mapper3di)
alndf['AAq']= alndf['query'].map(mapperAA)
alndf['AAt']= alndf['target'].map(mapperAA)

#output a fasta with the 3di sequences
res = alndf.apply(calc_fident_crossaln , axis = 1)
alndf = pd.concat([alndf,res] , axis = 1)

with open(infolder + '3diseqs.fasta' , 'w') as out:
    for seq in alndf['query'].unique():
        out.write('>'+seq.replace('.pdb', '' )+'\n')
        out.write(mapper3di[seq]+'\n')
alndf['query'] = alndf['query'].map(lambda x :x.replace('.pdb', ''))
alndf['target'] = alndf['target'].map(lambda x :x.replace('.pdb', ''))



In [108]:
print(alndf.head())

    query  target  fident  alnlen  mismatch  gapopen  qstart  qend  tstart  \
0  O00602  O00602   1.000     217         0        0       1   217       1   
1  O00602  Q15485   0.850     214        32        0       4   217       1   
2  O00602  Q8N539   0.495     220       104        3       4   217       1   
3  O00602  O75636   0.523     214        96        3       5   215       1   
4  O00602  Q9U8W8   0.472     218       108        3       6   217       2   

   tend  ...                                               qaln  \
0   217  ...  SCATGPRNCKDLLDRGYFLSGWHTIYLPDCRPLTVLCDMDTDGGGW...   
1   214  ...  TGPRNCKDLLDRGYFLSGWHTIYLPDCRPLTVLCDMDTDGGGWTVF...   
2   219  ...  TGPRNCKDLLDRGYFLSGWHTIYLPDCR-PLTVLCDMDTDGGGWTV...   
3   211  ...  GPRNCKDLLDRGYFLSGWHTIYLPDCRPLTVLCDMDTDGGGWTVFQ...   
4   218  ...  PRNCKDLLDRGYFLSGWHTIYLPD---CRPLTVLCDMDTDGGGWTV...   

                                                taln               cigar  \
0  SCATGPRNCKDLLDRGYFLSGWHTIYLPDCRPLTVLCDMDTDGGGW...

In [124]:
#prepare tree attributes
import shutil
tre = toytree.tree(infolder + 'struct_tree.PP.nwk.rooted.final'  )
for i,n in enumerate(tre.treenode.traverse()):
    n.aln = None
    n.aln3di = None
    n.leafset = None
    if len(n.name) == 0:
        n.name = 'internal_'+str(i)

alnfolder = infolder+'alnscratch/'
if not os.path.exists(alnfolder):
    os.mkdir(infolder+'alnscratch/')

#clear all files in aln scratch
cleanup(alnfolder)
print(tre.treenode)
submat = '../mafftmat/3diHEXmat.txt'
finalaln, finalaln3di = traverse_tree_merge_mafft( tre.treenode.get_tree_root(), get_leafset(tre.treenode.get_tree_root()) , alndf , infolder+'alnscratch/' , submat = submat , verbose = True ) 
print('finalaln',finalaln)
#print the final alignments
print('nsequences' , len(tre.get_tip_labels()))
finalaln = remove_seeds(finalaln)
finalaln3di = remove_seeds(finalaln3di)

finalaln = remove_redundant(finalaln)
finalaln3di = remove_redundant(finalaln3di)

#copy to root folder
shutil.copy(finalaln, infolder + 'alnAAfoldtree.fasta')
shutil.copy(finalaln3di, infolder + 'aln3difoldtree.fasta')



      /-Q8N539
   /-|
  |   \-Q9U8W8
  |
  |   /-P24821
  |--|
  |  |   /-O75636
  |   \-|
  |     |   /-Q15485
--|      \-|
  |         \-O00602
  |
  |      /-Q9BY76
  |   /-|
  |  |  |   /-Q15389
  |  |   \-|
  |  |      \-O15123
   \-|
     |      /-P04115
     |   /-|
     |  |  |   /-P02679
     |  |   \-|
      \-|      \-O93568
        |
        |   /-P02671
         \-|
           |   /-P02678
            \-|
              |   /-P02675
               \-|
                  \-Q02020
traverse 29 False None
not cherry 29
children ['28', '27', '26']
traverse 28 False None
traverse 28 False None
cherry Q8N539 Q9U8W8
traverse 27 False None
traverse 27 False None
not cherry 27
children ['P24821', '25']
traverse P24821 True None
traverse P24821 True None

--P24821 P24821
traverse 25 False None
traverse 25 False None
not cherry 25
children ['O75636', '22']
traverse O75636 True None
traverse O75636 True None

--O75636 O75636
traverse 22 False None
traverse 22 False None
cherry Q15485 O0

nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    1 / 2

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 2
njobc = 3
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For more information, see 'mafft --help', 'mafft --man' and the mafft page.

The default gap scoring scheme has been changed in version 7.110 (2013 Oct

mafft --addfull  ../../testcat/4.10.530/alnscratch/O75636_inter.3di.fasta --keeplength ../../testcat/4.10.530/alnscratch/22_inter.3di.fasta > ../../testcat/4.10.530/alnscratch/25_inter3di.fasta


nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    1 / 2

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 2
njobc = 3
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For more information, see 'mafft --help', 'mafft --man' and the mafft page.

The default gap scoring scheme has been changed in version 7.110 (2013 Oct

mafft --addfull  ../../testcat/4.10.530/alnscratch/P24821_inter.fasta --keeplength ../../testcat/4.10.530/alnscratch/25_inter.fasta > ../../testcat/4.10.530/alnscratch/27_inter.fasta


nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    2 / 3

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 3
njobc = 4
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


To keep the alignment length, 9 letters were DELETED.
To know the positions of deleted letters, rerun the same command with the --mapout option.

Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For m

mafft --addfull  ../../testcat/4.10.530/alnscratch/P24821_inter.3di.fasta --keeplength ../../testcat/4.10.530/alnscratch/25_inter3di.fasta > ../../testcat/4.10.530/alnscratch/27_inter3di.fasta


nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    2 / 3

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 3
njobc = 4
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


To keep the alignment length, 10 letters were DELETED.
To know the positions of deleted letters, rerun the same command with the --mapout option.

Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For 

traverse 26 False None
traverse 26 False None
not cherry 26
children ['24', '23']
traverse 24 False None
traverse 24 False None
not cherry 24
children ['Q9BY76', '21']
traverse Q9BY76 True None
traverse Q9BY76 True None

--Q9BY76 Q9BY76
traverse 21 False None
traverse 21 False None
cherry Q15389 O15123
mafft --addfull  ../../testcat/4.10.530/alnscratch/Q9BY76_inter.fasta --keeplength ../../testcat/4.10.530/alnscratch/21_inter.fasta > ../../testcat/4.10.530/alnscratch/24_inter.fasta


nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    1 / 2

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 2
njobc = 3
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


To keep the alignment length, 9 letters were DELETED.
To know the positions of deleted letters, rerun the same command with the --mapout option.

Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For m

mafft --addfull  ../../testcat/4.10.530/alnscratch/Q9BY76_inter.3di.fasta --keeplength ../../testcat/4.10.530/alnscratch/21_inter.3di.fasta > ../../testcat/4.10.530/alnscratch/24_inter3di.fasta


nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    1 / 2

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 2
njobc = 3
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


To keep the alignment length, 5 letters were DELETED.
To know the positions of deleted letters, rerun the same command with the --mapout option.

Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For m

traverse 23 False None
traverse 23 False None
not cherry 23
children ['20', '19']
traverse 20 False None
traverse 20 False None
not cherry 20
children ['P04115', '18']
traverse P04115 True None
traverse P04115 True None

--P04115 P04115
traverse 18 False None
traverse 18 False None
cherry P02679 O93568
mafft --addfull  ../../testcat/4.10.530/alnscratch/P04115_inter.fasta --keeplength ../../testcat/4.10.530/alnscratch/18_inter.fasta > ../../testcat/4.10.530/alnscratch/20_inter.fasta


nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    1 / 2

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 2
njobc = 3
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


To keep the alignment length, 34 letters were DELETED.
To know the positions of deleted letters, rerun the same command with the --mapout option.

Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For 

mafft --addfull  ../../testcat/4.10.530/alnscratch/P04115_inter.3di.fasta --keeplength ../../testcat/4.10.530/alnscratch/18_inter.3di.fasta > ../../testcat/4.10.530/alnscratch/20_inter3di.fasta


nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    1 / 2

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 2
njobc = 3
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


To keep the alignment length, 34 letters were DELETED.
To know the positions of deleted letters, rerun the same command with the --mapout option.

Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For 

traverse 19 False None
traverse 19 False None
not cherry 19
children ['P02671', '17']
traverse P02671 True None
traverse P02671 True None

--P02671 P02671
traverse 17 False None
traverse 17 False None
not cherry 17
children ['P02678', '16']
traverse P02678 True None
traverse P02678 True None

--P02678 P02678
traverse 16 False None
traverse 16 False None
cherry P02675 Q02020
mafft --addfull  ../../testcat/4.10.530/alnscratch/P02678_inter.fasta --keeplength ../../testcat/4.10.530/alnscratch/16_inter.fasta > ../../testcat/4.10.530/alnscratch/17_inter.fasta


nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    1 / 2

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 2
njobc = 3
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


To keep the alignment length, 16 letters were DELETED.
To know the positions of deleted letters, rerun the same command with the --mapout option.

Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For 

mafft --addfull  ../../testcat/4.10.530/alnscratch/P02678_inter.3di.fasta --keeplength ../../testcat/4.10.530/alnscratch/16_inter.3di.fasta > ../../testcat/4.10.530/alnscratch/17_inter3di.fasta


nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    1 / 2

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 2
njobc = 3
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


To keep the alignment length, 16 letters were DELETED.
To know the positions of deleted letters, rerun the same command with the --mapout option.

Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For 

mafft --addfull  ../../testcat/4.10.530/alnscratch/P02671_inter.fasta --keeplength ../../testcat/4.10.530/alnscratch/17_inter.fasta > ../../testcat/4.10.530/alnscratch/19_inter.fasta


nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    2 / 3

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 3
njobc = 4
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


To keep the alignment length, 3 letters were DELETED.
To know the positions of deleted letters, rerun the same command with the --mapout option.

Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For m

mafft --addfull  ../../testcat/4.10.530/alnscratch/P02671_inter.3di.fasta --keeplength ../../testcat/4.10.530/alnscratch/17_inter3di.fasta > ../../testcat/4.10.530/alnscratch/19_inter3di.fasta


nadd = 1
rescale = 1
dndpre (aa) Version 7.520
alg=X, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

rescale = 1
All-to-all alignment.
    2 / 3

##### writing hat3
pairlocalalign (aa) Version 7.520
alg=Y, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

nadd = 1
nthread = 0
blosum 62 / kimura 200
sueff_global = 0.100000
norg = 3
njobc = 4
Loading 'hat3' ... 
done.
rescale = 1
Loading 'hat2n' (aligned sequences - new sequences) ... done.
Loading 'hat2i' (aligned sequences) ... done.
cTEP 0 / 1                    

Combining ..
   done.                      

   done.                      

addsingle (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


To keep the alignment length, 3 letters were DELETED.
To know the positions of deleted letters, rerun the same command with the --mapout option.

Strategy:
 Multi-INS-full (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For m

mafft --seed ../../testcat/4.10.530/alnscratch/20_inter.fasta ../../testcat/4.10.530/alnscratch/19_inter.fasta > ../../testcat/4.10.530/alnscratch/23_inter.fasta


############################################################################
#   Progressive alignment method is incompatible with the --seed option.
#   Automatically switched to the iterative refinement method.
#   
# Also consider using the '--add' option, which is compatible with
#   the progressive method and FASTER than the '--seed' option.
#   Usage is:
#   % mafft --add newSequences existingAlignment > output
############################################################################
nhomologs = 4
seedoffset = 0
seed = seed1
rescale = 1
Gap Penalty = -1.53, +0.00, -0.12
tsuyosa = 1600.000000

adding 1-2
multi2hat3s (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, +0.12, -0.00, noshift, amax=0.0
1 thread(s)

nthread = 0
nthreadpair = 0
nthreadtb = 0
ppenalty_ex = 0
stacksize: 8192 kb
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..
    1 / 7
done.

Constructing a UPGMA tree (efffree=1) ... 
    0 / 7
done.

Progressive alignment 1/1... 
STEP     6 / 6

mafft --seed ../../testcat/4.10.530/alnscratch/20_inter3di.fasta ../../testcat/4.10.530/alnscratch/19_inter3di.fasta > ../../testcat/4.10.530/alnscratch/23_inter3di.fasta


############################################################################
#   Progressive alignment method is incompatible with the --seed option.
#   Automatically switched to the iterative refinement method.
#   
# Also consider using the '--add' option, which is compatible with
#   the progressive method and FASTER than the '--seed' option.
#   Usage is:
#   % mafft --add newSequences existingAlignment > output
############################################################################
nhomologs = 4
seedoffset = 0
seed = seed1
rescale = 1
Gap Penalty = -1.53, +0.00, -0.12
tsuyosa = 1600.000000

adding 1-2
multi2hat3s (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, +0.12, -0.00, noshift, amax=0.0
1 thread(s)

nthread = 0
nthreadpair = 0
nthreadtb = 0
ppenalty_ex = 0
stacksize: 8192 kb
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..
    1 / 7
done.

Constructing a UPGMA tree (efffree=1) ... 
    0 / 7
done.

Progressive alignment 1/1... 
STEP     6 / 6

mafft --seed ../../testcat/4.10.530/alnscratch/24_inter.fasta ../../testcat/4.10.530/alnscratch/23_inter.fasta > ../../testcat/4.10.530/alnscratch/26_inter.fasta


############################################################################
#   Progressive alignment method is incompatible with the --seed option.
#   Automatically switched to the iterative refinement method.
#   
# Also consider using the '--add' option, which is compatible with
#   the progressive method and FASTER than the '--seed' option.
#   Usage is:
#   % mafft --add newSequences existingAlignment > output
############################################################################
nhomologs = 7
seedoffset = 0
seed = seed1
rescale = 1
Gap Penalty = -1.53, +0.00, -0.12
tsuyosa = 4900.000000

adding 1-2
multi2hat3s (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, +0.12, -0.00, noshift, amax=0.0
1 thread(s)

nthread = 0
nthreadpair = 0
nthreadtb = 0
ppenalty_ex = 0
stacksize: 8192 kb
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..
    1 / 10
done.

Constructing a UPGMA tree (efffree=1) ... 
    0 / 10
done.

Progressive alignment 1/1... 
STEP     9 /

mafft --seed ../../testcat/4.10.530/alnscratch/24_inter3di.fasta ../../testcat/4.10.530/alnscratch/23_inter3di.fasta > ../../testcat/4.10.530/alnscratch/26_inter3di.fasta


############################################################################
#   Progressive alignment method is incompatible with the --seed option.
#   Automatically switched to the iterative refinement method.
#   
# Also consider using the '--add' option, which is compatible with
#   the progressive method and FASTER than the '--seed' option.
#   Usage is:
#   % mafft --add newSequences existingAlignment > output
############################################################################
nhomologs = 7
seedoffset = 0
seed = seed1
rescale = 1
Gap Penalty = -1.53, +0.00, -0.12
tsuyosa = 4900.000000

adding 1-2
multi2hat3s (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, +0.12, -0.00, noshift, amax=0.0
1 thread(s)

nthread = 0
nthreadpair = 0
nthreadtb = 0
ppenalty_ex = 0
stacksize: 8192 kb
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..
    1 / 10
done.

Constructing a UPGMA tree (efffree=1) ... 
    0 / 10
done.

Progressive alignment 1/1... 
STEP     9 /

final aln
childalnsAA {<toytree.TreeNode.TreeNode object at 0x7f18d57b3d60>: {'fasta': '../../testcat/4.10.530/alnscratch/28_inter.fasta'}, <toytree.TreeNode.TreeNode object at 0x7f18d57c4790>: {'fasta': '../../testcat/4.10.530/alnscratch/27_inter.fasta'}, <toytree.TreeNode.TreeNode object at 0x7f18d57c5c30>: {'fasta': '../../testcat/4.10.530/alnscratch/26_inter.fasta'}}
childalns3di {<toytree.TreeNode.TreeNode object at 0x7f18d57b3d60>: {'fasta': '../../testcat/4.10.530/alnscratch/28_inter.3di.fasta'}, <toytree.TreeNode.TreeNode object at 0x7f18d57c4790>: {'fasta': '../../testcat/4.10.530/alnscratch/27_inter3di.fasta'}, <toytree.TreeNode.TreeNode object at 0x7f18d57c5c30>: {'fasta': '../../testcat/4.10.530/alnscratch/26_inter3di.fasta'}}
[('../../testcat/4.10.530/alnscratch/28_inter.fasta', '../../testcat/4.10.530/alnscratch/28_inter.3di.fasta', <class 'str'>, <class 'str'>), ('../../testcat/4.10.530/alnscratch/27_inter.fasta', '../../testcat/4.10.530/alnscratch/27_inter3di.fasta', <c

############################################################################
#   Progressive alignment method is incompatible with the --seed option.
#   Automatically switched to the iterative refinement method.
#   
# Also consider using the '--add' option, which is compatible with
#   the progressive method and FASTER than the '--seed' option.
#   Usage is:
#   % mafft --add newSequences existingAlignment > output
############################################################################
nhomologs = 4
seedoffset = 0
seed = seed1
rescale = 1
Gap Penalty = -1.53, +0.00, -0.12
tsuyosa = 1600.000000

adding 0-1
multi2hat3s (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, +0.12, -0.00, noshift, amax=0.0
1 thread(s)

nthread = 0
nthreadpair = 0
nthreadtb = 0
ppenalty_ex = 0
stacksize: 8192 kb
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..
    1 / 6
done.

Constructing a UPGMA tree (efffree=1) ... 
    0 / 6
done.

Progressive alignment 1/1... 
STEP     5 / 5

mafft --textmatrix ../mafftmat/3diHEXmat.txt --seed ../../testcat/4.10.530/alnscratch/28_inter.3di.fasta ../../testcat/4.10.530/alnscratch/27_inter3di.fasta > ../../testcat/4.10.530/alnscratch/29_inter3di.fasta


############################################################################
#   Progressive alignment method is incompatible with the --seed option.
#   Automatically switched to the iterative refinement method.
#   
# Also consider using the '--add' option, which is compatible with
#   the progressive method and FASTER than the '--seed' option.
#   Usage is:
#   % mafft --add newSequences existingAlignment > output
############################################################################
nhomologs = 4
seedoffset = 0
seed = seed1
rescale = 1
Gap Penalty = -1.53, +0.00, -0.12
tsuyosa = 1600.000000

adding 0-1
multi2hat3s (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, +0.12, -0.00, noshift, amax=0.0
1 thread(s)

nthread = 0
nthreadpair = 0
nthreadtb = 0
ppenalty_ex = 0
stacksize: 8192 kb
nalphabets = 256
nused=
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..
    1 / 6
done.

Constructing a UPGMA tree (efffree=1) ... 
    0 / 6
done.

Progressive alignment 1/1... 
ST

aln1
>_seed_Q8N539
--PRDCLDVLLSGQQDDGVYSVF-PTHY--PAGFQVYCDMRTDGGGWTVFQRREDGSVN-
--FFRGWDAYRDGFGRLTGEHWLGLKRIHALTTQAAYELHVDLEDFENGTAYARYGSFGV
GLFSVDPEEDGYPLTVADYS-GTAGDSLLKHSGMRFTTKDRDSDHSENNCAAFYRGAWWY
RNCHTSNLNGQYLRGAHASYADGVEWSSWTGWQYSLKFSEMKIRPV
>_seed_Q9U8W8
--PTDCADILLNGYRSSGGYRIW-PKSWMTVGTLNVYCDMETDGGGWTVIQRRGNYGNPS
DYFYKPWKNYKLGFGNIEKDFWLGNDRIFALTNQRNYMIRFDLKDKENDTRYAIYQDFWI
-----ENEDYLYCLHIGNYS-GDAGNSFGRHNGHNFSTIDKDHDTHETHCAQTYKGGWWY
DRCHESNLNGLYLNGEHNSYADGIEWRAWKGYHYSLPQVEMKIRPV
>Q15485
TGPRTCKDLLDRGHFLSGWHTIYLPDCR--P--LTVLCDMDTDGGGWTVFQRRVDGSVD-
--FYRDWATYKQGFGSRLGEFWLGNDNIHALTAQGTSELRVDLVDFEDNYQFAKYRS---
--FKVADEAEKYNLVLGAFVEGSAGDSLTFHNNQSFSTKDQDNDLNTGNCAVMFQGAWWY
KNCHVSNLNGRYLRGTHGSFANGINWKSGKGYNYSYKVSEMKVRPA
>O00602
TGPRNCKDLLDRGYFLSGWHTIYLPDCR--P--LTVLCDMDTDGGGWTVFQRRMDGSVD-
--FYRDWAAYKQGFGSQLGEFWLGNDNIHALTAQGSSELRVDLVDFEGNHQFAKYKS---
--FKVADEAEKYKLVLGAFVGGSAGNSLTGHNNNFFSTKDQDNDVSSSNCAEKFQGAWWY
ADCHASNLNGLYLMGPHESFANGINWSAAKGYKYSYKVSEMKVRPA
>O75636
-GPRNCRELLSQGATLSGWYHLC

############################################################################
#   Progressive alignment method is incompatible with the --seed option.
#   Automatically switched to the iterative refinement method.
#   
# Also consider using the '--add' option, which is compatible with
#   the progressive method and FASTER than the '--seed' option.
#   Usage is:
#   % mafft --add newSequences existingAlignment > output
############################################################################
nhomologs = 4
seedoffset = 0
seed = seed1
rescale = 1
Gap Penalty = -1.53, +0.00, -0.12
tsuyosa = 1600.000000

adding 4-5
multi2hat3s (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, +0.12, -0.00, noshift, amax=0.0
1 thread(s)

nthread = 0
nthreadpair = 0
nthreadtb = 0
ppenalty_ex = 0
stacksize: 8192 kb
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..
    1 / 10
done.

Constructing a UPGMA tree (efffree=1) ... 
    0 / 10
done.

Progressive alignment 1/1... 
STEP     9 /

mafft --textmatrix ../mafftmat/3diHEXmat.txt --seed ../../testcat/4.10.530/alnscratch/29_inter3di.fasta ../../testcat/4.10.530/alnscratch/27_inter3di.fasta > ../../testcat/4.10.530/alnscratch/29_inter3di.fasta.iter


############################################################################
#   Progressive alignment method is incompatible with the --seed option.
#   Automatically switched to the iterative refinement method.
#   
# Also consider using the '--add' option, which is compatible with
#   the progressive method and FASTER than the '--seed' option.
#   Usage is:
#   % mafft --add newSequences existingAlignment > output
############################################################################
nhomologs = 4
seedoffset = 0
seed = seed1
rescale = 1
Gap Penalty = -1.53, +0.00, -0.12
tsuyosa = 1600.000000

adding 4-5
multi2hat3s (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, +0.12, -0.00, noshift, amax=0.0
1 thread(s)

nthread = 0
nthreadpair = 0
nthreadtb = 0
ppenalty_ex = 0
stacksize: 8192 kb
nalphabets = 256
nused=
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..
    1 / 10
done.

Constructing a UPGMA tree (efffree=1) ... 
    0 / 10
done.

Progressive alignment 1/1... 


aln0
>_seed__seed_Q8N539
--PRDCLDVLLSGQQDDGVYSVF-PTHY--PAGFQVYCDMRTDGGGWTVFQRREDGSVN-
--FFRGWDAYRDGFGRLTGEHWLGLKRIHALTTQAAYELHVDLEDFENGTAYARYGSFGV
GLFSVDPEEDGYPLTVADYS-GTAGDSLLKHSGMRFTTKDRDSDHSENNCAAFYRGAWWY
RNCHTSNLNGQYLRGAHASYADGVEWSSWTGWQYSLKFSEMKIRPV
>_seed__seed_Q9U8W8
--PTDCADILLNGYRSSGGYRIW-PKSWMTVGTLNVYCDMETDGGGWTVIQRRGNYGNPS
DYFYKPWKNYKLGFGNIEKDFWLGNDRIFALTNQRNYMIRFDLKDKENDTRYAIYQDFWI
-----ENEDYLYCLHIGNYS-GDAGNSFGRHNGHNFSTIDKDHDTHETHCAQTYKGGWWY
DRCHESNLNGLYLNGEHNSYADGIEWRAWKGYHYSLPQVEMKIRPV
>_seed_Q15485
TGPRTCKDLLDRGHFLSGWHTIYLPDCR--P--LTVLCDMDTDGGGWTVFQRRVDGSVD-
--FYRDWATYKQGFGSRLGEFWLGNDNIHALTAQGTSELRVDLVDFEDNYQFAKYRS---
--FKVADEAEKYNLVLGAFVEGSAGDSLTFHNNQSFSTKDQDNDLNTGNCAVMFQGAWWY
KNCHVSNLNGRYLRGTHGSFANGINWKSGKGYNYSYKVSEMKVRPA
>_seed_O00602
TGPRNCKDLLDRGYFLSGWHTIYLPDCR--P--LTVLCDMDTDGGGWTVFQRRMDGSVD-
--FYRDWAAYKQGFGSQLGEFWLGNDNIHALTAQGSSELRVDLVDFEGNHQFAKYKS---
--FKVADEAEKYKLVLGAFVGGSAGNSLTGHNNNFFSTKDQDNDVSSSNCAEKFQGAWWY
ADCHASNLNGLYLMGPHESFANGINWSAAKGYKYSYKVSEMKVRPA
>_seed_

############################################################################
#   Progressive alignment method is incompatible with the --seed option.
#   Automatically switched to the iterative refinement method.
#   
# Also consider using the '--add' option, which is compatible with
#   the progressive method and FASTER than the '--seed' option.
#   Usage is:
#   % mafft --add newSequences existingAlignment > output
############################################################################
nhomologs = 10
seedoffset = 0
seed = seed1
rescale = 1
Gap Penalty = -1.53, +0.00, -0.12
tsuyosa = 10000.000000

adding 8-9
multi2hat3s (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, +0.12, -0.00, noshift, amax=0.0
1 thread(s)

nthread = 0
nthreadpair = 0
nthreadtb = 0
ppenalty_ex = 0
stacksize: 8192 kb
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..
    1 / 20
done.

Constructing a UPGMA tree (efffree=1) ... 
   10 / 20
done.

Progressive alignment 1/1... 
STEP    19

mafft --textmatrix ../mafftmat/3diHEXmat.txt --seed ../../testcat/4.10.530/alnscratch/29_inter3di.fasta.iter ../../testcat/4.10.530/alnscratch/26_inter3di.fasta > ../../testcat/4.10.530/alnscratch/29_inter3di.fasta.iter.iter


############################################################################
#   Progressive alignment method is incompatible with the --seed option.
#   Automatically switched to the iterative refinement method.
#   
# Also consider using the '--add' option, which is compatible with
#   the progressive method and FASTER than the '--seed' option.
#   Usage is:
#   % mafft --add newSequences existingAlignment > output
############################################################################
nhomologs = 10
seedoffset = 0
seed = seed1
rescale = 1
Gap Penalty = -1.53, +0.00, -0.12
tsuyosa = 10000.000000

adding 8-9
multi2hat3s (aa) Version 7.520
alg=A, model=BLOSUM62, 1.53, +0.12, -0.00, noshift, amax=0.0
1 thread(s)

nthread = 0
nthreadpair = 0
nthreadtb = 0
ppenalty_ex = 0
stacksize: 8192 kb
nalphabets = 256
nused=
Gap Penalty = -1.53, +0.00, +0.00



Making a distance matrix ..
    1 / 20
done.

Constructing a UPGMA tree (efffree=1) ... 
   10 / 20
done.

Progressive alignment 1/1...

aln1
>_seed__seed__seed_Q8N539
--------------------------------------------------PRDCLDVLLS
GQQDDGVYSVF-PTHY--PAGFQVYCDMRTDGGGWTVFQRREDGSVN---FFRGWDAYRD
GFGRLT---------------GEHWLGLKRIHALTTQAA--YELHVDLEDFENGTAYARY
GSFGVGLFSVDPEEDGYPLTVADYS-GTAGDSL-------------------LKHSGMRF
TTKDRDSDHS-----ENNCAAFYRGAWWYRNCHTSNLNGQYLRGAHAS-------YADGV
EWSSWTGWQYSLKFSEMKIRPV----
>_seed__seed__seed_Q9U8W8
--------------------------------------------------PTDCADILLN
GYRSSGGYRIW-PKSWMTVGTLNVYCDMETDGGGWTVIQRRGNYGNPSDYFYKPWKNYKL
GFGNIE---------------KDFWLGNDRIFALTNQRN--YMIRFDLKDKENDTRYAIY
QDFWI-----ENEDYLYCLHIGNYS-GDAGNSF-------------------GRHNGHNF
STIDKDHDTH-----ETHCAQTYKGGWWYDRCHESNLNGLYLNGEHNS-------YADGI
EWRAWKGYHYSLPQVEMKIRPV----
>_seed__seed_Q15485
------------------------------------------------TGPRTCKDLLDR
GHFLSGWHTIYLPDCR--P--LTVLCDMDTDGGGWTVFQRRVDGSVD---FYRDWATYKQ
GFGSRL---------------GEFWLGNDNIHALTAQGT--SELRVDLVDFEDNYQFAKY
RS-----FKVADEAEKYNLVLGAFVEGSAGDSL-------------------TFHNNQSF
STKDQDNDLN-----

 identical.   
Converged.

done
dvtditr (text) Version 7.520
alg=A, model=Extended, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


Strategy:
 NW-INS-i (Not tested.)
 ?

If unsure which option to use, try 'mafft --auto input > output'.
For more information, see 'mafft --help', 'mafft --man' and the mafft page.

The default gap scoring scheme has been changed in version 7.110 (2013 Oct).
It tends to insert more gaps into gap-rich regions than previous versions.
To disable this change, add the --leavegappyregion option.



'../../testcat/4.10.530/aln3difoldtree.fasta'

In [125]:

finalaln = remove_seeds(finalaln)
finalaln3di = remove_seeds(finalaln3di)


In [126]:
print( glob.glob(infolder + 'aln*fasta'))

['../../testcat/4.10.530/alnAA_AA.fasta', '../../testcat/4.10.530/alnAAfoldtree.fasta', '../../testcat/4.10.530/aln3di_3di.fasta', '../../testcat/4.10.530/aln3difoldtree.fasta', '../../testcat/4.10.530/alnAA_3di.fasta', '../../testcat/4.10.530/aln3di_AA.fasta']


In [127]:
print('snamk')
#print number of sequences in the final alignment   
with open(infolder + 'alnAAfoldtree.fasta') as f:
    print( f.read().count('>'))
with open(infolder + 'alnAAfoldtree.fasta') as f:
    print( f.read())


snamk
16
>Q8N539
--------------------------------------------------PRDCLDVLLSGQQDDGVYSVF-PTHY--PAGFQVYCDMRTDGGGWTVFQRREDGSVN---FFRGWDAYRDGFGRLT---------------GEHWLGLKRIHALTTQAA--YELHVDLEDFENGTAYARYGSFGVGLFSVDPEEDGYPLTVADYS-GTAGDSL-------------------LKHSGMRFTTKDRDSDHS-----ENNCAAFYRGAWWYRNCHTSNLNGQYLRGAHAS-------YADGVEWSSWTGWQYSLKFSEMKIRPV----
>Q9U8W8
--------------------------------------------------PTDCADILLNGYRSSGGYRIW-PKSWMTVGTLNVYCDMETDGGGWTVIQRRGNYGNPSDYFYKPWKNYKLGFGNIE---------------KDFWLGNDRIFALTNQRN--YMIRFDLKDKENDTRYAIYQDFWI-----ENEDYLYCLHIGNYS-GDAGNSF-------------------GRHNGHNFSTIDKDHDTH-----ETHCAQTYKGGWWYDRCHESNLNGLYLNGEHNS-------YADGIEWRAWKGYHYSLPQVEMKIRPV----
>Q15485
------------------------------------------------TGPRTCKDLLDRGHFLSGWHTIYLPDCR--P--LTVLCDMDTDGGGWTVFQRRVDGSVD---FYRDWATYKQGFGSRL---------------GEFWLGNDNIHALTAQGT--SELRVDLVDFEDNYQFAKYRS-----FKVADEAEKYNLVLGAFVEGSAGDSL-------------------TFHNNQSFSTKDQDNDLN-----TGNCAVMFQGAWWYKNCHVSNLNGRYLRGTHGS-------FANGINWKSGKGYNYSYK

In [128]:

print( len(glob.glob(infolder + 'structs/*.pdb')) )




print('finalaln',finalaln)
#print number of sequences in the final alignment   
with open(finalaln) as f:
    print( f.read().count('>'))

print('finalaln3di',finalaln3di)
with open(finalaln3di) as f:
    print( f.read().count('>'))



16
finalaln ../../testcat/4.10.530/alnscratch/29_root.fasta.iter.iter
16
finalaln3di ../../testcat/4.10.530/alnscratch/29_inter3di.fasta.iter.iter
16
