# Pst_104E_v12_post_allele_analysis_v02_RT

Based on original code by Benjamin Schwessinger.

- Inputs: output from `DK_0911_defining_alleles_v02` & primary+haplotig (ph) protein/gene/cds .*fasta* files from `DK_0911_generate_fasta_files_from_gff3`.
- Programs: **MUSCLE**, **PAML**
- Purpose: generate and save a DataFrame containing dN/dS information (the ratio of the number of nonsynonymous substitutions per nonsynonymous site to the number of synonymous substitutions per synonymous site), as well as Hamming & Levenshtein distances (measures of % identity). Also provides visualisations of some of this data.

#### Overview
1. Reads in the large allele DataFrames generated in `DK_0911_defining_alleles_v02` (i.e. proteinortho hits OR best blast hit) - see description header cell in that notebook for more information on which alleles are included in that DataFrame.
2. Filters the allele DataFrames based on %ID and %QCov (this can be set to filter only BLAST-identified alleles or both BLAST- and proteinortho-identified alleles) so that distance information is not calculated on an unnecessarily large number of alleles.
3. Calculates distance & dN/dS information, and saves this to an output file so that it does not have to be re-calculated. If, for whatever reason, the inputs change so that the dN/dS or distance information should change, this output file (`<GENOME_VERSION>_analysed_alleles.df`, e.g. `Pst_104E_v12_analysed_alleles.df`) should be deleted so that it can be re-generated.
4. Plots graphs of allele-type distribution (pie chart) and allele-type Levenshtein distances (measures of similarity) for different levels of allele-filtering (QCov/TCov/%ID/Levenshtein similarity).

NB:
- dN/dS information is currently not utilised in this script.

In [1]:
%matplotlib inline

In [3]:
import pandas as pd
import os
import shutil
from Bio import SeqIO
from Bio import AlignIO
import distance
import editdistance
import math
import subprocess
import numpy as np
import re
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import collections

In [19]:
# Genome/assembly identifier; used below to build input file names and genome labels.
GENOME_VERSION = 'Pst_104E_v12'

# Root of the allele-analysis working tree (machine-specific absolute path).
BASE_PATH = '/home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/Pst_104E_v12/Warrior_comp_runs/allele_analysis'

# yn00 control file; it is copied into PAML_PATH when that folder is first created (see below).
YN00_PATH = os.path.join(BASE_PATH, 'post_allele_analysis', 'yn00.ctl')
# Output root for everything this notebook writes.
BASE_OUT_PATH = os.path.join(BASE_PATH, 'post_allele_analysis')
# Folder holding the '<genome>.full_df.alleles' tables.
ALLELE_PATH = os.path.join(BASE_PATH,'defining_alleles/allele_analysis/alleles_proteinortho_graph516' )
# Path built from the genome version; presumably the unfiltered blastp allele table -- TODO confirm where it is used.
UNFILTERED_DF_PATH = os.path.join(BASE_PATH,'defining_alleles/allele_analysis',\
                '%s_p_ctg.%s_h_ctg.0.001.blastp.outfmt6.allele_analysis' % (GENOME_VERSION, GENOME_VERSION))
# Folder containing the annotation FASTA files referenced below.
GENOME_PATH = '/home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/032017_assembly'
FIGURE_PATH = os.path.join(BASE_OUT_PATH, 'figures')

# Labels for the primary (p) and haplotig (h) contig sets, derived from the genome version.
GENOME = GENOME_VERSION
P_GENOME = GENOME + '_p_ctg'
H_GENOME = GENOME + '_h_ctg'


# Base filtering so that distance calculations are not performed on all allele pairs.
# Distance calculations will only be performed on allele pairs above the defined cutoffs.
# Note that proteinortho alleles will not be affected when filterAlleleDf is called with
# leavePO=True (this can be changed in the filterAlleleDf function).
BASE_QCOV_CUTOFF = 70
BASE_TCOV_CUTOFF = 70
BASE_PCTID_CUTOFF = 70

# Annotation FASTA files: primary-only proteins, and combined primary+haplotig (ph)
# protein / gene / CDS sequences.
P_PROTEINS_FASTA = os.path.join(GENOME_PATH, P_GENOME + '.anno.protein.fa')
PH_PROTEIN_FASTA = os.path.join(GENOME_PATH, GENOME + '_ph_ctg.anno.protein.fa')
PH_GENE_FASTA = os.path.join(GENOME_PATH, GENOME + '_ph_ctg.anno.gene.fa')
PH_CDS_FASTA = os.path.join(GENOME_PATH, GENOME + '_ph_ctg.anno.CDS.fa')

# Working folder that receives one sub-folder per allele pair plus the generated bash script.
PAML_PATH = os.path.join(BASE_OUT_PATH, 'paml')
# Create the output folders on first use. os.mkdir creates a single level only, so each
# parent folder must already exist.
if not os.path.exists(BASE_OUT_PATH):
    os.mkdir(BASE_OUT_PATH)
if not os.path.exists(FIGURE_PATH):
    os.mkdir(FIGURE_PATH)
if not os.path.exists(PAML_PATH):
    os.mkdir(PAML_PATH)
    # The control file is copied only when PAML_PATH is newly created; YN00_PATH must exist then.
    shutil.copy2(YN00_PATH, PAML_PATH)

In [20]:
def assignMatchType(allele_source, overlap, no_overlap):
    '''Returns the match-type label '<allele_source>_<linkage>'.

    The linkage part is 'overlap' if overlap is truthy, otherwise
    'no_overlap' if no_overlap is truthy, otherwise 'unlinked'.'''
    prefix = allele_source + '_'

    if overlap:
        return prefix + 'overlap'
    if no_overlap:
        return prefix + 'no_overlap'
    # neither flag set: different_pcontig
    return prefix + 'unlinked'

def reduceGroups(g):
    '''Returns the best hit(s) of a group of BLAST hits.

    A single-row group is returned unchanged. Otherwise the rows with the
    lowest 'e-value' are kept; if that does not leave exactly one row, the
    rows among them with the highest 'BitScore' are returned (ties are
    kept, so more than one row may come back).'''
    if len(g) == 1:
        return g

    lowestEvalue = g['e-value'].min()
    bestHits = g.loc[g['e-value'].eq(lowestEvalue)]
    if len(bestHits) != 1:
        # break the e-value tie on the bit score
        highestBitScore = bestHits['BitScore'].max()
        bestHits = bestHits.loc[bestHits['BitScore'].eq(highestBitScore)]
    return bestHits

def filterAlleleDf(alleleDf, qCov, tCov, pctId, levSim, leavePO=False):
    '''Returns the rows of alleleDf that pass the given cut-offs.

    qCov, tCov, pctId: keep rows whose 'QCov' / 'TCov' / 'PctID' value is
        strictly greater than the cut-off. A falsy cut-off (False, None, 0)
        disables that filter.
    levSim: minimum Levenshtein similarity in percent; rows are kept when
        'protein_levenshtein' is strictly below (100 - levSim) / 100. A
        falsy value disables the filter (and the column is then not needed).
    leavePO: if True, only rows whose 'allele_source' is 'h_rBLAST' or
        'BLAST' are filtered; rows with 'allele_source' == 'PO' are appended
        unfiltered and the result gets a fresh 0..n-1 index. Rows with any
        other allele_source are dropped in this mode.'''
    if leavePO:
        no_PO_df = alleleDf[(alleleDf['allele_source'] == 'h_rBLAST') | (alleleDf['allele_source'] == 'BLAST')]
        PO_df = alleleDf[alleleDf['allele_source'] == 'PO']

        filtered_no_PO_df = filterAlleleDf(no_PO_df, qCov, tCov, pctId, levSim)
        # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
        return pd.concat([filtered_no_PO_df, PO_df], ignore_index=True)

    if qCov:
        alleleDf = alleleDf[alleleDf['QCov'] > qCov]
    if tCov:
        alleleDf = alleleDf[alleleDf['TCov'] > tCov]
    if pctId:
        alleleDf = alleleDf[alleleDf['PctID'] > pctId]
    if levSim:
        # convert the similarity percentage into a maximum normalised distance
        levDist = (100-levSim)/100.0
        alleleDf = alleleDf[alleleDf['protein_levenshtein'] < levDist]

    return alleleDf

In [21]:
# Haplotig allele table: read it, keep only rows that have a target, and label each row
# with its match type.
hFullAlleleDf = pd.read_csv(
    os.path.join(ALLELE_PATH, '%s.full_df.alleles' % H_GENOME),
    sep='\t',
    header=0,
)
hFullAlleleDf = hFullAlleleDf[hFullAlleleDf['Target'].notnull()]
hFullAlleleDf['matchType'] = hFullAlleleDf.apply(
    lambda row: assignMatchType(
        row['allele_source'],
        row['t_contig == h_contig_overlap'],
        row['q_contig == t_contig'],
    ),
    axis=1,
)

# Primary allele table: same treatment.
pFullAlleleDf = pd.read_csv(
    os.path.join(ALLELE_PATH, '%s.full_df.alleles' % P_GENOME),
    sep='\t',
    header=0,
)
pFullAlleleDf = pFullAlleleDf[pFullAlleleDf['Target'].notnull()]
pFullAlleleDf['matchType'] = pFullAlleleDf.apply(
    lambda row: assignMatchType(
        row['allele_source'],
        row['t_contig == h_contig_overlap'],
        row['q_contig == t_contig'],
    ),
    axis=1,
)

# Drop primary proteins below the QCov/TCov/%ID cut-offs; proteinortho ('PO') rows are
# kept unfiltered (leavePO=True) and no Levenshtein filter is applied.
pFilteredAlleleDf = filterAlleleDf(
    pFullAlleleDf,
    qCov=BASE_QCOV_CUTOFF,
    tCov=BASE_TCOV_CUTOFF,
    pctId=BASE_PCTID_CUTOFF,
    levSim=False,
    leavePO=True,
)

In [22]:
# filter out haplotig proteins that already have alleles identified by BLAST or proteinortho.
# Keep only haplotig queries that appear neither as a Query nor as a Target in the
# primary allele table.
hFilteredAlleleDf = hFullAlleleDf[
    ~(
        hFullAlleleDf['Query'].isin(pFullAlleleDf['Query'])
        | hFullAlleleDf['Query'].isin(pFullAlleleDf['Target'])
    )
]
# Then drop the haplotig proteins below the QCov/TCov/%ID cut-offs (all allele sources).
hFilteredAlleleDf = filterAlleleDf(
    hFilteredAlleleDf,
    qCov=BASE_QCOV_CUTOFF,
    tCov=BASE_TCOV_CUTOFF,
    pctId=BASE_PCTID_CUTOFF,
    levSim=False,
    leavePO=False,
)

In [23]:
# Stack the filtered primary and haplotig allele tables (row labels of both are kept).
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
phFilteredAlleleDf = pd.concat([pFilteredAlleleDf, hFilteredAlleleDf])

In [24]:
# alleleDf is an alias, not a copy: the in-place changes below also apply to phFilteredAlleleDf.
alleleDf = phFilteredAlleleDf

# Index every allele pair by '<Query>_<Target>'; the same string is used as the name of
# the pair's working folder under PAML_PATH.
alleleDf['folder'] = alleleDf.Query + '_' + alleleDf.Target
alleleDf.set_index('folder', inplace=True)

# assert(len(alleleDf) == len(overlapDf) + len(noOverlapDf) + len(diffContigDf) + len(manualAssignDf))

In [25]:
def getFastaDict(fastaFile):
    '''Parses a FASTA file and returns a dict mapping each record id to
    its SeqRecord. If an id occurs more than once, the last record wins.'''
    return {record.id: record for record in SeqIO.parse(fastaFile, 'fasta')}

# id -> SeqRecord lookups for the combined primary+haplotig annotation.
# writeAllelicFasta finds these by name ('SEQRECORD_<TYPE>_DICT') via globals(),
# so the variable names must not be changed.
SEQRECORD_PROTEIN_DICT = getFastaDict(PH_PROTEIN_FASTA)
SEQRECORD_GENE_DICT = getFastaDict(PH_GENE_FASTA)
SEQRECORD_CDS_DICT = getFastaDict(PH_CDS_FASTA)

In [26]:
def writeAllelicFasta(alleleOne, alleleTwo, alleleType, outPath):
    '''Writes a two-record FASTA file '<alleletype>.fa' into outPath.

    alleleOne, alleleTwo: sequence ids, looked up in the module-level dict
        SEQRECORD_<ALLELETYPE>_DICT.
    alleleType: 'CDS', 'GENE' or 'PROTEIN' (case-insensitive).
    outPath: existing folder to write into.

    Returns True on success. If either id is missing from the dict, the two
    ids and the allele type are printed and SystemExit is raised.'''
    # Imported here because the notebook's import cell does not import sys; without it
    # the error path below raised NameError instead of exiting.
    import sys

    assert(alleleType.upper() in ['CDS', 'GENE', 'PROTEIN'])

    seqRecordDict = globals()['SEQRECORD_' + alleleType.upper() + '_DICT']
    try:
        alleleSeqRecords = [seqRecordDict[alleleOne], seqRecordDict[alleleTwo]]
    except KeyError:
        print(alleleOne)
        print(alleleTwo)
        print(alleleType)
        sys.exit('Sequence id not found in SEQRECORD_%s_DICT (see ids printed above).' % alleleType.upper())
    with open(os.path.join(outPath, alleleType.lower() + '.fa'), 'w') as outFile:
        SeqIO.write(alleleSeqRecords, outFile, 'fasta')
    return True

def writeAlignmentScript(alleleOutPath, scriptLoc = os.path.join(PAML_PATH, 'paml_script.sh')):
    '''Appends the commands for one allele pair to the bash script at scriptLoc.

    The appended commands cd into alleleOutPath, align protein.fa with MUSCLE,
    run pal2nal twice on protein.aln + cds.fa (PAML-format and default output),
    copy yn00.ctl from PAML_PATH and run yn00. Tool locations are hard-coded
    absolute paths. Always returns True.'''
    with open(scriptLoc, 'a') as outFile:
        commands = (
            'cd %s' % alleleOutPath,
            '/home/gamran/anaconda3/muscle3.8.31_i86linux64 -clwstrict -in protein.fa -out protein.aln',
            'perl /home/gamran/anaconda3/pal2nal.v14/pal2nal.pl -output paml protein.aln cds.fa > cds_codon.aln',
            'perl /home/gamran/anaconda3/pal2nal.v14/pal2nal.pl protein.aln cds.fa > cds_codon.clustal',
            'cp %s/yn00.ctl ./' % PAML_PATH,
            '/home/gamran/anaconda3/paml4.9g/bin/yn00',
        )
        # one command per line, each terminated by a newline
        for command in commands:
            outFile.write(command + '\n')
    return True

In [27]:
def prepareAlignmentBashScript(scriptLoc = os.path.join(PAML_PATH, 'paml_script.sh')):
    '''(Re)creates the bash script at scriptLoc and prepares every allele-pair folder.

    The script is truncated and given a '#!/bin/bash' header. Then, for every
    row of the module-level alleleDf, the folder PAML_PATH/<Query>_<Target> is
    created if needed, cds.fa and protein.fa are written into it, and the
    alignment/yn00 commands for that folder are appended to the script.

    scriptLoc: path of the bash script to write. Returns None.'''
    with open(scriptLoc, 'w') as pamlScript:
        print('#!/bin/bash', file=pamlScript)

    # Select the columns by name so the folder names always match the
    # '<Query>_<Target>' index of alleleDf, regardless of column order.
    for Query, Target in zip(alleleDf['Query'], alleleDf['Target']):
        alleleOutPath = os.path.join(PAML_PATH, '%s_%s' % (Query, Target))
        os.makedirs(alleleOutPath, exist_ok=True)

        writeAllelicFasta(Query, Target, 'CDS', alleleOutPath)
        writeAllelicFasta(Query, Target, 'PROTEIN', alleleOutPath)

        # Append to the same script that received the header above; previously the
        # default path was hard-coded here, so a custom scriptLoc was ignored.
        writeAlignmentScript(alleleOutPath, scriptLoc)

In [28]:
def assignDistancesToAlleles(df, folder, alignmentFile, alleleType):
    '''Adds Hamming and Levenshtein distance columns to an allele pair
    (indexed by 'folder' name) in df.

    df: allele DataFrame indexed by folder name; modified in place.
    folder: index label of the allele pair.
    alignmentFile: path to a CLUSTAL alignment holding exactly two sequences.
    alleleType: 'PROTEIN', 'CDS' or 'GENE' (case-insensitive); its lower-case
        form prefixes the column names '<type>_hamming' / '<type>_levenshtein'.

    Both distances are computed on the upper-cased aligned sequences and are
    normalised by the aligned sequence length. Returns df.'''
    assert(alleleType.upper() in ['PROTEIN', 'CDS', 'GENE'])
    # Use a context manager so the alignment file is closed after parsing
    # (this function is called twice per allele pair).
    with open(alignmentFile, 'r') as alignmentHandle:
        seq1, seq2 = AlignIO.read(alignmentHandle, format='clustal', seq_count=2)
    seq1 = str(seq1.seq).upper()
    seq2 = str(seq2.seq).upper()
    assert(len(seq1) == len(seq2))
    df.loc[folder, alleleType.lower() + '_hamming'] = distance.hamming(seq1, seq2, normalized=True)
    df.loc[folder, alleleType.lower() + '_levenshtein'] = editdistance.eval(seq1, seq2)/len(seq1)
    return df

def assignDistancesToAllAlleles(alleleDf):
    '''Adds protein and CDS distance columns for every allele pair in alleleDf.

    For each index label (folder name) the alignments
    PAML_PATH/<folder>/protein.aln and PAML_PATH/<folder>/cds_codon.clustal
    are passed to assignDistancesToAlleles. Progress is printed every time
    the rounded percentage increases. Returns the updated DataFrame.'''
    # (alignment file name, allele type), processed in this order for each pair
    alignments = (('protein.aln', 'PROTEIN'), ('cds_codon.clustal', 'CDS'))
    total = len(alleleDf)
    percentDone = 0

    print("Calculating distances and adding them to the allele DataFrame...")

    for count, folder in enumerate(alleleDf.index, start=1):
        for fileName, alleleType in alignments:
            alignmentFile = os.path.join(PAML_PATH, folder, fileName)
            alleleDf = assignDistancesToAlleles(alleleDf, folder, alignmentFile, alleleType)

        currentPercent = round(count/total * 100)
        if currentPercent > percentDone:
            percentDone = currentPercent
            print("%s%% complete" % percentDone)
    return alleleDf

In [29]:
def parse_dNdS_to_df(line, alleleDf, folder, dNdS_label):
    '''Extracts the dN and dS fields from one result line and stores their
    ratio for the given folder via assign_dNdS.

    dN is the text between 'dN = ' and ' w'; dS is the text between 'dS = '
    and ' dN'. One leading '-', '|' or space is skipped in each case. Raises
    IndexError if a field is not found. Returns the updated DataFrame.'''
    def firstCapture(pattern):
        return re.findall(pattern, line)[0]

    dN = firstCapture(r'dN = [-| ]?(.*) w')
    dS = firstCapture(r'dS = [-| ]?(.*) dN')
    return assign_dNdS(dN, dS, alleleDf, folder, dNdS_label)

def assign_dNdS(dN, dS, alleleDf, folder, dNdS_label):
    '''Stores dN/dS in alleleDf at row `folder`, column `dNdS_label`.

    dN and dS may be numbers or numeric strings. NaN is stored instead of a
    ratio whenever dS is not greater than zero. Returns alleleDf.'''
    synonymous = float(dS)
    ratio = float(dN)/synonymous if synonymous > 0 else np.nan
    alleleDf.loc[folder, dNdS_label] = ratio
    return alleleDf

def assign_dNdS_to_all_alleles(alleleDf):
    '''Parses PAML_PATH/<folder>/yn.out for every folder in alleleDf.index and
    stores the dN/dS ratios in the columns 'yn00_dN/dS', 'LWL85_dN/dS',
    'LWL85m_dN/dS' and 'LPB93_dN/dS'.

    Lines of the LWL85/LWL85m/LPB93 kind that contain 'nan' are skipped, so
    the corresponding cell is left as it was. Returns the updated DataFrame.'''
    for folder in alleleDf.index:
        alleleYn = os.path.join(PAML_PATH, folder,'yn.out')
        with open(alleleYn, 'r') as ynOut:
            #now loop over the lines and parse out stuff
            for i, line in enumerate(ynOut):
                if line.startswith('seq. seq. ') and i > 0:
                    # next() advances the same file iterator that the for-loop consumes,
                    # so the skipped line and the data line are not seen by the loop again.
                    next(ynOut) # we want the line that is two after the line starting with 'seq. seq '
                    dataLine = next(ynOut)
                    # dN is the last space-separated token before the first '+-';
                    # dS is the last token before the second '+-'.
                    dN = dataLine.split('+-')[0].rstrip().split(' ')[-1]
                    dS = dataLine.split('+-')[1].rstrip().split(' ')[-1]
                    alleleDf = assign_dNdS(dN, dS, alleleDf, folder, 'yn00_dN/dS')
                elif line.startswith('LWL85:') and 'nan' not in line:
                    alleleDf = parse_dNdS_to_df(line, alleleDf, folder, 'LWL85_dN/dS')
                elif line.startswith('LWL85m:') and 'nan' not in line:
                    alleleDf = parse_dNdS_to_df(line, alleleDf, folder, 'LWL85m_dN/dS')
                elif line.startswith('LPB93:') and 'nan' not in line:
                    alleleDf = parse_dNdS_to_df(line, alleleDf, folder, 'LPB93_dN/dS')
                else:
                    continue
    return alleleDf

In [30]:
# Change into the scripts folder so that the relative notebook name below resolves.
# NOTE: this changes the working directory for every cell that runs afterwards.
os.chdir('/home/benjamin/genome_assembly/Warrior/RT_script_base/DK_0911-1/scripts')
# Run the helper notebook in this namespace; presumably it defines getDiscrepancies,
# which checkPamlFilesExist calls -- TODO confirm.
%run file_counting.ipynb

def checkPamlFilesExist(alleleDf):
    '''Checks whether every folder named in alleleDf.index holds a complete
    set of PAML files.

    refDict gives the expected number of files per extension and is based on
    the contents of a folder that was known to be run successfully. Returns
    False at the first folder that is missing or for which getDiscrepancies
    reports a difference (the report is printed); returns True otherwise.'''
    refDict = dict(aln=2, clustal=1, ctl=1, dN=1, dS=1, fa=2,
                   out=1, rst=1, rst1=1, rub=1, t=1)

    for folderName in alleleDf.index:
        folderPath = os.path.join(PAML_PATH, folderName)
        if not os.path.exists(folderPath):
            return False

        report = getDiscrepancies(folderPath, refDict)
        if report != '':
            print(report)
            return False
    return True

In [None]:
def main(alleleDf = alleleDf):
    prepareAlignmentBashScript(os.path.join(PAML_PATH, 'paml_script.sh'))
    
    # if already run before, comment out this line
    print("Checking whether all PAML files already exist in %s..." % PAML_PATH)
    if checkPamlFilesExist(alleleDf):
        print('PAML appears to have been run to completion previously. Therefore, it will not be run this time.')
    else:
        'Not all files generated by PAML appear to exist. Running PAML now (this may take some time)...'
        !bash {os.path.join(PAML_PATH, 'paml_script.sh')}
        print('PAML finished running.')

    analysedAllelesPath = os.path.join(BASE_OUT_PATH, GENOME+'_analysed_alleles.df')
    if os.path.exists(analysedAllelesPath) and os.path.getsize(analysedAllelesPath) > 0:
        print("DataFrame with distance calculations at %s appears to have already been generated. Reading in this dataframe instead of re-generating it." % analysedAllelesPath)
        alleleDf = pd.read_csv(analysedAllelesPath, sep='\t', index_col=0)
    else:
        alleleDf = assignDistancesToAllAlleles(alleleDf)
        alleleDf.to_csv(analysedAllelesPath, sep='\t')
        pd.util.testing.assert_frame_equal(alleleDf, pd.read_csv(analysedAllelesPath, sep='\t', index_col=0))
    
    alleleDf = assign_dNdS_to_all_alleles(alleleDf)
    alleleDf.to_csv(analysedAllelesPath, sep='\t')
    
    return alleleDf

In [None]:
# Run the pipeline and rebind the module-level alleleDf to the analysed table.
if __name__ == "__main__":
    alleleDf = main()

Checking whether all PAML files already exist in /home/benjamin/genome_assembly/PST79/FALCON/p_assemblies/v9_1/Pst_104E_v12/Warrior_comp_runs/allele_analysis/post_allele_analysis/paml...
evm.model.pcontig_000.108_evm.model.hcontig_000_003.117: Discrepancy in *.dS files: 0 [FOLDER] vs 1 [REF]
evm.model.pcontig_000.108_evm.model.hcontig_000_003.117: Discrepancy in *.rst files: 0 [FOLDER] vs 1 [REF]
evm.model.pcontig_000.108_evm.model.hcontig_000_003.117: Discrepancy in *.rst1 files: 0 [FOLDER] vs 1 [REF]
evm.model.pcontig_000.108_evm.model.hcontig_000_003.117: Discrepancy in *.ctl files: 0 [FOLDER] vs 1 [REF]
evm.model.pcontig_000.108_evm.model.hcontig_000_003.117: Discrepancy in *.clustal files: 0 [FOLDER] vs 1 [REF]
evm.model.pcontig_000.108_evm.model.hcontig_000_003.117: Discrepancy in *.dN files: 0 [FOLDER] vs 1 [REF]
evm.model.pcontig_000.108_evm.model.hcontig_000_003.117: Discrepancy in *.aln files: 0 [FOLDER] vs 2 [REF]
evm.model.pcontig_000.108_evm.model.hcontig_000_003.117: Disc

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 435
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     59 patterns at    145 /    145 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     59 patterns at    145 /    145 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 8.31

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 627, avg  length 622
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 933
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    118 patterns at    258 /    258 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    118 patterns at    258 /    258 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.81

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 410, avg  length 408
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     14 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 2619
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     68 patterns at    659 /    659 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     68 patterns at    659 /    659 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 0.51

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 887, avg  length 856
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     15 MB(7%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1608
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    212 patterns at    511 /    511 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    212 patterns at    511 /    511 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.83

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 874, avg  length 766
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     14 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1749
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    134 patterns at    574 /    574 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    134 patterns at    574 /    574 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.94

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 836, avg  length 787
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     14 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1149
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     60 patterns at    356 /    356 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     60 patterns at    356 /    356 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 960, avg  length 959
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     15 MB(8%)  Iter   1  100.00%  Align node       
00:00:00     15 MB(8%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 720
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     69 patterns at    240 /    240 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     69 patterns at    240 /    240 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.68

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 229, avg  length 228
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

00:00:00     15 MB(8%)  Iter   1  100.00%  Align node       
00:00:00     15 MB(8%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 3093
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    320 patterns at   1016 /   1016 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    320 patterns at   1016 /   1016 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.99

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 139, avg  length 138
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 318
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     77 patterns at    105 /    105 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     77 patterns at    105 /    105 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.06

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 90, avg  length 90
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from y

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 774
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    108 patterns at    222 /    222 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    108 patterns at    222 /    222 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.08

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 343, avg  length 314
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1551
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    180 patterns at    482 /    482 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    180 patterns at    482 /    482 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.77

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 228, avg  length 215
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       

00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1833
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     79 patterns at    460 /    460 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     79 patterns at    460 /    460 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.57

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 314, avg  length 302
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1113
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     69 patterns at    370 /    370 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     69 patterns at    370 /    370 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 6.38

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 117, avg  length 106
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 855
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    159 patterns at    274 /    274 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    159 patterns at    274 /    274 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.43

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 601, avg  length 601
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 393
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     60 patterns at    131 /    131 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     60 patterns at    131 /    131 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.03

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 117, avg  length 116
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1998
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     82 patterns at    583 /    583 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     82 patterns at    583 /    583 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 0.87

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 239, avg  length 239
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 843
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    118 patterns at    281 /    281 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    118 patterns at    281 /    281 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 3.21

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 207, avg  length 207
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 429
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    101 patterns at    143 /    143 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    101 patterns at    143 /    143 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.22

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 640, avg  length 620
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1671
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    113 patterns at    557 /    557 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    113 patterns at    557 /    557 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.44

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 165, avg  length 165
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       

00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1767
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     97 patterns at    535 /    535 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     97 patterns at    535 /    535 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.10

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 405, avg  length 385
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 630
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     75 patterns at    210 /    210 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     75 patterns at    210 /    210 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 15.18

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 796, avg  length 796
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     14 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 669
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    100 patterns at    183 /    183 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    100 patterns at    183 /    183 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.90

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 223, avg  length 203
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

00:00:00     15 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     15 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 3057
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    361 patterns at    766 /    766 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    361 patterns at    766 /    766 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.25

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 1196, avg  length 1195
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     16 MB(8%)  Iter   1  100.00%  Align node     

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 492
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     58 patterns at    155 /    155 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     58 patterns at    155 /    155 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.38

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 194, avg  length 194
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1284
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     72 patterns at    366 /    366 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     72 patterns at    366 /    366 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.85

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 543, avg  length 506
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:01     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:01     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1140
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    178 patterns at    361 /    361 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    178 patterns at    361 /    361 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.96

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 378, avg  length 370
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1494
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     94 patterns at    397 /    397 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     94 patterns at    397 /    397 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.82

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 360, avg  length 352
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1227
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     59 patterns at    348 /    348 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     59 patterns at    348 /    348 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 601, avg  length 601
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:01     13 MB(7%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 810
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    109 patterns at    270 /    270 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    109 patterns at    270 /    270 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 5.11

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 270, avg  length 270
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 603
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     70 patterns at    175 /    175 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     70 patterns at    175 /    175 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.69

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 118, avg  length 117
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

00:00:00     15 MB(8%)  Iter   1  100.00%  Align node       
00:00:00     15 MB(8%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 3183
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     87 patterns at    889 /    889 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     87 patterns at    889 /    889 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.74

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 693, avg  length 679
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 165
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     29 patterns at     55 /     55 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     29 patterns at     55 /     55 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 55, avg  length 55
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     11 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     11 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from y

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 945
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    155 patterns at    274 /    274 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    155 patterns at    274 /    274 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.34

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 283, avg  length 251
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 432
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     54 patterns at    144 /    144 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     54 patterns at    144 /    144 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 95, avg  length 95
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from y

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1353
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    143 patterns at    451 /    451 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    143 patterns at    451 /    451 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.20

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 116, avg  length 116
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 588
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     80 patterns at    152 /    152 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     80 patterns at    152 /    152 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.49

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 332, avg  length 331
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 690
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     69 patterns at    188 /    188 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     69 patterns at    188 /    188 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 8.79

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 451, avg  length 451
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 999
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    120 patterns at    316 /    316 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    120 patterns at    316 /    316 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 3.03

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 335, avg  length 333
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       


YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1281
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    115 patterns at    412 /    412 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    115 patterns at    412 /    412 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.55

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 414, avg  length 414
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 579
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     78 patterns at    122 /    122 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     78 patterns at    122 /    122 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.33

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 304, avg  length 303
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 999
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     56 patterns at    333 /    333 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     56 patterns at    333 /    333 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 600, avg  length 594
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1419
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    208 patterns at    432 /    432 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    208 patterns at    432 /    432 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.77

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 521, avg  length 520
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       

00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1239
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    182 patterns at    397 /    397 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    182 patterns at    397 /    397 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.82

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 141, avg  length 140
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1902
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    232 patterns at    549 /    549 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    232 patterns at    549 /    549 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 3.45

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 609, avg  length 607
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 942
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     76 patterns at    314 /    314 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     76 patterns at    314 /    314 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 9.83

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 101, avg  length 100
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 708
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     82 patterns at    176 /    176 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     82 patterns at    176 /    176 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.92

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 414, avg  length 396
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1350
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    186 patterns at    440 /    440 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    186 patterns at    440 /    440 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 3.19

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 447, avg  length 445
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 450
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     67 patterns at    146 /    146 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     67 patterns at    146 /    146 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.57

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 279, avg  length 250
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       


YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 2079
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    181 patterns at    660 /    660 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    181 patterns at    660 /    660 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.68

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 670, avg  length 664
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     14 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 390
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     52 patterns at    130 /    130 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     52 patterns at    130 /    130 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 229, avg  length 229
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1677
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     83 patterns at    404 /    404 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     83 patterns at    404 /    404 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 3.64

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 432, avg  length 432
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 756
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    132 patterns at    222 /    222 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    132 patterns at    222 /    222 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.42

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 547, avg  length 479
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1989
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     66 patterns at    493 /    493 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     66 patterns at    493 /    493 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 0.39

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 490, avg  length 489
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1404
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    158 patterns at    459 /    459 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    158 patterns at    459 /    459 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 6.45

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 170, avg  length 170
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1464
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     96 patterns at    478 /    478 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     96 patterns at    478 /    478 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 8.56

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 178, avg  length 174
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 456
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     98 patterns at    143 /    143 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     98 patterns at    143 /    143 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 3.03

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 152, avg  length 147
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1896
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     70 patterns at    632 /    632 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     70 patterns at    632 /    632 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.86

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 262, avg  length 262
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     14 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 2100
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    188 patterns at    667 /    667 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    188 patterns at    667 /    667 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.97

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 344, avg  length 299
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 240
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     42 patterns at     80 /     80 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     42 patterns at     80 /     80 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 411, avg  length 345
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 495
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     51 patterns at    131 /    131 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     51 patterns at    131 /    131 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.


protein 2 seqs, max length 614, avg  length 574
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from 

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 261
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     42 patterns at     67 /     67 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     42 patterns at     67 /     67 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 3.90

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 602, avg  length 550
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 393
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     53 patterns at     89 /     89 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     53 patterns at     89 /     89 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 3.65

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 713, avg  length 648
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     14 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

00:00:00     15 MB(8%)  Iter   1  100.00%  Align node       
00:00:00     15 MB(8%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 2889
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    161 patterns at    939 /    939 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    161 patterns at    939 /    939 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 559, avg  length 484
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 363
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     49 patterns at    106 /    106 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     49 patterns at    106 /    106 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 10.36

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 263, avg  length 263
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 558
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     99 patterns at    153 /    153 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     99 patterns at    153 /    153 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.64

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 360, avg  length 329
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 729
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     81 patterns at    188 /    188 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     81 patterns at    188 /    188 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.29

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 946, avg  length 944
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     15 MB(8%)  Iter   1  100.00%  Align node       
00:00:00     15 MB(8%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1902
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     62 patterns at    634 /    634 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     62 patterns at    634 /    634 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 107, avg  length 107
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 336
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     46 patterns at    112 /    112 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     46 patterns at    112 /    112 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 1157, avg  length 1017
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     15 MB(8%)  Iter   1  100.00%  Align node       
00:00:00     15 MB(8%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fr

00:00:00     16 MB(8%)  Iter   1  100.00%  Align node       
00:00:00     16 MB(8%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 3621
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    147 patterns at   1175 /   1175 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    147 patterns at   1175 /   1175 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.92

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 848, avg  length 753
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 318
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     58 patterns at     98 /     98 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     58 patterns at     98 /     98 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 7.44

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 79, avg  length 79
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     11 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     11 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from y

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1689
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    131 patterns at    559 /    559 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    131 patterns at    559 /    559 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.54

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 563, avg  length 561
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 528
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     55 patterns at    151 /    151 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     55 patterns at    151 /    151 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.40

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 70, avg  length 70
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     11 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     11 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from y

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 888
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     83 patterns at    296 /    296 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     83 patterns at    296 /    296 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 0.74

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 89, avg  length 87
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from y

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 2712
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     89 patterns at    813 /    813 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     89 patterns at    813 /    813 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.77

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 1010, avg  length 906
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     15 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     15 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fr

00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 480
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     88 patterns at    159 /    159 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     88 patterns at    159 /    159 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.91

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 706, avg  length 693
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       



Reading options from yn00.ctl..

ns = 2  	ls = 390
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     63 patterns at    130 /    130 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     63 patterns at    130 /    130 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 3.81

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 211, avg  length 196
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 633
Reading s

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1395
Reading sequences, sequential format..
Counting site patterns..  0:01
Compressing,    173 patterns at    402 /    402 sites (100.0%),  0:01
Collecting fpatt[] & pose[],    173 patterns at    402 /    402 sites (100.0%),  0:01


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.61

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:01

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 616, avg  length 542
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 2259
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    146 patterns at    510 /    510 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    146 patterns at    510 /    510 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.03

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 635, avg  length 598
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 393
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     54 patterns at    108 /    108 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     54 patterns at    108 /    108 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.71

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 89, avg  length 89
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from y

00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1437
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    168 patterns at    381 /    381 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    168 patterns at    381 /    381 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.26

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 595, avg  length 555
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 282
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     41 patterns at     94 /     94 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     41 patterns at     94 /     94 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 127, avg  length 127
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 321
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     54 patterns at    107 /    107 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     54 patterns at    107 /    107 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 6.84

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 156, avg  length 152
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 873
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    121 patterns at    287 /    287 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    121 patterns at    287 /    287 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 3.45

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 96, avg  length 96
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from y

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1617
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    211 patterns at    515 /    515 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    211 patterns at    515 /    515 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.84

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 335, avg  length 335
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1374
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     76 patterns at    450 /    450 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     76 patterns at    450 /    450 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 5.69

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 189, avg  length 185
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1887
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    252 patterns at    624 /    624 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    252 patterns at    624 /    624 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.56

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 89, avg  length 89
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
0

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 636
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     54 patterns at    212 /    212 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     54 patterns at    212 /    212 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 760, avg  length 741
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     14 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 585
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     80 patterns at    181 /    181 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     80 patterns at    181 /    181 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.98

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 468, avg  length 423
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1659
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    125 patterns at    445 /    445 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    125 patterns at    445 /    445 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 5.15

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 119, avg  length 119
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1422
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     72 patterns at    451 /    451 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     72 patterns at    451 /    451 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.00

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 740, avg  length 722
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     14 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

00:00:00     15 MB(8%)  Iter   1  100.00%  Align node       
00:00:00     15 MB(8%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 2886
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    218 patterns at    893 /    893 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    218 patterns at    893 /    893 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 3.96

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 117, avg  length 115
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 594
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     62 patterns at    163 /    163 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     62 patterns at    163 /    163 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 0.62

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 205, avg  length 191
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1461
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    173 patterns at    383 /    383 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    173 patterns at    383 /    383 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.36

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 484, avg  length 435
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 759
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    143 patterns at    250 /    250 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    143 patterns at    250 /    250 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.23

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 253, avg  length 251
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from

00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 795
Reading sequences, sequential format..
Counting site patterns..  0:01
Compressing,     64 patterns at    265 /    265 sites (100.0%),  0:01
Collecting fpatt[] & pose[],     64 patterns at    265 /    265 sites (100.0%),  0:01


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.78

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:01

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 1351, avg  length 1351
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     17 MB(9%)  Iter   1  100.00%  Align node      

00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 705
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     53 patterns at    235 /    235 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     53 patterns at    235 /    235 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 333, avg  length 333
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       


YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1215
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    139 patterns at    405 /    405 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    139 patterns at    405 /    405 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 2.41

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 265, avg  length 265
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       
00:00:00     12 MB(6%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 2388
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    163 patterns at    703 /    703 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    163 patterns at    703 /    703 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.44

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 637, avg  length 589
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     13 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options fro

00:00:00     15 MB(8%)  Iter   1  100.00%  Align node       
00:00:00     15 MB(8%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 3324
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,    325 patterns at    994 /    994 sites (100.0%),  0:00
Collecting fpatt[] & pose[],    325 patterns at    994 /    994 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 1.66

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 271, avg  length 271
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     12 MB(6%)  Iter   1  100.00%  Align node       

00:00:00     14 MB(7%)  Iter   1  100.00%  Align node       
00:00:00     14 MB(7%)  Iter   1  100.00%  Root alignment
YN00 in paml version 4.9f, October 2017

Reading options from yn00.ctl..

ns = 2  	ls = 1926
Reading sequences, sequential format..
Counting site patterns..  0:00
Compressing,     61 patterns at    642 /    642 sites (100.0%),  0:00
Collecting fpatt[] & pose[],     61 patterns at    642 /    642 sites (100.0%),  0:00


(A) Nei-Gojobori (1986) method



(B) Yang & Nielsen (2000) method

kappa = 4.60

  2 vs.   1


(C) LWL85, LPB93 & LWLm methods

   1   2

Time used:  0:00

MUSCLE v3.8.31 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.



protein 2 seqs, max length 367, avg  length 367
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00     10 MB(5%)  Iter   1  100.00%  K-mer dist pass 2
00:00:00     13 MB(6%)  Iter   1  100.00%  Align node       

In [None]:
########## FIGURE PLOTTING ##########

def make_autopct(values):
    '''Builds an ``autopct`` callback for a pie chart.

    The returned function takes a wedge's percentage and formats it as that
    percentage (two decimal places) followed, on a new line, by the absolute
    count it corresponds to, i.e. the percentage of ``sum(values)`` rounded to
    the nearest integer.
    '''
    def my_autopct(pct):
        # Turn the percentage back into the count it represents.
        count = int(round(pct * sum(values) / 100.0))
        return '{p:.2f}%\n({v:d})'.format(p=pct, v=count)
    return my_autopct

def autolabel(rects, labels, ax, fontsize):
    """
    Write a text label on top of every bar in ``rects``.

    The bar at position i is annotated with ``str(labels[i])``. The text is
    centred horizontally on the bar and its bottom edge sits at the bar's
    height. ``fontsize`` is forwarded to ``ax.text``.
    """
    for position, bar in enumerate(rects):
        top = bar.get_height()
        centre_x = bar.get_x() + bar.get_width() / 2.
        caption = str(labels[position])
        ax.text(centre_x, top, caption, ha='center', va='bottom', fontsize=fontsize)

def getNumNoAlleles(pProteinFastaFile, alleleDf):
    '''Counts the proteins in a FASTA file that are not part of any allele pair.

    pProteinFastaFile: path to a FASTA file. The text following '>' on each
        header line (stripped of surrounding whitespace) is used as the
        protein identifier.
    alleleDf: DataFrame with 'Query' and 'Target' columns holding the
        identifiers of paired proteins.

    Returns the number of identifiers from the FASTA file that occur in
    neither the 'Query' nor the 'Target' column of alleleDf.

    Raises AssertionError if the FASTA file contains duplicate identifiers.
    '''
    with open(pProteinFastaFile) as pProteinFasta:
        pProteinList = [line[1:].strip() for line in pProteinFasta if line.startswith('>')]

    pProteinSet = set(pProteinList)
    # Duplicate headers would make the "number of unpaired proteins" ambiguous.
    assert len(pProteinList) == len(pProteinSet), \
        'Duplicate protein identifiers found in %s' % pProteinFastaFile

    pairedPProteinSet = set(alleleDf['Query']) | set(alleleDf['Target'])

    # A set difference gives the same count as removing every paired protein
    # from the list one at a time, without the repeated linear list scans.
    return len(pProteinSet - pairedPProteinSet)

def plotAlleleTypesPie(ax, alleleDf, colors, includeNoAlleles=True):
    '''Plots a pie chart of allele types, with the option of also including 
    primary proteins with no alleles. Strictly, this is not an accurate representation
    of the distribution of primary proteins as the reciprocal BLAST-identified (h on p) alleles
    may result in double-counting of primary proteins.

    ax: matplotlib Axes to draw the pie chart on.
    alleleDf: DataFrame with 'matchType', 'Query' and 'Target' columns.
    colors: wedge colours, forwarded to ax.pie.
    includeNoAlleles: if True, adds a 'no_allele' wedge whose size is
        getNumNoAlleles(P_PROTEINS_FASTA, alleleDf).
    '''
    # Imported locally because the notebook's import cell does not import
    # collections; without this the OrderedDict lookup may raise NameError.
    import collections

    # OrderedDict to preserve order, so that plots are coloured with same key as the distance 
    # bar graphs. This is a bit of a hack-fix; must enter these by hand again in the same order 
    # as 'matchType' occurs in the alleleAveragesByMatchType DataFrame.
    alleleTypeCountDict = collections.OrderedDict()

    # One wedge per match type, sized by the number of distinct queries of that type.
    for matchType in alleleDf['matchType'].unique():
        queriesOfType = alleleDf[alleleDf['matchType'] == matchType]['Query']
        alleleTypeCountDict[matchType] = len(queriesOfType.unique())

    if includeNoAlleles:
        alleleTypeCountDict['no_allele'] = getNumNoAlleles(P_PROTEINS_FASTA, alleleDf)

    # Materialise labels and counts once so both are plain, equally long lists.
    wedgeLabels = list(alleleTypeCountDict.keys())
    wedgeCounts = list(alleleTypeCountDict.values())

    # NOTE(review): if colors has fewer entries than there are wedges (e.g. one
    # colour per match type plus the extra 'no_allele' wedge), the extra wedge
    # presumably reuses an earlier colour - confirm and extend colors if needed.
    ax.pie(wedgeCounts, labels=wedgeLabels, autopct=make_autopct(wedgeCounts), colors=colors)
    ax.axis('equal')
    ax.set_title('Allele Types', loc='center', fontsize=TITLE_SIZE, position=(0.5, 1.1))

def plotLevenshteinBar(alleleAverages, ax, colors):
    '''Plots a bar graph of normalised Levenshtein distances on ax from DataFrame alleleAverages.

    alleleAverages: DataFrame with a 'protein_levenshtein' column; its index
        provides the x tick labels (one bar per row).
    ax: matplotlib Axes to draw on.
    colors: bar colours, forwarded to ax.bar.

    Each bar is annotated with (1 - distance) * 100, truncated to an integer
    and suffixed with '%'.
    '''
    levDistances = alleleAverages.protein_levenshtein

    ind = np.arange(len(levDistances))
    rects = ax.bar(ind, levDistances, 0.35, color=colors, align='center')

    sns.despine(top=True, right=True)

    barLabels = [str(int((1 - levDist) * 100)) + '%' for levDist in levDistances]
    autolabel(rects, barLabels, ax, INLINE_LABEL_SIZE)

    ax.set_xticks(ind)
    ax.set_xticklabels(alleleAverages.index, rotation=45)

    ax.set_ylabel('Normalised Levenshtein Distance', fontsize=AXIS_LABEL_SIZE)

    tickPad = 3
    ax.tick_params(axis='both', which='major', labelsize=AXIS_TICK_SIZE, pad=tickPad)
    # The x tick labels get twice the padding of the y tick labels. This goes
    # through the public tick_params API rather than rebuilding each tick's
    # label via the private Tick._get_text1(), which is not guaranteed to exist
    # across matplotlib versions and discarded the label rotation set above.
    ax.tick_params(axis='x', which='major', pad=2 * tickPad)
        
def plotAlleles(alleleDf, qCovFilters, tCovFilters, pctIdFilters, levSimFilters, leavePO):
    '''Makes an Nx2 plot (N = number of filter levels) with normalised Levenshtein
    distance plots in column 1 and a pie chart representing the distribution of
    allele types in column 2. Each row shows a different level of filtering.

    alleleDf: DataFrame of alleles with at least the 'matchType', 'Query', 'Target'
        and 'protein_levenshtein' columns.
    qCovFilters, tCovFilters, pctIdFilters, levSimFilters: equally long lists; the
        i-th entries of each are passed to filterAlleleDf for row i.
    leavePO is a boolean that determines whether only BLAST hits will be filtered (leavePO=True)
    or both BLAST and PO alleles should be filtered (leavePO=False)

    The figure is saved as 'fig' inside FIGURE_PATH.
    '''
    numRows = len(qCovFilters)
    assert numRows == len(tCovFilters) == len(pctIdFilters) == len(levSimFilters), \
        'qCovFilters, tCovFilters, pctIdFilters and levSimFilters must all be the same length.'

    cmap = plt.cm.Greens
    colors = cmap(np.linspace(0.0, 0.6, len(alleleDf['matchType'].unique())))

    # squeeze=False keeps ax two-dimensional even when there is a single row,
    # so ax[i, 0] / ax[i, 1] are always valid.
    fig, ax = plt.subplots(numRows, 2, figsize=(30, 12*numRows), squeeze=False)

    rowFilters = zip(qCovFilters, tCovFilters, pctIdFilters, levSimFilters)
    for i, (qCovFilter, tCovFilter, pctIdFilter, levSimFilter) in enumerate(rowFilters):
        barAx, pieAx = ax[i, 0], ax[i, 1]

        # Pass the caller's leavePO through instead of a hard-coded True, so the
        # filtering matches what the title reports.
        # (assumes the last argument of filterAlleleDf is its leavePO flag)
        filteredAlleleDf = filterAlleleDf(alleleDf, qCovFilter, tCovFilter, pctIdFilter, levSimFilter, leavePO)

        # levenshtein distance plot; only the column that is actually plotted is
        # averaged, so non-numeric columns cannot make mean() fail.
        alleleAveragesByMatchType = filteredAlleleDf.groupby('matchType')[['protein_levenshtein']].mean()
        plotLevenshteinBar(alleleAveragesByMatchType, barAx, colors)
        barAx.set_xticklabels(alleleAveragesByMatchType.index, rotation=45, ha='right')

        # pie plot
        plotAlleleTypesPie(pieAx, filteredAlleleDf, colors)

        # include filtering criteria in title; values below the base cut-offs
        # (including False) are reported as the base cut-off.
        if qCovFilter < BASE_QCOV_CUTOFF:
            qCovFilter = BASE_QCOV_CUTOFF
            print('Base QCov cut-off is 70%; if you desire to filter below this value, decrease BASE_QCOV_CUTOFF.')
        if tCovFilter < BASE_TCOV_CUTOFF:
            tCovFilter = BASE_TCOV_CUTOFF
            print('Base TCov cut-off is 70%; if you desire to filter below this value, decrease BASE_TCOV_CUTOFF.')
        if pctIdFilter < BASE_PCTID_CUTOFF:
            pctIdFilter = BASE_PCTID_CUTOFF
            print('Base %ID cut-off is 70%; if you desire to filter below this value, decrease BASE_PCTID_CUTOFF.')
        if not levSimFilter:
            levSimFilter = 0

        barAx.set_title('QCov > %s%%, TCov > %s%%, ID > %s%%, L. sim. > %s%%, PO Filtered: %s' % (qCovFilter, tCovFilter, pctIdFilter, levSimFilter, not leavePO), position=(0.5, 0.85))

    fig.tight_layout()
    fig.savefig(os.path.join(FIGURE_PATH, 'fig'), bbox_inches='tight')

In [None]:
# Global default font size: it applies to all pie-chart text except the title,
# because ax.pie does not seem to offer a proper way to size that text separately.
mpl.rcParams['font.size'] = 24

# Font sizes read by the plotting functions defined above.
TITLE_SIZE, AXIS_LABEL_SIZE = 32, 28
AXIS_TICK_SIZE = INLINE_LABEL_SIZE = 24

# One entry per row of the figure; all four lists must be the same length.
qCovFilters = [False, 80, 90, 95]
tCovFilters = [False, 80, 90, 95]
pctIdFilters = [False, 80, 90, 95]
levSimFilters = [False] * 4

plotAlleles(alleleDf, qCovFilters, tCovFilters, pctIdFilters, levSimFilters, leavePO=True)