In [1]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import re
import requests
import json
from Bio.Align.Applications import ClustalOmegaCommandline

In [2]:
file_path = "data/genome/deinococcus/radiodurans/GCF_000008565.1_ASM856v1/deinococcus_radiodurans_genomic.gbff"
records1 = SeqIO.index(file_path, 'gb')
records1

SeqIO.index('data/genome/deinococcus/radiodurans/GCF_000008565.1_ASM856v1/deinococcus_radiodurans_genomic.gbff', 'gb', alphabet=None, key_function=None)

In [3]:
file_path = "data/genome/thermus/thermophilus/HB8/thermus_thermophilus_genomic.gbff"
records2 = SeqIO.index(file_path, 'gb')
records2

SeqIO.index('data/genome/thermus/thermophilus/HB8/thermus_thermophilus_genomic.gbff', 'gb', alphabet=None, key_function=None)

In [4]:
list(records1.keys())

['NC_001263.1', 'NC_001264.1', 'NC_000959.1', 'NC_000958.1']

In [5]:
list(records2.keys())

['NC_006461.1', 'NC_006462.1', 'NC_006463.1']

In [6]:
url = "http://omabrowser.org/api"

In [31]:
genome_id_1 = 243230
species1 = "Deinococcus_Radiodurans_R1" #ClustalO / PAL2NALで空白より前のみが切り取られるので、_で繋ぐと良い
genome_id_2 = 300852
species2 = "Thermus_thermophilus_HB8"
url_pairs = url + "/pairs" + "/{}/{}/?rel_type=1%3A1".format(genome_id_1, genome_id_2)
print(url_pairs)
response = requests.get(url_pairs)
print(response.status_code)

http://omabrowser.org/api/pairs/243230/300852/?rel_type=1%3A1
200


In [8]:
dic = json.loads(response.content.decode())
dic

[{'entry_1': {'entry_nr': 1873967,
   'entry_url': 'https://omabrowser.org/api/protein/1873967/',
   'omaid': 'DEIRA00001',
   'canonicalid': 'Q9RYE8',
   'sequence_md5': 'e9e74391aba0623622ab48bdebb1d53f',
   'oma_group': 918337,
   'oma_hog_id': 'HOG:0685079.4b.2a',
   'chromosome': 'Chromosome I',
   'locus': {'start': 1, 'end': 1182, 'strand': -1},
   'is_main_isoform': True},
  'entry_2': {'entry_nr': 1892734,
   'entry_url': 'https://omabrowser.org/api/protein/1892734/',
   'omaid': 'THET800001',
   'canonicalid': 'Q5SME2',
   'sequence_md5': '2cc303f4e9838ae6ed2afe6535c47a39',
   'oma_group': 918339,
   'oma_hog_id': 'HOG:0685079.4b',
   'chromosome': 'Chromosome',
   'locus': {'start': 55, 'end': 1182, 'strand': 1},
   'is_main_isoform': True},
  'rel_type': '1:1',
  'distance': 86.0,
  'score': 1043.4200439453125},
 {'entry_1': {'entry_nr': 1873968,
   'entry_url': 'https://omabrowser.org/api/protein/1873968/',
   'omaid': 'DEIRA00002',
   'canonicalid': 'DNAA_DEIRA',
   'sequ

In [9]:
ortho_location_dict = [
    [entry['entry_1']['locus']['start'], entry['entry_1']['locus']['end'], entry['entry_1']['locus']['strand'], entry['entry_1']['chromosome'], entry['entry_2']['locus']['start'], entry['entry_2']['locus']['end'], entry['entry_2']['locus']['strand'], entry['entry_2']['chromosome']]
    for entry in dic
    if entry['entry_1']['chromosome'] == 'Chromosome I'
]
ortho_location_dict

[[1, 1182, -1, 'Chromosome I', 55, 1182, 1, 'Chromosome'],
 [1904, 3304, -1, 'Chromosome I', 1848185, 1849495, 1, 'Chromosome'],
 [6162, 6845, -1, 'Chromosome I', 1222960, 1223634, 1, 'Chromosome'],
 [6986, 7822, 1, 'Chromosome I', 310968, 311693, -1, 'Chromosome'],
 [13333, 14199, -1, 'Chromosome I', 1844044, 1844853, -1, 'Chromosome'],
 [14183, 15073, -1, 'Chromosome I', 1844837, 1845586, -1, 'Chromosome'],
 [15101, 15862, -1, 'Chromosome I', 1845580, 1846329, -1, 'Chromosome'],
 [16307, 17047, -1, 'Chromosome I', 1327678, 1328442, -1, 'Chromosome'],
 [19258, 20643, 1, 'Chromosome I', 1459112, 1460908, 1, 'Chromosome'],
 [22233, 22766, 1, 'Chromosome I', 478846, 479340, -1, 'Chromosome'],
 [22763, 23875, 1, 'Chromosome I', 477740, 478849, -1, 'Chromosome'],
 [23983, 25182, 1, 'Chromosome I', 67430, 68548, 1, 'Chromosome'],
 [25273, 26082, 1, 'Chromosome I', 80373, 81125, 1, 'Plasmid pTT27'],
 [26166, 27968, -1, 'Chromosome I', 1846329, 1848122, -1, 'Chromosome'],
 [29796, 30941, 1, '

In [10]:
ortho_location_df = pd.DataFrame(ortho_location_dict, columns=['start1', 'end1', 'strand1', 'chromosome1', 'start2', 'end2', 'strand2', 'chromosome2'])
ortho_location_df

Unnamed: 0,start1,end1,strand1,chromosome1,start2,end2,strand2,chromosome2
0,1,1182,-1,Chromosome I,55,1182,1,Chromosome
1,1904,3304,-1,Chromosome I,1848185,1849495,1,Chromosome
2,6162,6845,-1,Chromosome I,1222960,1223634,1,Chromosome
3,6986,7822,1,Chromosome I,310968,311693,-1,Chromosome
4,13333,14199,-1,Chromosome I,1844044,1844853,-1,Chromosome
5,14183,15073,-1,Chromosome I,1844837,1845586,-1,Chromosome
6,15101,15862,-1,Chromosome I,1845580,1846329,-1,Chromosome
7,16307,17047,-1,Chromosome I,1327678,1328442,-1,Chromosome
8,19258,20643,1,Chromosome I,1459112,1460908,1,Chromosome
9,22233,22766,1,Chromosome I,478846,479340,-1,Chromosome


In [11]:
#見つかったホモログはgenome1のChromosome Iからのみであることを確認
# genome2は、ChromosomeとPlasmid pTT27の両方に存在していることを確認
for entry in dic:
    if entry['entry_1']['chromosome'] != 'Chromosome I':
        print("geonome1")
        print(entry['entry_1']['chromosome'])
    if entry['entry_2']['chromosome'] != 'Chromosome':
        print("geonome2")
        print(entry['entry_2']['chromosome'])

geonome2
Plasmid pTT27
geonome2
Plasmid pTT27
geonome2
Plasmid pTT27
geonome2
Plasmid pTT27


In [12]:
# DeinococcusのCDSのstart, endの整理
genome1_location_dict = {
    record_id: [
        [ feature.location.start, feature.location.end, feature.location.strand, feature.qualifiers['translation'][0]]
        for feature in record.features 
        if feature.type == "CDS"
    ]
    for record_id, record in records1.items()
}

genome1_location_dict

{'NC_001263.1': [[ExactPosition(0),
   ExactPosition(1182),
   -1,
   'MMKANVTKKTLNEGLGLLERVIPSRSSNPLLTALKVETSEGGLTLSGTNLEIDLSCFVPAEVQQPENFVVPAHLFAQIVRNLGGELVELELSGQELSVRSGGSDFKLQTGDIEAYPPLSFPAQADVSLDGGELSRAFSSVRYAASNEAFQAVFRGIKLEHHGESARVVASDGYRVAIRDFPASGDGKNLIIPARSVDELIRVLKDGEARFTYGDGMLTVTTDRVKMNLKLLDGDFPDYERVIPKDIKLQVTLPATALKEAVNRVAVLADKNANNRVEFLVSEGTLRLAAEGDYGRAQDTLSVTQGGTEQAMSLAFNARHVLDALGPIDGDAELLFSGSTSPAIFRARRWGRRVYGGHGHAARLRGLLRPLRGMSALAHHPESSPPLEPRPEFA'],
  [ExactPosition(1903),
   ExactPosition(3268),
   -1,
   'MRKNVSDLEYTTWFAPVKPLGVQEGSLLLGVRNSFTKDWFRDHYLELLLAALRSLGAEHPQVEFQVLPAAQDALLLPNDPPPAPEAAAPTPKTKAAPTPPPSTPGDNRKTLNPKYTFENFVVGPNNNLAHAAALAVAESPGKAYNPLFIYGDVGLGKTHLMHAVGHYLAERFPEKRIEYVSTETFTNELINAIRDDKTTQFRNRYRSVDLLLVDDIQFLAGKERTQEEFFHTFNALYESNKQIILSSDRPPKDIQTLEGRLRSRFEWGLITDIQSPEYETRVAILKMNAEQGHITIPQEVLELIARQVTSNIRELEGALMRVVAFASLNNVPFSRAAAAKALSNVFAPQEAKVEMTDVLRQVAAHYGTTPDLIRGSGRARDIVVPRQVAQYLIRALTDHSLPEIGQFFGRDHSTVMHAVSKITEQMGKDPELAATVNTLRNRIQGKEEEEEVGA'],
  [ExactPositio

In [13]:
genome1_location_df = pd.DataFrame(genome1_location_dict[list(records1.keys())[0]], columns=['start', 'end', 'strand', 'translation'])
genome1_location_df

Unnamed: 0,start,end,strand,translation
0,0,1182,-1,MMKANVTKKTLNEGLGLLERVIPSRSSNPLLTALKVETSEGGLTLS...
1,1903,3268,-1,MRKNVSDLEYTTWFAPVKPLGVQEGSLLLGVRNSFTKDWFRDHYLE...
2,3411,4287,1,MGTGDPSPLASQGPLPLVEGQKMKKPPPVRRGMNEAMEDRGSFFMA...
3,4377,5430,1,MVVMSYLSELRAVWGHRALPAAGVSVLLQDETGRVLLQRRGDDGQW...
4,5437,6115,1,MSAVIHLQALGLTEYEARAYTALLALGRAVPARVARQAGIPRPKIY...
5,6161,6944,-1,MKRLRSGGGDKRAQHPHAVAPPHSYSGVPGGNLMLKIRSLGHSTFF...
6,6985,7822,1,MPVGDRRSLLPLTTLSSLNLLGITLRDLLDILLVAALLYQGYKLVV...
7,7818,8859,1,MSGGEAQGKRKQWQRWLSPRYVWRRLRHNIGPKLVSLGAALVLWSV...
8,8924,10283,1,MTKFVSAPLICGLALLGALSAAHAQTGIPALPPIAQPLPVPAPAPV...
9,10412,11615,1,MKSPLPMGLMAILQYVLSAVPLRKTQRNFLTVLLSVFLAVPGRLNA...


In [14]:
# ThermusのCDSのstart, endの整理
genome2_location_dict = {
    record_id: [
        [ feature.location.start, feature.location.end, feature.location.strand, feature.qualifiers['translation'][0]]
        for feature in record.features 
        if feature.type == "CDS"
    ]
    for record_id, record in records2.items()
}

genome2_location_dict

{'NC_006461.1': [[ExactPosition(54),
   ExactPosition(1182),
   1,
   'MNITVPKKLLSDQLSLLERIVPSRSANPLYTYLGLYAEEGALILFGTNGEVDLEVRLPAEAQSLPRVLVPAQPFFQLVRSLPGDLVALGLASEPGQGGQLELSSGRFRTRLSLAPAEGYPELLVPEGEDKGAFPLRTRMPSGELVKALTHVRYAASNEEYRAIFRGVQLEFSPQGFRAVASDGYRLALYDLPLPQGFQAKAVVPARSVDEMVRVLKGADGAEADLALGEGVLALALEGGSGVRMALRLMEGEFPDYQRVIPQEFALKVQVEGEALREAVRRVSVLSDRQNHRVDLLLEEGRILLSAEGDYGKGQEEVPAQVEGPGMAVAYNARYLLEALAPVGDRAHLGISGPTSPSLIWGDGEGYRAVVVPLRV'],
  [ExactPosition(1234),
   ExactPosition(2503),
   1,
   'MTTIVGVRAREVLDSRGFPTVEAEVELEGGARGRAMVPSGASTGTHEALELRDGGKRYLGKGVRRAVENVNERIAPELVGMDALDQEGVDRAMLELDGTPNKANLGANAVLAVSLAVARAAAEALGLPLYRYLGGVQGVTLPVPLMNVINGGKHADNRVDFQEFMLVPAGAGSFAEALRIGAEVFHTLKAVLKEKGYSTNVGDEGGFAPDLRSNEEAVELLLLAIERAGYTPGQEVSLALDPATSELYRDGKYHLEGEGKVLSSEEMVAFWEAWVEKYPIRSIEDGLAEDDWEGWRLLTERLGGKVQLVGDDLFVTNPERLRAGIERGVANAILVKVNQIGTLSETLEAIRLAQRSGYRAVISHRSGETEDSFIADLAVAVNAGQIKTGSLSRSDRLAKYNQLLRIEEELGRAARFLGYAAF'],
  [ExactPosition(2489),
   ExactPosition(3914),
   1,
   'MPPFKRTK

In [15]:
genome2_location_df = pd.DataFrame(genome2_location_dict[list(records2.keys())[0]], columns=['start', 'end', 'strand', 'translation'])
genome2_location_df

Unnamed: 0,start,end,strand,translation
0,54,1182,1,MNITVPKKLLSDQLSLLERIVPSRSANPLYTYLGLYAEEGALILFG...
1,1234,2503,1,MTTIVGVRAREVLDSRGFPTVEAEVELEGGARGRAMVPSGASTGTH...
2,2489,3914,1,MPPFKRTKIVATLGPATDDKEVIRALAEAGADVFRLNFSHGAPEDH...
3,3914,4895,-1,MRKRLALLALGLSALAQEVAVYPGFAEVKEPVDLPPAAWVYLAGEK...
4,4891,5506,-1,MNGPEVLRFAANLYRVPVEGGYFLVDAGLPWEARRLLSLLQEPPRL...
5,5492,7340,-1,MILDKVNSPEDLKRLSLEELLLLAEEIRSEIIRVTAQNGGHLASSL...
6,7348,7993,-1,MKRYLMALALLVAACAPQVTQAPEVRPELRVEAFYPAATGLEWSYL...
7,8036,8699,-1,MTLWDRLSRLVRANLNDLLRRAEDPEKIIEQALLDMKQALREAREQ...
8,9021,9114,1,MARALLELIAYGLRVLLSLLPPPPGHKAIY
9,12492,13503,1,MVLRWAAKGDKVKLLDNYGRVIKDLRISVTPRCNLHCLYCHPLGLE...


In [43]:
#CodeML controlファイル
#各遺伝子ごとにファイル名を受け取って、ctlファイルの中身を作成する
def create_codeml_ctl_file(codeml_ctl_file, seq_file, tree_file, out_file):
    with open(codeml_ctl_file, 'w') as f:
        f.write("seqfile = {}\n".format(seq_file))
        f.write("treefile = {}\n".format(tree_file))
        f.write("outfile = {}\n".format(out_file))
    

        s= "\nnoisy = 0   * 0,1,2,3,9: how much rubbish on the screen\n \
          verbose = 0   * 1: detailed output, 0: concise output\n \
          runmode = -2  * 0: user tree;  1: semi-automatic;  2: automatic\n \
          * 3: StepwiseAddition; (4,5):PerturbationNNI; -2: pairwise\n \
          cleandata = 1   * I added on 07/07/2004 Mikita Suyama\n \
          seqtype = 1   * 1:codons; 2:AAs; 3:codons-->AAs\n \
          CodonFreq = 2   * 0:1/61 each, 1:F1X4, 2:F3X4, 3:codon table\n \
          model = 2  * models for codons: \n \
          * 0:one, 1:b, 2:2 or more dN/dS ratios for branches\n \
          NSsites = 0   * dN/dS among sites. 0:no variation, 1:neutral, 2:positive\n \
          icode = 10   * 0:standard genetic code; 1:mammalian mt; 2-10:see below\n \
          Mgene = 0   * 0:rates, 1:separate; 2:pi, 3:kappa, 4:all\n \
          fix_kappa = 0   * 1: kappa fixed, 0: kappa to be estimated\n \
          kappa = 2   * initial or fixed kappa\n \
          fix_omega = 0   * 1: omega or omega_1 fixed, 0: estimate\n \
          omega = 1   * initial or fixed omega, for codons or codon-transltd AAs\n \
          fix_alpha = 1   * 0: estimate gamma shape parameter; 1: fix it at alpha\n \
          alpha = .0  * initial or fixed alpha, 0:infinity (constant rate)\n \
          Malpha = 0   * different alphas for genes\n \
          ncatG = 4   * # of categories in the dG or AdG models of rates\n\n \
          clock = 0   * 0: no clock, unrooted tree, 1: clock, rooted tree\n \
          getSE = 0   * 0: don't want them, 1: want S.E.s of estimates\n \
          RateAncestor = 0   * (1/0): rates (alpha>0) or ancestral states (alpha=0)\n \
          method = 0   * 0: simultaneous; 1: one branch at a time\n"
        
        f.write(s)


In [32]:
codeml_tree_file = "codeml_control/{}_{}.tree".format(species1, species2)
with open(codeml_tree_file, 'w') as f:
    f.write("({}, {});".format(species1, species2))
    

In [71]:
for index, ortho in ortho_location_df.iterrows():
    print("\n----------------------------------------------\nindex = {} is starting\n".format(index))
    if ortho.chromosome2 != "Chromosome":
        continue
#     if index != 19:
#         continue
   
    clustal_in_file_pro = "clustalo_input/{}_{}_gene{}_unaligned.pro".format(species1, species2, index)
    pal2nal_in_file_nuc = "pal2nal_input/{}_{}_gene{}_unaligned.nuc".format(species1, species2, index)

    if ortho.strand1 == 1:
        ortho1_nuc = records1[list(records1.keys())[0]].seq[ortho.start1-1:ortho.end1]
    elif ortho.strand1 == -1:
        ortho1_nuc = records1[list(records1.keys())[0]].seq[ortho.start1-1:ortho.end1].reverse_complement()
    ortho1_pro = genome1_location_df[genome1_location_df.start == ortho.start1-1].translation
    
    if len(ortho1_pro) == 0:
        continue
    #print(ortho1_nuc)
    #print(ortho1_nuc.reverse_complement())
    print(ortho)
    print(ortho.start1)
    print(ortho1_pro)
    print(ortho1_pro.values[0])
    
    if ortho.strand2 == 1:
        ortho2_nuc = records2[list(records2.keys())[0]].seq[ortho.start2-1:ortho.end2]
    elif ortho.strand2 == -1:
        ortho2_nuc = records2[list(records2.keys())[0]].seq[ortho.start2-1:ortho.end2].reverse_complement()
    ortho2_pro = genome2_location_df[genome2_location_df.start == ortho.start2-1].translation
#     print(ortho2_nuc)
    if len(ortho2_pro) == 0:
        continue
    print(ortho2_pro.values)
    
    with open(clustal_in_file_pro, mode='w') as f:
        f.write(">{} gene{}\n".format(species1, index))
        f.write(str(ortho1_pro.values[0]) + "\n")
        f.write(">{} gene{}\n".format(species2, index))
        f.write(str(ortho2_pro.values[0]) + "\n")
        
    with open(pal2nal_in_file_nuc, mode='w') as f:
        f.write(">{} gene{}\n".format(species1, index)) #CodeMLは>のあとにスペースをあけるとエラーになる
        f.write(str(ortho1_nuc) + "\n")
        f.write(">{} gene{}\n".format(species2, index))
        f.write(str(ortho2_nuc) + "\n")
        
    #Clustal Omegaの実行
    print("\n======================\nClustalO starting\n")
    clustal_out_file_pro = "clustalo_output/gene{}_aligned.pro".format(index)
    !clustalo -i $clustal_in_file_pro -o $clustal_out_file_pro --auto -v --force
    
    
    print("\n======================\nPAL2NAL starting\n")
    #PAL2NALの実行
    pal2nal_out_file = "pal2nal_output/{}_{}_gene{}_aligned_codon_based.nuc".format(species1, species2, index)
    !./pal2nal.v14/pal2nal.pl $clustal_out_file_pro $pal2nal_in_file_nuc -codontable 11 -output fasta > $pal2nal_out_file
    
    
    print("\n======================\nConversion to PHYLIP starting\n")
    #PHYLIPフォーマットへの変換
    with open(pal2nal_out_file) as f:
        l = f.readlines()
        #print(l)
        if(len(l) == 0):
            continue
        ls = []
        seq_len = 60*(int(len(l)/2)-2) + len(l[int(len(l)/2)-1]) - 1
        ls.append("\t2\t{}\n".format(seq_len))
        for i in range(len(l)):
            if(i == 0 or i == len(l)/2):
                ls.append(l[i].replace('>', ''))
            else:
                ls.append(l[i])
        codeml_in_file_nuc = "codeml_input/{}_{}_gene{}_aligned_phylip.nuc".format(species1, species2, index)
        with open(codeml_in_file_nuc, mode='w') as fw:
            fw.writelines(ls)
    
    
    print("\n======================\nCreating CodeML control files\n")
    #CodeMLのcontrolファイル作成
    codeml_ctl_file = "codeml_control/{}_{}_gene{}.ctl".format(species1, species2, index)
    codeml_out_file = "codeml_output/{}_{}_gene{}.codeml".format(species1, species2, index)
    
    create_codeml_ctl_file(codeml_ctl_file, codeml_in_file_nuc, codeml_tree_file, codeml_out_file)
    
    !codeml/bin/codeml $codeml_ctl_file

    
#     if(index > 10):
#         break
    


----------------------------------------------
index = 0 is starting

start1                    1
end1                   1182
strand1                  -1
chromosome1    Chromosome I
start2                   55
end2                   1182
strand2                   1
chromosome2      Chromosome
Name: 0, dtype: object
1
0    MMKANVTKKTLNEGLGLLERVIPSRSSNPLLTALKVETSEGGLTLS...
Name: translation, dtype: object
MMKANVTKKTLNEGLGLLERVIPSRSSNPLLTALKVETSEGGLTLSGTNLEIDLSCFVPAEVQQPENFVVPAHLFAQIVRNLGGELVELELSGQELSVRSGGSDFKLQTGDIEAYPPLSFPAQADVSLDGGELSRAFSSVRYAASNEAFQAVFRGIKLEHHGESARVVASDGYRVAIRDFPASGDGKNLIIPARSVDELIRVLKDGEARFTYGDGMLTVTTDRVKMNLKLLDGDFPDYERVIPKDIKLQVTLPATALKEAVNRVAVLADKNANNRVEFLVSEGTLRLAAEGDYGRAQDTLSVTQGGTEQAMSLAFNARHVLDALGPIDGDAELLFSGSTSPAIFRARRWGRRVYGGHGHAARLRGLLRPLRGMSALAHHPESSPPLEPRPEFA
['MNITVPKKLLSDQLSLLERIVPSRSANPLYTYLGLYAEEGALILFGTNGEVDLEVRLPAEAQSLPRVLVPAQPFFQLVRSLPGDLVALGLASEPGQGGQLELSSGRFRTRLSLAPAEGYPELLVPEGEDKGAFPLRTRMPSGELVKALTHVRYAASNEEYRAIFRGVQLEFSPQGFRAVASDGYRLALYDLPLPQG

['MSLTWRDLLDIFLVGVLLYSLYRILAGTRALNLVRGVLIYLATWFLASLLGLSTLSWILGNAATLGAFALIVVFQPELRGLLERLGRGQGALRPPPVALEMEELLLGLRRLAERRHGALLALERRTPLGEYAASGEVLDARLSARLLETLFYPGTPLHDGGAIVREGRLFAAGCVFPLSEVGMGLGTRHRAALGLSEVSDALVIVVSEETGAIRVAEGGRLSPPLSLEALRARLKEVVRDA']

ClustalO starting

Using 1 threads
Read 2 sequences (type: Protein) from clustalo_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene3_unaligned.pro
not more sequences (2) than cluster-size (100), turn off mBed
Setting options automatically based on input sequence characteristics (might overwrite some of your options).
Auto settings: Enabling mBed.
Auto settings: Setting iteration to 1.
Progressive alignment progress done. CPU time: 0.01u 0.00s 00:00:00.01 Elapsed: 00:00:00
Iteration step 1 out of 1
Computing new guide tree (iteration step 0)
Computing HMM from alignment
Progressive alignment progress done. CPU time: 0.03u 0.00s 00:00:00.03 Elapsed: 00:00:00
Alignment written to clustalo_output/gene3_aligned.pro

PAL2NAL starting

#

CODONML in paml version 4.8a, August 2014


----------------------------------------------
index = 7 is starting

start1                16307
end1                  17047
strand1                  -1
chromosome1    Chromosome I
start2              1327678
end2                1328442
strand2                  -1
chromosome2      Chromosome
Name: 7, dtype: object
16307
15    MDLSQARAALKAARRVAVLTGAGISAESGIPTFRDAQTGHWARFRP...
Name: translation, dtype: object
MDLSQARAALKAARRVAVLTGAGISAESGIPTFRDAQTGHWARFRPEDLASPDAYRRDPDLVWEWYAGRYRDVLAAQPNRGHELLAELERRKGPGFFLATQNVDGLHARAGSGSAGGELVELHGNLLQARDELTGEVFPLAAPDELTLPPLSPNGQRMRPHIVWFGEYLPVDALDAAQRAFAGAEVALVIGTSSVVYPAAGLAAETLRRGGAVIEINPEATDLTPDATFSLRESASRGLELLLEDD
['MERLEEARKRLEEARRVAVLTGAGISKPSGIPTFRDAEGLWKNFNPLDYATPEAYARDPEKVWAWYAWRIQKVREAKPNPAHYALVELERRILSRGGSFLLVTQNVDGLHALAGSQNLVELHGNLLRARCEACGKRFPLPEAFAPPPFCPACGHRARPDVVWFGEFLPEGAWERAERAFAEADFALVVGTSAEVEPAASLGRIAFASGAYLVEVNPEPTPLTPLAHLSLRTGAVEGMALLLPPSPEDQAEGHLS']

ClustalO starting

Using 1 threads
Re

Progressive alignment progress done. CPU time: 0.06u 0.01s 00:00:00.06 Elapsed: 00:00:01
Alignment written to clustalo_output/gene10_aligned.pro

PAL2NAL starting

#------------------------------------------------------------------------#
#  Input files:  clustalo_output/gene10_aligned.pro pal2nal_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene10_unaligned.nuc
#  Codontable 11 is used
#------------------------------------------------------------------------#


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014

codon     181: AGC TCC 

----------------------------------------------
index = 11 is starting

start1                23983
end1                  25182
strand1                   1
chromosome1    Chromosome I
start2                67430
end2                  68548
strand2                   1
chromosome2      Chromosome
Name: 11, dtype: object
23983
24    MKRFLLPLLLTWPGAALAAQPAPQLALAQDLQLTFKGELKKIDDGQ...
Name: tr

Progressive alignment progress done. CPU time: 0.07u 0.01s 00:00:00.08 Elapsed: 00:00:00
Alignment written to clustalo_output/gene14_aligned.pro

PAL2NAL starting

#------------------------------------------------------------------------#
#  Input files:  clustalo_output/gene14_aligned.pro pal2nal_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene14_unaligned.nuc
#  Codontable 11 is used
#------------------------------------------------------------------------#


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014

codon     218: TCC AGC 

----------------------------------------------
index = 15 is starting

start1                30938
end1                  31972
strand1                   1
chromosome1    Chromosome I
start2               223060
end2                 224034
strand2                   1
chromosome2      Chromosome
Name: 15, dtype: object
30938
29    MTATQEKQAQGKQAGTEQPAVRTLNLIQAITEALADELERDPNVVL...
Name: tr


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014

codon      57: AGC TCG 
codon      98: AGT TCG 
codon     230: AGC TCC 

----------------------------------------------
index = 18 is starting

start1                36181
end1                  36831
strand1                   1
chromosome1    Chromosome I
start2              1764078
end2                1764740
strand2                  -1
chromosome2      Chromosome
Name: 18, dtype: object
36181
35    MPQARGEGATPPTSLPNPSLKGVPLPDNPNNLEPLTREWLAAIGED...
Name: translation, dtype: object
MPQARGEGATPPTSLPNPSLKGVPLPDNPNNLEPLTREWLAAIGEDPDREGLQRTPQRVAKAWAYMTEGYGQTLAQVVGEGVFAAEGSEMVIVKDIEFYSMCEHHMLPFYGRAHVAYIPGTRILGLSKFARIVDLYSRRLQVQERITTQVADAVEELLAPKGVAVLMEGIHLCMAMRGVQKQNSSTTTSAMRGLFRSDPRTRAEFMSAVQGTLRGR
['MSPGPQSGGQERGSMERKMVELEDTGLTFATEVDLERLQALAAEWLQVIGEDPGREGLLKTPERVAKAWAFLTRGYRQRLEEVVGGAVFPAEGSEMVVVKGVEFYSMCEHHLLPFFGKVHIGYIPDGKILGLSKFARIVDMFARRLQVQERLAVQIAEAIQEVLEPQGVGVVVEGVHLCMMMRGVEK

['MDHLDDLVDLYEYRVEDLLQGRTPKGGKQALLRLRQLLIQSRLPGPLAKRFRQADARFRAQRRALAPEAQAPVELPAIAVPEEPEPPPPEASPLAALALKVWRLQVERDVKARLEALLAGRREELRLIHAFLDNFALYRETPGFKRDFNLSRFVPTRPIPSLSDTLVDLDDPKVAQALVVDFLETARELPKLLPLPPEETRTYVRRFLNRLLEWEGAYNLPPKPDLPALRRALEEARRLGAGEKEVAQLEERLRKAAQEARRRDLLLEEEKGRFRVALEKVVALLSLLPTPQGETPWPRVPEPGQEEEGLLTLRLAPGPVALGPLTLTLSHAGGTWYLGLEGEDHPLEDTLVLPWEDLEVWAVRENDLLHLRLEARSGLRLYELLAEGRLLAYLLHPGKDYAYLRLLRGLSTRLKGEFQAQAFGPALAEKYRKAPEEALQDFARKGLELTLKRLGQADPLPLLQEVGQALGLEAEAQTLGQALREYLGRRPPTRETLGGEVHFLALTPEPQALKLDQHVLSVRLKEDAVYLGQAGEVPRRLKDLLVYRLGERALVLAREGRRLAYTLLPLP']

ClustalO starting

Using 1 threads
Read 2 sequences (type: Protein) from clustalo_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene23_unaligned.pro
not more sequences (2) than cluster-size (100), turn off mBed
Setting options automatically based on input sequence characteristics (might overwrite some of your options).
Auto settings: Enabling mBed.
Auto settings: Setting iteration to 1.
Progressive alignment

Progressive alignment progress done. CPU time: 0.09u 0.01s 00:00:00.10 Elapsed: 00:00:01
Iteration step 1 out of 1
Computing new guide tree (iteration step 0)
Computing HMM from alignment
Progressive alignment progress done. CPU time: 0.30u 0.04s 00:00:00.34 Elapsed: 00:00:00
Alignment written to clustalo_output/gene26_aligned.pro

PAL2NAL starting

#------------------------------------------------------------------------#
#  Input files:  clustalo_output/gene26_aligned.pro pal2nal_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene26_unaligned.nuc
#  Codontable 11 is used
#------------------------------------------------------------------------#


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014


----------------------------------------------
index = 27 is starting

start1                64184
end1                  65113
strand1                   1
chromosome1    Chromosome I
start2              1290216
end2          


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014

codon     248: AGC TCG 
codon     252: AGC TCC 
codon     301: AGC TCC 

----------------------------------------------
index = 30 is starting

start1                80645
end1                  81571
strand1                  -1
chromosome1    Chromosome I
start2              1143166
end2                1144071
strand2                  -1
chromosome2      Chromosome
Name: 30, dtype: object
80645
79    MTTLDRGGVYFFPMTKAARSSKKSAPSVPTPAQARLHTPDTLPRPV...
Name: translation, dtype: object
MTTLDRGGVYFFPMTKAARSSKKSAPSVPTPAQARLHTPDTLPRPVLAGRDFLSNLDMTSAELRAVMDTAHSMKAGEWRAVKPLSGLSLALVFEKASLRTRTTFDVGMYQLGGHAITLSNTEIGLGTRERVSDVARNLERWVDGVMGRVYLQQTLVELAQHARIPVINGLSDMLHPAQLLADYQTIEEEFGQDLRGKRVVYIGDGNNLANSHIHMGILTGTDVTVVTPVGYEPNAGVLMDAVKAGVEVHLTNDLDAIQGADVLYTDVWISMGQEAEADIRRRAFRGYQVTPEMLETISPDGIFLHCLPAHYGEETVPEATEHPKSRVFDQAENRLHAQKALLYHVLGDMKPRW
['MGGEALTLPKDLLDFSGYGPKELQALLDLAEQLKRERYRGEDLKGKVLAL

CODONML in paml version 4.8a, August 2014

codon      12: TCG AGC 
codon     279: AGC TCC 
codon     322: AGC TCC 

----------------------------------------------
index = 33 is starting

start1                86474
end1                  87790
strand1                  -1
chromosome1    Chromosome I
start2              1666774
end2                1668024
strand2                  -1
chromosome2      Chromosome
Name: 33, dtype: object
86474
83    MAFRDVLNIEVAAGNGGDGSMSFHRAKYMEKGGPDGGHGGRGGSII...
Name: translation, dtype: object
MAFRDVLNIEVAAGNGGDGSMSFHRAKYMEKGGPDGGHGGRGGSIILRAIEGVESLERLVGRRKFKAENGRYGEGRLRQGADGQDTYIDVPVGTTAFDEDSGKVIADLVNVGQEKVIAKGGLGGRGNSTFTSSTRQAPRFAELGTPGQKRRVRLELRLIADVGLVGYPNAGKSSLLAALSRANPAIADYPFTTLSPILGVVQREDEQGVSLDERFTMADIPGIIEGASEGKGLGLEFLRHISRTRLLVYVLDVTRNPVEELQQLQAELRAYDPSLLDNVALVALNKVELVEPDLAQMVEDELAEQGLPVLQVSAKEGTGLNTLRETLFQLLPEFELWAQSNALEVEPDTVVDEALQIVFREDAPAKGEGAPERVWEVHGGGFEERIVRFSRYLEDAAEYLGNLFKRQGLYNALRRAGAREGDTVEIGTFRFEYFDDEE
['MFQDVLVITVAAGRGGDGAVSFRREKFVP


PAL2NAL starting

#------------------------------------------------------------------------#
#  Input files:  clustalo_output/gene36_aligned.pro pal2nal_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene36_unaligned.nuc
#  Codontable 11 is used
#------------------------------------------------------------------------#


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014

codon      19: AGC TCC 
codon     126: AGT TCC 
codon     133: AGT TCG 

----------------------------------------------
index = 37 is starting

start1                89343
end1                  90140
strand1                  -1
chromosome1    Chromosome I
start2               130031
end2                 130738
strand2                   1
chromosome2      Chromosome
Name: 37, dtype: object
89343
87    MRGWPAMLAAMTSSAGGAFSPSPAQRGLLTNQVIAVTSADQGYGRP...
Name: translation, dtype: object
MRGWPAMLAAMTSSAGGAFSPSPAQRGLLTNQVIAVTSADQGYGRPIGTALAHAGASVILVGTAPENLAAIA


PAL2NAL starting

#------------------------------------------------------------------------#
#  Input files:  clustalo_output/gene43_aligned.pro pal2nal_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene43_unaligned.nuc
#  Codontable 11 is used
#------------------------------------------------------------------------#


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014


----------------------------------------------
index = 44 is starting


----------------------------------------------
index = 45 is starting

start1               110187
end1                 110807
strand1                   1
chromosome1    Chromosome I
start2              1527505
end2                1528101
strand2                   1
chromosome2      Chromosome
Name: 45, dtype: object
110187
110    MSQGLFITLEGPEGAGKTTQLARLEARLRAAGHAVTVTREPGGTPL...
Name: translation, dtype: object
MSQGLFITLEGPEGAGKTTQLARLEARLRAAGHAVTVTREPGGTPLGTRVREVVLDPAVEIEPLGEFLLY


PAL2NAL starting

#------------------------------------------------------------------------#
#  Input files:  clustalo_output/gene48_aligned.pro pal2nal_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene48_unaligned.nuc
#  Codontable 11 is used
#------------------------------------------------------------------------#


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014

codon      56: AGC TCC 

----------------------------------------------
index = 49 is starting

start1               118232
end1                 119344
strand1                  -1
chromosome1    Chromosome I
start2               121447
end2                 122451
strand2                   1
chromosome2      Chromosome
Name: 49, dtype: object
118232
119    MTLPSPAAAHAELLALLTLRFTPGLGPRRIENLRRHFGSAEAALAA...
Name: translation, dtype: object
MTLPSPAAAHAELLALLTLRFTPGLGPRRIENLRRHFGSAEAALAAPLTELRRVEGLDSRSVAAIGGAKAAEEARAELNKAAERGVTLLGRGLPGYPAALEALGDPPAVLWVRGAGGA


PAL2NAL starting

#------------------------------------------------------------------------#
#  Input files:  clustalo_output/gene52_aligned.pro pal2nal_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene52_unaligned.nuc
#  Codontable 11 is used
#------------------------------------------------------------------------#


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014


----------------------------------------------
index = 53 is starting

start1               125525
end1                 126190
strand1                  -1
chromosome1    Chromosome I
start2              1416561
end2                1417094
strand2                  -1
chromosome2      Chromosome
Name: 53, dtype: object
125525
127    MFTNPFGRKKDMSDDQKKNNQPDTEADNAENIKFAADDTELRAGDE...
Name: translation, dtype: object
MFTNPFGRKKDMSDDQKKNNQPDTEADNAENIKFAADDTELRAGDETDADFEMPEGFPEMDENMFGQVQEMMAKLERVDELEKENADLKNRLGRLASDFEGYRNRTTIESAEAHDKGVSKAAEALMPVYDDIDRALSLSVDD


----------------------------------------------
index = 56 is starting

start1               136259
end1                 136738
strand1                   1
chromosome1    Chromosome I
start2              1325425
end2                1325778
strand2                  -1
chromosome2      Chromosome
Name: 56, dtype: object
136259
137    MMNGQTPTQLRPLASGLSPDERSGAYLLHLSPLLGLLLPSIGHLLG...
Name: translation, dtype: object
MMNGQTPTQLRPLASGLSPDERSGAYLLHLSPLLGLLLPSIGHLLGPVAAWWMLKSSPALDAQGKEVVNFQLTATVISLLLSVLAFVLIGAGMLGGLAGIVAPLAGMLSMFGAVGALFAILLPLSLLLSVYPLVCMVLGLMRASEGQLYRYPLTIRFLS
['MIPSPEERTWAAVAHLAPLVGYFLLIGQVLLPLAILFLGPKTPFVQAHAKESLNGQISYTLYGLGLFLLALTVVGIVVVYPLALALLALVLWNMVRGALAAGRGEVYRYAFILRLVP']

ClustalO starting

Using 1 threads
Read 2 sequences (type: Protein) from clustalo_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene56_unaligned.pro
not more sequences (2) than cluster-size (100), turn off mBed
Setting options automatically based on input sequence characteristics (might 

['MDLPKAYDPKSVEPKWAEKWAKNPFVANPKSGKPPFVIFMPPPNVTGSLHMGHALDNSLQDALIRYKRMRGFEAVWLPGTDHAGIATQVVVERLLLKEGKTRHDLGREKFLERVWQWKEESGGTILKQLKRLGASADWSREAFTMDEKRSRAVRYAFSRYYHEGLAYRAPRLVNWCPRCETTLSDLEVETEPTPGKLYTLRYEVEGGGFIEIATVRPETVFADQAIAVHPEDERYRHLLGKRARIPLTEVWIPILADPAVEKDFGTGALKVTPAHDPLDYEIGERHGLKPVSVINLEGRMEGERVPEALRGLDRFEARRKAVELFREAGHLVKEEDYTIALATCSRCGTPIEYAIFPQWWLRMRPLAEEVLKGLRRGDIAFVPERWKKVNMDWLENVKDWNISRQLWWGHQIPAWYCEDCQAVNVPRPERYLEDPTSCEACGSPRLKRDEDVFDTWFSSALWPLSTLGWPEETEDLKAFYPGDVLVTGYDILFLWVSRMEVSGYHFMGERPFKTVLLHGLVLDEKGQKMSKSKGNVIDPLEMVERYGADALRFALIYLATGGQDIRLDLRWLEMARNFANKLYNAARFVLLSREGFQAKEDTPTLADRFMRSRLSRGVEEITALYEALDLAQAAREVYELVWSEFCDWYLEAAKPALKAGNAHTLRTLEEVLAVLLKLLHPMMPFLTSELYQALTGKEELALEAWPEPGGRDEEAERAFEALKQAVTAVRALKAEAGLPPAQEVRVYLEGETAPVEENLEVFRFLSRADLLPERPAKALVKAMPRVTARMPLEGLLDVEEWRRRQEKRLKELLALAERSQRKLASPGFREKAPKEVVEAEEARLKENLEQAERIREALSQIG']

ClustalO starting

Using 1 threads
Read 2 sequences (type: Protein) from clustalo_input/Deinococcus_Radiodurans_R1_Thermus_thermophi

Progressive alignment progress done. CPU time: 0.08u 0.02s 00:00:00.10 Elapsed: 00:00:00
Alignment written to clustalo_output/gene62_aligned.pro

PAL2NAL starting

#------------------------------------------------------------------------#
#  Input files:  clustalo_output/gene62_aligned.pro pal2nal_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene62_unaligned.nuc
#  Codontable 11 is used
#------------------------------------------------------------------------#


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014


----------------------------------------------
index = 63 is starting

start1               156916
end1                 157383
strand1                   1
chromosome1    Chromosome I
start2               319340
end2                 319813
strand2                   1
chromosome2      Chromosome
Name: 63, dtype: object
156916
153    MQRIEATLLAHDLKFALVSTRWNHLIVDRLVEGAELAFVQHGGKTE...
Name: translation, dtype: obje

['MVVGVAGYYGFRNAGDEAILEAIARELKARGHEVVALSGDPKRTREDHGLRAYHRLNPLALLRADLWLLGGGGLLQDATSALSLTYYLSVLRLARLFRKRVVVFNQSLGPLSPWGERRVRKALQGVPVILRDQDSLEYARRLGIPAALGADPALLLPPPPVPREEDLVLVIPRAGVREEALTTLYVAANHLVHEGKQVLVLLLQPGYDDEVAEVFRLHRIERTSDPRRLLYLAAQAGYVISMRLHGLILAAAAGTPFAALSYDPKVAAFAKETGAYYQELPGEPIKLYKAALYGRFPDWEKVARLKERARQSFDLALGEGVPIKGSGRG']

ClustalO starting

Using 1 threads
Read 2 sequences (type: Protein) from clustalo_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene66_unaligned.pro
not more sequences (2) than cluster-size (100), turn off mBed
Setting options automatically based on input sequence characteristics (might overwrite some of your options).
Auto settings: Enabling mBed.
Auto settings: Setting iteration to 1.
Progressive alignment progress done. CPU time: 0.01u 0.00s 00:00:00.01 Elapsed: 00:00:00
Iteration step 1 out of 1
Computing new guide tree (iteration step 0)
Computing HMM from alignment
Progressive alignment progress done. CPU time: 0.05u 0.01s 00:00:00.06 Elap


PAL2NAL starting

#------------------------------------------------------------------------#
#  Input files:  clustalo_output/gene69_aligned.pro pal2nal_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene69_unaligned.nuc
#  Codontable 11 is used
#------------------------------------------------------------------------#


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014


----------------------------------------------
index = 70 is starting

start1               172108
end1                 172623
strand1                   1
chromosome1    Chromosome I
start2              1637693
end2                1638172
strand2                  -1
chromosome2      Chromosome
Name: 70, dtype: object
172108
167    MTRAFIALGANLGDPQATLRRALTELGALGEVRGVSALYRTAPVGG...
Name: translation, dtype: object
MTRAFIALGANLGDPQATLRRALTELGALGEVRGVSALYRTAPVGGPPGQPDYLNAVAELETALSAPALLAALHALEADAGRTRDVRWEARVLDLDLIVYGEDVSDDPALTLPHPRAWERGFVLAPLSDLAPSLAHPRTGEM

['MSWKDAYPDIPLGRDACGIIAMAEKSGKPSHRVVRRTLESLYRMAHRAGAIRGEGDGTGIQTDIPRELWALFLEQAGLDPGLAHNPRFFVGHFFVPKKEAGRLQEFEDLLRREGQRLGVRPVLFRRGEVVSEVLGPVGRRTEPLFLQVAGLSPDGDAPLWELGLRLEASFPVHVVSLSTHSVVYKVRGAAELLKRYYPELSRPEFKSRIALGHNRYSTNTLSTFEQVQPFGLIGHNGEINTIERLRREMDFLGIPRTGGSDSQDLNRMLEGLIYRYGLTLPEAMDLVFPPVLGEIKALPEDLQDLYMALRQRFGPLAQGPAAIVSRHGDEAVFATDAMGLRPLWQFETPYELVFSSERGVFSAEEFVSEPKPLAPGEKVYLRLTPEGAKVLPFDRHQRQVLERVAARTPVEGYRVHLTGPLRQAPPPLAGGSGVEVEEKPAPPPLGLERAFGWDRWDQAYLEALAKTGNEPIGSLGYDGPLAALNPEKPNLSEFFKETVAVVTNPAIDREREVEHFSTRTLLGRRPLPDGRGGGRVEELLLPIVLEEDQALAEAFGTLTLSEVRARFRTKTLVPQFTVEEGLLAGLKRLEEEAVKAVEEGAEVLILSDREAFQGGVWIDVGLAVAAVNRALMKRDAEGVALRRRTSLLVHSGGVRNLHDVAFLLGLGAEAVAPWLMEEKARALEGRKGLAGVLEALKKGLEKVISTMGIHELRGYGRIFSAIGLKPELAEYFGTRNFLGSEKAGYGFLELERTLLEREGFLRAEKVMPAKDFRFNPRIYKAAQEVASGKAPYAHFQEKVRALERENPVAARQLLEVRFPERSDVAPEEVDLSVGAHSLPFVISAMSFGSQGEASFRAYAEAAKRLNMLCINGEGGEIPDMLGKYTPWRGQQVASGRFGVHAYMLNSASVIEIKIGQGAKPGEGGHLPGKKVSPKVAAARNAVPGVDLISPSNNHDLYSIEDLAQLIEELKTVNPKALVSVKVPVIPGIGTIAVGIAKA


PAL2NAL starting

#------------------------------------------------------------------------#
#  Input files:  clustalo_output/gene77_aligned.pro pal2nal_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene77_unaligned.nuc
#  Codontable 11 is used
#------------------------------------------------------------------------#


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014


----------------------------------------------
index = 78 is starting

start1               197751
end1                 198434
strand1                  -1
chromosome1    Chromosome I
start2               857440
end2                 858126
strand2                   1
chromosome2      Chromosome
Name: 78, dtype: object
197751
191    MFFGPYTPLILLIFVASLVIQAYLSNTYKKWGNIRNPRNLTGAEVA...
Name: translation, dtype: object
MFFGPYTPLILLIFVASLVIQAYLSNTYKKWGNIRNPRNLTGAEVARMMLDENGLHNVPVEAVPGDLTDHYDPQQKVVRLSESTYGVPSIGAMAVAAHEVGHAVQDKVRMPALVLRGQMAVPLSLGMNFAPLLLMIGIFVHS

['MWAFPERFEGRHVRLEPLALAHLPAFLRHYDPEVYRFLSRAPVAPTEEALRAHLEGLLGEPGRVNWAILFGKEVAGRISVIAPEPEHAKLELGTMLFKPFWGSPANKEAKYLLLRHAFEVLRAERVQFKVDLRNERSQRALEALGAVREGVLRKNRRLPDGAFRDDVVYSVLKEEWPGVKARLEARLYGASGNP']

ClustalO starting

Using 1 threads
Read 2 sequences (type: Protein) from clustalo_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene81_unaligned.pro
not more sequences (2) than cluster-size (100), turn off mBed
Setting options automatically based on input sequence characteristics (might overwrite some of your options).
Auto settings: Enabling mBed.
Auto settings: Setting iteration to 1.
Progressive alignment progress done. CPU time: 0.01u 0.00s 00:00:00.01 Elapsed: 00:00:00
Iteration step 1 out of 1
Computing new guide tree (iteration step 0)
Computing HMM from alignment
Progressive alignment progress done. CPU time: 0.02u 0.00s 00:00:00.02 Elapsed: 00:00:00
Alignment written to clustalo_output/gene81_aligned.pro

PAL2NAL starting

#---------------------------------------------


Conversion to PHYLIP starting


----------------------------------------------
index = 85 is starting

start1               213798
end1                 214715
strand1                  -1
chromosome1    Chromosome I
start2                50129
end2                  50983
strand2                   1
chromosome2      Chromosome
Name: 85, dtype: object
213798
210    MRVSLEVPAMIRPLHATDLPDLLALLHWMDAAPEREVLAPDARTLN...
Name: translation, dtype: object
MRVSLEVPAMIRPLHATDLPDLLALLHWMDAAPEREVLAPDARTLNELRLECEDPAALVDEGEEGVRAYCALSPFRDGLALEGPLAEHGTDLRGLLSRAAQQAEGAPVYAFCARDNLPVRSALEAAGFAPMHTTDFYAAPLERWRTGSEKSSGKKAQLPPGYTLTRELPLTEYRALYRAAEDTWAARLDWSPEQYDAHFAREDVRLLALRDSEGRPVAFAELELCAPDSRAELTHLAVHPAQRGQGLGRALLALAAAEAAQSPEIRTLRARAHDHMSAARQLYARAGLSHCRAVVTSLREGDEEA
['MIRPVARKDLPGLLRLLRHMDQSPERGVLAPEARDLEGLAEELEDGLVLLKEGEVAGYVGLYPFWDGAALEGPLAYREEDLPPLLEAAEGRAREVEVERLYAFPREENATLRKALEGAGFGLLHVTYFFVKRPEGLDYPAPEGVRVEEGFPGAGVYRELYRESEESWALRLRWTDEELEEHFQDPAVHLLVAYLKGVPVGLAEVELEGGEASVAYIGVVPEARGKGIGRTLLSEAAKLARRKG

Progressive alignment progress done. CPU time: 0.04u 0.00s 00:00:00.04 Elapsed: 00:00:00
Alignment written to clustalo_output/gene88_aligned.pro

PAL2NAL starting

#------------------------------------------------------------------------#
#  Input files:  clustalo_output/gene88_aligned.pro pal2nal_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene88_unaligned.nuc
#  Codontable 11 is used
#------------------------------------------------------------------------#


Conversion to PHYLIP starting


Creating CodeML control files

CODONML in paml version 4.8a, August 2014

codon     216: TCC AGC 
codon     248: AGC TCC 

----------------------------------------------
index = 89 is starting

start1               220657
end1                 222090
strand1                  -1
chromosome1    Chromosome I
start2              1445013
end2                1446404
strand2                   1
chromosome2      Chromosome
Name: 89, dtype: object
220657
217    MSEHFHEHDLNFDKPREECGVFGIYSAQPNDL

Progressive alignment progress done. CPU time: 0.03u 0.00s 00:00:00.03 Elapsed: 00:00:00
Alignment written to clustalo_output/gene91_aligned.pro

PAL2NAL starting

#---  ERROR: inconsistency between the following pep and nuc seqs  ---#
>Deinococcus_Radiodurans_R1
MRLRPTPFPLWRNCTPKASTARPKPLLLLAPLRFRPLARTQRGGVMKTAVIQFPGSNCDA
DALHAARLLLDDGAQFVWHTETALPEGTELVFLPGGFSYGDHLRSGAIAARSPIMNAVKA
HAEAGGYVLGVCNGFQVLTEAGLLPGALSRNKELHFMCKPVHLRVENNATDFSRAYGPGQ
IIEIPIAHGEGNYYADAATIAELEEGGRVVFRYADNPNGSLNDIAGIVNERGNVLGMMPH
PERAVELLLGSEDGKGVFESLKTVKK
>Deinococcus_Radiodurans_R1
ATGAAAACAGCAGTCATCCAATTCCCCGGCTCCAACTGCGACGCCGACGCCCTGCACGCC
GCCCGGCTGCTGCTCGACGACGGCGCACAGTTCGTCTGGCACACCGAAACTGCGCTGCCC
GAAGGCACTGAACTGGTGTTTCTGCCCGGCGGCTTTTCCTACGGCGACCACCTCCGCAGC
GGCGCGATTGCCGCCCGCAGCCCCATCATGAACGCCGTCAAGGCCCACGCCGAGGCTGGC
GGCTACGTGCTGGGCGTGTGCAACGGCTTTCAGGTGCTGACCGAAGCGGGACTCCTGCCC
GGCGCGCTGAGCCGCAACAAGGAACTGCACTTCATGTGTAAGCCGGTGCATCTGCGGGTG
GAAAACAACGCCACCGACTTTTCCCGCGCCTACGGGCCGGGCCAGATCATCGAAATCCCT
ATCGCGCACG

CODONML in paml version 4.8a, August 2014

codon      73: TCG AGC 
codon     115: TCG AGC 

----------------------------------------------
index = 95 is starting

start1               230497
end1                 230973
strand1                   1
chromosome1    Chromosome I
start2                52473
end2                  52922
strand2                  -1
chromosome2      Chromosome
Name: 95, dtype: object
230497
228    MTDAEASGPLLHVVLFEPEKAGNVGNVARTCSVLGAELHLIRPFGF...
Name: translation, dtype: object
MTDAEASGPLLHVVLFEPEKAGNVGNVARTCSVLGAELHLIRPFGFHLHDREFRRAVMDYLQGVTLHEYAGWSDFQAQLPDAARVFAFSTHATEYHTRAGFKRGDYLLFGPESRGLPVWLRDALPKLKLPQPGRGRSLNLSVAVGAAAFEAGRQIERW
['MLHLVLYQPEIPQNAGNVARTAAALGWPLHLIRPLGFLLSSPKLKRAGLDYWPHVDLRLHDSFAAFLEALPRGARVFAFSARGEASLYEARFREGDYLLFGPESRGLPEEVLARFPTLKIPMPGPVRSLNLAVAVGVAAYEAYRQLTGR']

ClustalO starting

Using 1 threads
Read 2 sequences (type: Protein) from clustalo_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene95_unaligned.pro
not more sequence

['MNERKKSIDELLSDLGVLEEAPVEVELKESGTPEAQGDPRAFLEEFLGGLLLLLDPGHRLEVRVEGETLKAEVKGGDLGRFIGKEGRTLRAVEHLARVVLARRFGTGYRLVLDAAGYRSRAEARIRRLAEEAALTVAMTGEPLHLPPMPPGERRIVHMLLKNHPRVTTESQGEGEERHVVVYPRREAQG']

ClustalO starting

Using 1 threads
Read 2 sequences (type: Protein) from clustalo_input/Deinococcus_Radiodurans_R1_Thermus_thermophilus_HB8_gene98_unaligned.pro
not more sequences (2) than cluster-size (100), turn off mBed
Setting options automatically based on input sequence characteristics (might overwrite some of your options).
Auto settings: Enabling mBed.
Auto settings: Setting iteration to 1.
Progressive alignment progress done. CPU time: 0.01u 0.00s 00:00:00.01 Elapsed: 00:00:00
Iteration step 1 out of 1
Computing new guide tree (iteration step 0)
Computing HMM from alignment
Progressive alignment progress done. CPU time: 0.02u 0.00s 00:00:00.02 Elapsed: 00:00:00
Alignment written to clustalo_output/gene98_aligned.pro

PAL2NAL starting

#--------------------------------------------------

In [53]:
seq = Seq("ATGCTCAAGATCCGTTCTCTCGGCCACAGCACTTTCTTTCTCGATGACGGGACGCACCGT\
CTGCTCATCGAACCCTTTCTCGAAGGCAACCCGCGCTGCCCGGTGACCCTCGGCGAAGTG\
CAGTCGTGGCAGCCGAGCGCCGTGCTCATCAGCCACGCCCACGGCGACCACTGGGGCAAC\
GCCCTGGATTTCGGACGGGCGGGCGTGCCGATCATCGCCACCGCCGAGATCGCCGGGTAC\
GCCGGAGCGCACGGCGCCAACAACGCCGTCGGCATGAACATCGGCGGCACCTACCGCGCC\
GAGTGGGGCAGCGTTTCCCTGACCCCCGCGTGGCACTCGAGCTCCTTTCCCGACGGCACC\
TACGGCGGAATGCCCACGGGTCTGGTCATCGAGTTCGGCGGCCAGCGCCTGTATTTCGCG\
GGCGACACCGCGCTGTTTTCCGACATGCGCCTGATCGGGGACCGCGAACTCGACCTCGCG\
TTCCTGCCCATCGGCGACCACTACACCATGGGGCCGGAAGAGGCCGGGCGCACGCTGGAC\
TTGCTGCGTCCGCGCGTTGCCATTCCCATGCACTACGCGACTTTCCCGGCCCTGACCGGC\
GACCCCGCCGTCTTCCGCACCGAGGGCGAACGGCGCGGCGTGGAGGTCCGGGTCCTTGAC\
CCCGGCGAGACGACCGAGCTGTAA")

In [55]:
seq.translate(table=11)

Seq('MLKIRSLGHSTFFLDDGTHRLLIEPFLEGNPRCPVTLGEVQSWQPSAVLISHAH...EL*', HasStopCodon(ExtendedIUPACProtein(), '*'))

In [171]:
with open(in_file_nuc) as f:
    l = f.readlines()
    print(l)
    ls=[]
    ls.append(l[0])

    s = ""
    for i in range(len(l[1])):
        s += l[1][i]
        if i % 60 == 59:
            s += "\n"
        elif i % 10 == 9:
            s += " "
    ls.append(s)
    
    ls.append(l[2])
    s = ""
    for i in range(len(l[3])):
        s += l[3][i]
        if i % 60 == 59:
            s += "\n"
        elif i % 10 == 9:
            s += " "
    ls.append(s)
    with open(out_file_for_codeml, mode='w') as fw:
        fw.writelines(ls)
    

['>Deinococcus gene1\n', 'TCACGCGAACTCTGGCCTCGGTTCAAGCGGCGGTGAACTTTCCGGGTGATGGGCCAGGGCAGACATGCCCCGTAACGGCCTCAGAAGGCCCCTTAAACGCGCAGCGTGACCATGACCGCCATATACCCGCCTCCCCCACCTACGGGCGCGGAAAATGGCGGGGCTGGTGGACCCGGAGAACAGCAGCTCGGCGTCTCCGTCAATCGGGCCCAGCGCATCGAGCACATGGCGAGCGTTGAAGGCGAGGCTCATCGCCTGCTCGGTGCCGCCCTGGGTGACGCTGAGCGTGTCCTGAGCGCGGCCATAGTCGCCCTCCGCAGCGAGGCGCAGAGTGCCTTCGGACACCAGAAACTCGACGCGGTTGTTGGCGTTTTTGTCGGCCAGCACGGCCACACGGTTGACCGCTTCCTTGAGGGCGGTGGCGGGCAGTGTCACCTGAAGTTTGATGTCCTTGGGAATGACCCGCTCGTAGTCGGGAAAATCACCGTCGAGCAGCTTGAGGTTCATCTTCACGCGGTCGGTGGTCACGGTGAGCATGCCGTCGCCGTAGGTGAACCGCGCCTCGCCGTCCTTGAGCACGCGAATCAGTTCGTCCACGCTGCGGGCGGGAATAATCAGGTTTTTGCCGTCGCCGCTCGCCGGAAAGTCGCGGATAGCCACCCGGTAACCGTCGGACGCCACCACGCGGGCGCTCTCGCCGTGGTGCTCAAGCTTAATGCCGCGAAACACCGCCTGAAACGCCTCGTTGCTTGCCGCGTAGCGCACGCTGGAAAAGGCGCGGGACAGTTCGCCGCCGTCCAGGCTCACATCGGCCTGTGCGGGGAAAGAGAGTGGCGGGTACGCTTCGATGTCACCGGTCTGGAGCTTGAAATCTGAGCCGCCCGAGCGCACCGAGAGTTCCTGGCCGCTCAGTTCGAGTTCGACGAGCTCACCGCCGAGGTTGCGAACGATTTGCGCGAACAGGTGCGCCGGCA

In [182]:
from skbio import Alignment, DNA

ImportError: cannot import name 'Alignment'

In [75]:
with open(out_file) as f:
    l = f.readlines()
    print(len(l))
        
    #並べる用のindex
    idx1 = 0
    idx2 = int(len(l) / 2)
    
    #新しく書き込む用の配列
    ls = []
   
    #ギャップの記号を-から.に書き換える
    for i in range(len(l)):
        l[i] = l[i].replace('-', '.')

    
    for i in range(int(len(l)/2)):
        if i == 0:
            ls.append(l[idx2 + i][1:])
            ls.append(l[idx1 + i][1:])
            continue
        else:
            ls.append( "{}\n".format(60*(i-1)+1))
            aligned_seq1_with_space1 = ""
            aligned_seq1_with_space2 = ""
            for j in range(len(l[i])):
                aligned_seq1_with_space1 += l[idx1 + i][j]
                aligned_seq1_with_space2 += l[idx2 + i][j]
                if j % 3 == 2:
                    aligned_seq1_with_space1 += " "
                    aligned_seq1_with_space2 += " "
#                 print(aligned_seq1_with_space)
            ls.append(aligned_seq1_with_space2)
            ls.append(aligned_seq1_with_space1)
    print(ls)
    
    #1行目の追加
    idx = int(len(l)/2)-1
    ortho_len = 60 * (idx-1) + len(l[idx]) - 1 #-1は\nの分
    ls.insert(0, "\t2\t{}\tI\n\n".format(ortho_len))
    
    with open(out_file_for_codeml, mode='w') as fw:
        fw.writelines(ls)
            
    #print(f.read())
    
    
# リファレンスには.があってはいけないみたい
# じゃあどうするねん

44
['Thermus thermophilus gene1\n', 'Deinococcus gene1\n', '1\n', 'TGA ACA TAA CGG TTC CCA AAA AAC TCC TCT CGG ACC AGC TTT C.. ... ... CCT CCT GGA \n', '.CA CGC GAA CTC TGG CCT CGG TTC AAG CGG CGG T.G AAC TTT CCG GGT GAT GGG CCA GGG \n', '61\n', 'GCG CAT CGT CCC CTC TAG AAG CGC ..C AAC CCC CTC TAC ACC TAC CTG GGG CTT TAC GCC \n', 'CAG ACA TGC CCC GTA ACG GCC TCA GAA GGC CCC TTA AAC GCG CAG CGT GAC CAT GAC CGC \n', '121\n', 'GAG GAA GGG GCC TTG ATC CTC TTC GGG ACC AAC G.. ... ... GGG AGG TGG ACC TCG AGG \n', 'CAT ATA CCC GCC TCC CCC ACC TAC GGG CGC GGA AAA TGG CGG GGC TGG TGG ACC CGG AGA \n', '181\n', 'TCC GCC TCC CCG CCG AGG CCC AAA GCC TTC CCC GGG TGC TCG TCC CCG CCC AGC CCT TCT \n', 'ACA GCA GCT CGG CGT CTC CGT CAA ... ... ... ... ... ... .TC GGG CCC AG. ..C GCA \n', '241\n', 'TCC AGC TGG TGC GGA GCC TTC CTG GGG ACC TCG TGG CCC TCG GCC TCG CCT CGG AGC CGG \n', 'TCG AGC ACA TGG CGA GCG TTG AAG GCG AGG CTC ATC GCC TGC ... ... ..T CGG TGC CGC \n', '301\n', 'GCC AGG GGG GGC AGC TGG AGC T

In [140]:
for record_id, record in records1.items():
    print(record_id)
    for feature in record.features:
        if(feature.type == "CDS"):
            print(feature.qualifiers['translation'])
    break

NC_001263.1
['MMKANVTKKTLNEGLGLLERVIPSRSSNPLLTALKVETSEGGLTLSGTNLEIDLSCFVPAEVQQPENFVVPAHLFAQIVRNLGGELVELELSGQELSVRSGGSDFKLQTGDIEAYPPLSFPAQADVSLDGGELSRAFSSVRYAASNEAFQAVFRGIKLEHHGESARVVASDGYRVAIRDFPASGDGKNLIIPARSVDELIRVLKDGEARFTYGDGMLTVTTDRVKMNLKLLDGDFPDYERVIPKDIKLQVTLPATALKEAVNRVAVLADKNANNRVEFLVSEGTLRLAAEGDYGRAQDTLSVTQGGTEQAMSLAFNARHVLDALGPIDGDAELLFSGSTSPAIFRARRWGRRVYGGHGHAARLRGLLRPLRGMSALAHHPESSPPLEPRPEFA']
['MRKNVSDLEYTTWFAPVKPLGVQEGSLLLGVRNSFTKDWFRDHYLELLLAALRSLGAEHPQVEFQVLPAAQDALLLPNDPPPAPEAAAPTPKTKAAPTPPPSTPGDNRKTLNPKYTFENFVVGPNNNLAHAAALAVAESPGKAYNPLFIYGDVGLGKTHLMHAVGHYLAERFPEKRIEYVSTETFTNELINAIRDDKTTQFRNRYRSVDLLLVDDIQFLAGKERTQEEFFHTFNALYESNKQIILSSDRPPKDIQTLEGRLRSRFEWGLITDIQSPEYETRVAILKMNAEQGHITIPQEVLELIARQVTSNIRELEGALMRVVAFASLNNVPFSRAAAAKALSNVFAPQEAKVEMTDVLRQVAAHYGTTPDLIRGSGRARDIVVPRQVAQYLIRALTDHSLPEIGQFFGRDHSTVMHAVSKITEQMGKDPELAATVNTLRNRIQGKEEEEEVGA']
['MGTGDPSPLASQGPLPLVEGQKMKKPPPVRRGMNEAMEDRGSFFMALVSAYALHLARPFPALALAAGYLFVEGEDGCVAPRFAFLNGAGGHGVDVGQPQQFGARQAVAGGLDAAFLTYRRAVLAKDGAV

In [284]:
for feature in ch1.features:
    #print(records[key])
    if(feature.type == 'CDS'):
        url_with_q = url + "/sequence/?query=" + "{}".format(feature.qualifiers['translation'])
        print(url_with_q)
        response = requests.get(url_with_q)
        print(response.status_code)
        #print(type(response))
        #print(dir(response))
        if response.status_code != 200:
            continue
        print(response)
        dic = json.loads(response.content.decode())
        #print(dic)
        omaid = dic['targets'][0]['omaid']
        url_for_orthology = url + "/protein/" + "{}".format(omaid) + "/orthologs/?rel_type=1%3A1";
        res_orthologs = requests.get(url_for_orthology)
        json_ortho_list = json.loads(res_orthologs.content.decode())
        set_ortho_list = set()
        for ortho in ortho_list:
            set_ortho_list.add(ortho['entry_nr'])
    break

In [286]:
ortho_list = json.loads(res_orthologs.content.decode())
ortho_list

[{'entry_nr': 115488,
  'entry_url': 'https://omabrowser.org/api/protein/115488/',
  'omaid': 'FERPA00703',
  'canonicalid': 'D3RWP1',
  'sequence_md5': '014f10a7db88eb0921cf9df3f1786732',
  'oma_group': 813003,
  'oma_hog_id': 'HOG:0018779',
  'chromosome': 'Chromosome',
  'locus': {'start': 638640, 'end': 639413, 'strand': 1},
  'is_main_isoform': True,
  'rel_type': '1:1',
  'distance': 193.0,
  'score': 181.88999938964844},
 {'entry_nr': 138251,
  'entry_url': 'https://omabrowser.org/api/protein/138251/',
  'omaid': 'NATM801448',
  'canonicalid': 'M1XR41',
  'sequence_md5': '629690ddb2cd951a3ccbd6dd804733eb',
  'oma_group': 627788,
  'oma_hog_id': 'HOG:0016462.1a',
  'chromosome': 'A',
  'locus': {'start': 1439312, 'end': 1440145, 'strand': -1},
  'is_main_isoform': True,
  'rel_type': '1:1',
  'distance': 155.0,
  'score': 193.5500030517578},
 {'entry_nr': 148897,
  'entry_url': 'https://omabrowser.org/api/protein/148897/',
  'omaid': 'HALS302639',
  'canonicalid': 'OE6220R',
  's

In [129]:
seq1 = records1[list(records1.keys())[0]]
j=0
global seq
for i in seq1.features:
    if(i.type == 'CDS'):
        print(i.qualifiers['translation'])
        print(i)
        seq = records1[list(records1.keys())[0]].seq[0:1182]
        j+=1
        if j>2:
            break

['MMKANVTKKTLNEGLGLLERVIPSRSSNPLLTALKVETSEGGLTLSGTNLEIDLSCFVPAEVQQPENFVVPAHLFAQIVRNLGGELVELELSGQELSVRSGGSDFKLQTGDIEAYPPLSFPAQADVSLDGGELSRAFSSVRYAASNEAFQAVFRGIKLEHHGESARVVASDGYRVAIRDFPASGDGKNLIIPARSVDELIRVLKDGEARFTYGDGMLTVTTDRVKMNLKLLDGDFPDYERVIPKDIKLQVTLPATALKEAVNRVAVLADKNANNRVEFLVSEGTLRLAAEGDYGRAQDTLSVTQGGTEQAMSLAFNARHVLDALGPIDGDAELLFSGSTSPAIFRARRWGRRVYGGHGHAARLRGLLRPLRGMSALAHHPESSPPLEPRPEFA']
type: CDS
location: [0:1182](-)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: db_xref, Value: ['GeneID:1799546']
    Key: locus_tag, Value: ['DR_0001']
    Key: note, Value: ['similar to SP:P52851 PID:1321894 percent identity: 54.97; identified by sequence similarity; putative']
    Key: product, Value: ['DNA polymerase III subunit beta']
    Key: protein_id, Value: ['NP_293727.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MMKANVTKKTLNEGLGLLERVIPSRSSNPLLTALKVETSEGGLTLSGTNLEIDLSCFVPAEVQQPENFVVPAHLFAQIVRNLGGELVELELSGQELSVRSGGSDFKLQTGDIEAYPPLSFPAQADVSLDGGELSRAFSS

In [130]:
seq = records1[list(records1.keys())[0]].seq[1903:3268]
seq.reverse_complement().translate(table=11)

Seq('VRKNVSDLEYTTWFAPVKPLGVQEGSLLLGVRNSFTKDWFRDHYLELLLAALRS...GA*', HasStopCodon(ExtendedIUPACProtein(), '*'))

In [131]:
seq.reverse_complement()

Seq('GTGCGCAAAAACGTCTCCGACTTGGAGTACACGACCTGGTTCGCGCCGGTCAAA...TAA', IUPACAmbiguousDNA())