# Downloading Project

Import the `Project` class from the `phylogenetics` package.

In [1]:
from phylogenetics.project import Project

In [2]:
# Accession ids of the sequences to fetch; passed to project.download().
accessions = [
    "AGH62057",
    "NP_004553",
    "AHW56551",
    "BAA25751",
    "ABN46990",
]

As always, initialize a phylogenetics project object.

In [3]:
# Create the project object that every later cell operates on.
project = Project()

Download the metadata (e.g. sequences, ids, species, organism) from the BLAST database. You must give BLAST your email address for tracking purposes.

In [4]:
# Fetch the records for the accession ids; the second argument is the contact
# email the download service asks for.
# NOTE(review): this is a hard-coded personal address — substitute your own
# before running or sharing the notebook.
project.download(accessions, "zachsailer@gmail.com")

Align the downloaded sequences using MSAProbs.

In [5]:
# Align the downloaded sequences; prints "Alignment finished." when done.
project.align()

Alignment finished.


In [6]:
# Save the alignment through the PHYLIP writer to "ali.phy".
project.Alignment.Write.phylip(fname="ali.phy")

In [7]:
# Inspect the attribute dict stored for the homolog with id XX00000005.
project.HomologSet.XX00000005.attrs

{'accver': 'ABN46990.1',
 'defline': 'parkin 2 [Homo sapiens]',
 'gi': '125630745',
 'id': 'XX00000005',
 'length': '465',
 'orgname': 'Homo sapiens',
 'seqtype': 'None',
 'sequence': 'MIVFVRFNSSHGFPVEVDSDTSIFQLKEVVAKRQGVPADQLRVIFAGKELRNDWTVQNCDLDQQSIVHIVQRPWRKGQEMNATGGDDPRNAAGGCEREPQSLTRVDLSSSVLPGDSVGLAVILHTDSRKDSPPAGSPAGRSIYNSFYVYCKGPCQRVQPGKLRVQCSTCRQATLTLTQGPSCWDDVLIPNRMSGECQSPHCPGTSAEFFFKCGAHPTSDKETSVALHLIATNSRNITCITCTDVRSPVLVFQCNSRHVICLDCFHLYCVTRLNDRQFVHDPQLGYSLPCVAGCPNSLIKELHHFRILGEERYNRYQQYGAEECVLQMGGVLCPRPGCGAGLLPEPDQRKVTCEGGNGLGCGFAFCRECKETYHEGECSAVFEASGTTTQAYRVDERAAEQARWEAASKETIKKTTKPCPRCHVPVEKNGGCMHMKCPQPQCRLEWCWNCGCEWNRVCMGDHWFDV',
 'taxid': '9606'}

Construct a phylogenetic tree of the sequences using PhyML.

In [8]:
# Build the phylogenetic tree for the project's sequences.
project.tree()

In [9]:
# Save the tree through the NEXUS writer to "test.nxs".
project.Tree.Write.nexus(fname="test.nxs")

In [9]:
# Re-root the tree on the given id.
# NOTE(review): every id displayed in this notebook uses the "XX" prefix
# (e.g. XX00000002); confirm that "YY00000002" is the intended target.
project.Tree.reroot("YY00000002")

In [9]:
# Display the run statistics attached to the tree (model, taxa count, etc.).
project.Tree.stats

{'Data set': '#1',
 'Discrete gamma model': 'Yes',
 'Gamma shape parameter': '28.845',
 'Initial tree': 'BioNJ',
 'Model of amino acids substitution': 'LG',
 'Number of categories': '4',
 'Number of taxa': '5',
 'Parsimony': '6',
 'Random seed': '1463450473',
 'Run ID': 'none',
 'Sequence filename': 'ml-tree.phy',
 'Subtree patterns aliasing': 'no',
 'Time used': '0h0m0s (0 seconds)',
 'Tree size': '0.01336',
 'Tree topology search ': 'NNIs',
 'Unconstrained likelihood': '-1730.22164',
 'Version': '20120412'}

Reconstruct the ancestral sequences at each node in the tree.

In [20]:
# Reconstruct the ancestral sequences for the tree's nodes.
project.reconstruct()

In [21]:
# Print an ASCII rendering of the tree.
# NOTE(review): this reaches through the underscore-prefixed `_DendroPyTree`
# attribute, i.e. a non-public member of the Tree object.
project.Tree._DendroPyTree.print_plot()

/---------------------------------------------------------------------------------------------- XX00000004
|                                                                                                         
|                                              /----------------------------------------------- XX00000002
+----------------------------------------------+                                                          
|                                              \----------------------------------------------- XX00000003
|                                                                                                         
|                                              /----------------------------------------------- XX00000005
\----------------------------------------------+                                                          
                                               \----------------------------------------------- XX00000001
                                     

In [22]:
# Save the homolog set through the FASTA writer to "file-to-load.fasta".
project.HomologSet.Write.fasta(fname="file-to-load.fasta")

In [23]:
# Attribute names handed to the CSV writer as `tags`.
csv_tags = ("id", "accver", "orgname", "sequence", "taxid", "latest_align")
project.HomologSet.Write.csv(fname="file-to-load.csv", tags=csv_tags)