In [1]:
import GRASPy as gp 

Documentation can be found here - https://sebporras.github.io/GRASPy/



# Performing joint reconstructions.

Step 1) Submit the job to the server. You should specify the sequence type (Protein, DNA or RNA); if you omit it, the server will try to guess the sequence type.

In [None]:
# Inputs for the joint reconstruction: the alignment and its matching tree.
joint_aln_path = "./example_data/joint_recon/GRASPTutorial_Final.aln"
joint_nwk_path = "./example_data/joint_recon/GRASPTutorial_Final.nwk"

# Submit the job; the alphabet is stated explicitly rather than left for the
# server to guess.
request = gp.JointReconstruction(
    aln=joint_aln_path,
    nwk=joint_nwk_path,
    alphabet="Protein",
)

# Keep the job identifier from the response for the follow-up calls.
job_id = request["Job"]

Step 2) Check your job's position in the queue, or its current status

In [None]:
# Ask the server for the current status of the joint reconstruction job.
# Alternative (queue position instead of status): queue = gp.PlaceInQueue(job_id)
status = gp.JobStatus(job_id)

Step 3) Retrieve your job output, which contains the POG graphs

In [None]:
# Fetch the finished job's output from the server; per this tutorial it holds
# the POG graphs that the tree-building steps consume.
graphs = gp.JobOutput(job_id)

Step 4 - Optional) Request POGs for extant sequences 

In [None]:
# Request POGs for the extant sequences. The alignment and tree are the same
# files that were submitted for the joint reconstruction, so the extant POGs
# line up with the ancestor POGs when both are combined into one POGTree.
# (Previously the two arguments pointed at different root directories,
# `./example_data/...` and `./test_data/...`.)
extant_tree = gp.ExtantPOGTree(
    aln="./example_data/joint_recon/GRASPTutorial_Final.aln",
    nwk="./example_data/joint_recon/GRASPTutorial_Final.nwk",
)


In [None]:
# Bare expression as the last line of the cell, so the notebook displays it.
extant_tree

Step 5 - Option 1) Build a POG tree from extant and ancestor POGs using both of the server outputs 

- The advantage of doing it this way is that the POGTree object will contain sequence information on the BranchPoints for ancestors AND extants 

In [None]:
# Combine the extant POGs and the reconstruction output into one POGTree.
tree = gp.POGTreeFromJointReconstruction(extant_tree, graphs)

# Look up a single branch point by its label and show its sequence
# (presumably an extant sequence label -- confirm against the alignment).
branchpoint = tree.branchpoints["XP_012687241.1"]
branchpoint.seq

Step 5 - Option 2) Build a POGTree from the ancestor POG and from a nwk file string 

- ONLY ancestors will have sequence information based on the most likely symbol at each position in the sequence

In [None]:
# Build the POGTree from the Newick file plus the reconstruction output only.
joint_nwk_path = "./example_data/joint_recon/GRASPTutorial_Final.nwk"
tree = gp.POGTreeFromJointReconstruction(
    POG_graphs=graphs,
    nwk=joint_nwk_path,
)

In [None]:
# Write the tree out via writeToNwk; the target is named "test_nwk".
# NOTE(review): whether an extension is appended to file_name is not visible here.
tree.writeToNwk(file_name="test_nwk")

# Learning distributions from data 

The following instructions demonstrate how to learn a probability distribution from data. Currently this runs with the default settings only; an option to change some of the parameters still needs to be added. 


Step 1) Send request to the server

In [None]:
# `gp` (GRASPy) is already imported in the notebook's first cell, so it is not
# re-imported here; keeping imports in one place avoids hidden dependencies.

# Submit the learning job: a tree, the state labels and the CSV data.
request_2 = gp.LearnLatentDistributions(
    nwk="./example_data/EMTrain/3_2_1_1_filt.nwk",
    states=["A", "B"],
    csv_data="./example_data/EMTrain/3_2_1_1_data.csv",
)

# Keep the job identifier from the response for the follow-up calls.
second_id = request_2["Job"]

Step 2) Check the status of your job 

In [None]:
# Ask the server where the learning job currently sits in the queue.
# NOTE(review): `place` is only assigned, never displayed -- add a bare `place`
# line if the result should be shown in the notebook.
place = gp.PlaceInQueue(second_id)

Step 3) Retrieve your job and save the output

In [None]:
# Fetch the output of the learning job and keep it in `out`.
out = gp.JobOutput(second_id)

# Bare expression so the notebook displays the returned output.
out

Step 4) The learnt distribution can then be marginalised on an ancestor node

In [None]:
# Pull the learnt distribution out of the learning job's output.
j_distrib = out["Result"]["Distrib"]

# Tree and data files passed along with the learnt distribution.
em_nwk_path = "./example_data/EMTrain/3_2_1_1_filt.nwk"
em_csv_path = "./example_data/EMTrain/3_2_1_1_data.csv"

# Submit the marginalisation request for ancestor 0.
infer = gp.MarginaliseDistOnAncestor(
    nwk=em_nwk_path,
    states=["A", "B"],
    csv_data=em_csv_path,
    distrib=j_distrib,
    ancestor=0,
)

# Keep the job identifier from the response so the result can be fetched.
job_three_id = infer["Job"]

In [None]:
# Fetch the output of the marginalisation job.
infered_distribution = gp.JobOutput(job_three_id)

# Bare expression so the notebook displays the returned output.
infered_distribution