In [1]:
import jinfo as j

#### Create simple DNA sequence objects and retrieve their sequence, label, length, molecular weight and melting temperature:


In [2]:
# Build a labelled DNA sequence object.
seq_1 = j.DNASeq("ATGAGGATAGATCCCTATTAA", label="simple_dna_sequence")

# Report, one per line: the object's string form, its length,
# its molecular weight and its melting temperature.
print(*(seq_1, seq_1.len, seq_1.MW(), seq_1.tm()), sep="\n")

simple_dna_sequence	ATGAGGATAGATCCCTATTAA
21
13006.49
43.87


#### You can get the mRNA transcription of a DNA sequence object and probe its features:

In [3]:
# TODO (open question): should transcribe()/translate() return an
# RNASeq/AASeq object directly rather than a value that has to be re-wrapped?
seq_1_mRNA = j.RNASeq(
    seq_1.transcribe(),
    label="simple_rna_sequence",
)

# Show the RNA object, its reverse transcription and its molecular weight,
# each on its own line.
print(
    seq_1_mRNA,
    seq_1_mRNA.reverse_transcribe(),
    seq_1_mRNA.MW(),
    sep="\n",
)

simple_rna_sequence	AUGAGGAUAGAUCCCUAUUAA
ATGAGGATAGATCCCTATTAA
6448.090000000001


#### Translate the DNA or RNA sequences to get a protein:

In [4]:
# Wrap the translation of seq_1 in a labelled amino-acid sequence object.
seq_1_prot = j.AASeq(
    seq_1.translate(),
    label="simple_protein_sequence",
)

# Print the protein object followed by its molecular weight, one per line.
print("\n".join(str(value) for value in (seq_1_prot, seq_1_prot.MW())))

simple_protein_sequence	MRIDPY*
883


#### You can perform DNA or protein alignments:
(requires the MUSCLE backend)

In [5]:
# Three further DNA sequences, built in order from (sequence, label) pairs.
seq_2, seq_3, seq_4 = (
    j.DNASeq(bases, label=name)
    for bases, name in (
        ("ATGAGGAACTTGATAGATCCCTA", "simple_dna_homolog_1"),
        ("ATGAGGATAGATCCTTACCTCTA", "simple_dna_homolog_2"),
        ("ATGAGGATAGAGGCCTCCCTA", "simple_dna_homolog_3"),
    )
)

# Pairwise alignment of seq_1 against the first homolog.
simple_alignment = seq_1.align(seq_2)
print(simple_alignment)

simple_dna_sequence	ATGAG------GATAGATCCCTATTAA
simple_dna_homolog_1	ATGAGGAACTTGATAGATCCCTA----



In [6]:
# Align all four DNA sequence objects together and show the result.
multiple_alignment = j.multialign(
    [
        seq_1,
        seq_2,
        seq_3,
        seq_4,
    ]
)
print(multiple_alignment)

simple_dna_homolog_2	ATGAG------GATAGA----TCCTTACCTCTA
simple_dna_homolog_3	ATGAG------GATAGAGGCCTCCCTA------
simple_dna_sequence	ATGAG------GATAGA----TCCCTA--TTAA
simple_dna_homolog_1	ATGAGGAACTTGATAGA----TCCCTA------



#### Phylogenetic trees can be calculated from alignment objects:
(requires the FastTree backend)

In [7]:
# Calculate a tree from the multiple alignment built in the previous cell
# (the notebook notes that this step requires the FastTree backend).
simple_tree = multiple_alignment.calc_tree()
print(simple_tree.tree) # The .tree attribute prints as a Newick-format string.

(simple_dna_sequence:0.00054,simple_dna_homolog_1:0.00055,(simple_dna_homolog_3:0.00055,simple_dna_homolog_2:0.16226)0.177:0.00055);



#### For ML applications, one-hot encoding DNA is helpful:

In [8]:
# Print the one-hot encoding of seq_1. In the output shown below it is a flat
# run of 0/1 values: 84 entries for this 21-base sequence, i.e. four per base.
print(seq_1.one_hot())

[1 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1 0 0 1 0 0 0 0
 0 0 1 1 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 1
 0 0 1 0 0 0 1 0 0 0]


#### You can read sequence objects and alignments from fasta files: