In [1]:
from sequence_tools import run_genscan, RNAsequence, DNAsequence, AminoAcidSequence, fastq_filter
from bio_files_processor import convert_multiline_fasta_to_oneline

### 1. GenScan Parser

Run `run_genscan` on the file *example_data/transferrin.fasta*.

In [2]:
# Input FASTA for the GenScan run. NOTE(review): the summary printed for
# `results` reports a status code, so this call presumably contacts a remote
# service and needs network access — confirm.
genscan_input = 'example_data/transferrin.fasta'
results = run_genscan(sequence_file=genscan_input)

Explore the results

In [3]:
# Bare last expression: the notebook renders the object's repr, which gives a
# one-line summary of the run.
results

GenscanOutput run with status code 200. 6 cds were found. 50 intorns were found. 52 exons were found

In [4]:
# Despite the name, `cds_list` displays as a dict keyed by
# 'GENSCAN_predicted_peptide_<n>' whose values are peptide sequences.
# NOTE(review): this prints every sequence in full; consider showing only the
# keys or sequence lengths if the output becomes unwieldy.
results.cds_list

{'GENSCAN_predicted_peptide_1': 'MRLAICALLCAGALMGAGQSVQPTEHDPKQVLEVLDRAIRQEKETKGIRLGKEEVKFSLWADDMIVYLENPIVSAQNLLKLISNFSKVSGYKINVQKSQAFLYTNNRQTESQIMSELPFTIASKRIKYLGTQLTRDVKDLFKNYKPLLNEIKEDTNKWKNIHCSWIGRINFVKMAILPKANEADAVTIDGGLVFEAGLAPYSLKPIVVEIYGSKDAAAKFFSSSCVPCEDMKNFPRLCQLCAGKGTDKCACSSQESYFGYAGALKNLANKADKDQYELLCLNNTQKLEHFGKDKSSEFQLFGSPHETDLLFTDAAHGFLMVPPKIDAKLYLGYEYFSATQDPKRGGVIPPRKGEADAMSLDAGFIYIAGKCGLVPVLAENYKQFRSKCVNLPMEGPQPGDMLYLIPLCCHV',
 'GENSCAN_predicted_peptide_2': 'MIMMKGSIQQEDITIGCARGDSLIGSKLTISEIKISMKEKTQNSLCGFFSASLQQICQPASPVPSSAVQAQVSALSRREHPEHLSRQCPWCKLLHMRFSHLQPTGSLFILQKKRDTKQDSGSFQMQQPDAGMMEEVTGQEMYWLGAKIKVVHRHYLPESPTGAVVSLLAVVSPYVGRIAPATTQSTERGIRIRPRGTTATEEIICEKGQPEKQKDEKEEDLLKEM',
 'GENSCAN_predicted_peptide_3': 'MDKFLNTYTLPRLNQEEVESLNRPITGSEIVAIINSLPTKKSPGPDGFTAEFYQRYKEELVPFLLRLFQSIEKEGILPYSFYEASIILISKPGRDTIKKENFRPISLMNIDAKILNKILANRIQQHVQKLIHHDQVGFIPGMQGWFNIRKLINVIQHINRTKDKNHMIISIDVEKAFDKIQQPFMLKTLNKLGIDGTYLKIIRAIYDKPTANIILNGQKLEAFLLKTGTRQGCPLSPLLFNIVLEVLARAIRQEKEIKG

### 2. Convert multiline .fasta to oneline .fasta

In [5]:
# Only the input path is supplied; the function prints a confirmation message
# once the conversion is done.
multiline_fasta = 'example_data/example_multiline_fasta.fasta'
convert_multiline_fasta_to_oneline(multiline_fasta)

Multiline FASTA is converted to oneline!


The converted file is stored in a new folder.

### 3. Sequencing games

In [6]:
# 'AUGC' repeated four times: 8 of the 16 bases are G or C, so the result is
# the fraction 0.5 (a ratio, not a percentage).
rna = RNAsequence('AUGCAUGCAUGCAUGC')
rna.gc_content()

0.5

In [7]:
# NOTE(review): `rna` is rebuilt from the same literal used in the previous
# cell; the re-assignment looks redundant unless gc_content() mutates the
# object — confirm before removing.
rna = RNAsequence('AUGCAUGCAUGCAUGC')
# Base-wise complement within the RNA alphabet (A<->U, G<->C for this input).
rna.complement()

'UACGUACGUACGUACG'

In [8]:
# Confirm the concrete class of the sequence object created above.
type(rna)

sequence_tools.RNAsequence

In [9]:
# For this input transcribe() yields the complementary RNA strand
# (A->U, T->A, G->C, C->G), i.e. the DNA is treated as the template strand
# rather than being converted by a plain T->U substitution.
dna = DNAsequence('ATGCATGCATGCATGC')
dna.transcribe()

'UACGUACGUACGUACG'

In [10]:
# Show that transcription changes the class: a DNAsequence goes in and an
# RNAsequence comes out.
type(dna), type(dna.transcribe())

(sequence_tools.DNAsequence, sequence_tools.RNAsequence)

In [11]:
# Mass of a ten-residue peptide. The unit is presumably daltons — TODO confirm
# against sequence_tools.
aminoacid = AminoAcidSequence('KTGFALKPGF')
aminoacid.protein_mass()

1046.5912700000001

### 4. Filter FASTQ

In [12]:
# Bounds are passed as (low, high) tuples. Presumably GC content is given in
# percent and the threshold applies to read quality — TODO confirm units and
# inclusiveness against fastq_filter's documentation.
fastq_filter(
    'example_data/example_fastq.fastq',
    'filtered_fastq',
    gc_bound=(40, 60),
    length_bound=(0, 200),
    quality_threshold=25,
)

Fastq filtering has been finished!


The filtered file is stored in a new folder.