In [1]:
import os 
import subprocess
from transcriptomics import * 

Initializing transcriptomics package


# Drosophila melanogaster

Build transcriptome object 

In [None]:
# Input files for D. melanogaster: a genome FASTA and an NCBI RefSeq GTF
# annotation (dm6 assembly, per the file names).
mel_genome_path = "raw_data/dmel/genome/dm6.fa"
mel_transcriptome_path = "raw_data/dmel/transcriptome/dm6.ncbiRefSeq.gtf"
# Name identifying the transcriptome object; the same name is later passed to
# load_transcriptome_object.
mel_object_name = "mel_transcriptome"
# Presumably builds (or refreshes) the stored transcriptome object from the
# genome and annotation files -- implemented in the transcriptomics package;
# confirm its exact behavior there.
update_transcriptome_object(mel_genome_path, mel_transcriptome_path, mel_object_name)

In [4]:
# Load the transcriptome object identified by mel_object_name; `tr` is the
# object the FASTA export cells iterate over (tr.genes).
tr = load_transcriptome_object(mel_object_name)

In [7]:
# Sanity check on the loaded transcriptome. Judging by its name, it verifies
# that exons cover all annotated features -- TODO confirm in the transcriptomics
# package.
check_exons_contain_all_features(tr)

Export mRNA (introns removed) to a FASTA file

In [7]:
# Write one FASTA record per transcript using `transcript.mrna_sequence`
# (the output is named mRNA_no_introns, i.e. the intron-free sequence).
output_dir = "raw_data/dmel/transcriptome/mRNA_no_introns"
# exist_ok=True keeps the cell re-runnable without a separate (racy)
# os.path.exists check.
os.makedirs(output_dir, exist_ok=True)

output_path = os.path.join(output_dir, "mRNA_no_introns.fasta")

with open(output_path, "w") as output_file:
    # Only the gene objects are needed, so iterate over the mapping's values
    # instead of looking each gene up again by key.
    for gene in tr.genes.values():
        for transcript in gene.transcripts:
            # Header: transcript name first (the token makeblastdb -parse_seqids
            # uses as the sequence ID), followed by key=value annotations.
            header = (
                f">{transcript.name} gene={gene.name} "
                f"location={transcript.chromosome}:{transcript.position[0]}-{transcript.position[1]} "
                f"strand={transcript.strand}"
            )
            # No trailing spaces before the newlines: whitespace at the end of a
            # sequence line can be mistaken for sequence content by strict parsers.
            output_file.write(f"{header}\n{transcript.mrna_sequence}\n")


Export mRNA (introns included) to a FASTA file

In [8]:
# Write one FASTA record per transcript using `transcript.dna_sequence`
# (the output is named mRNA_yes_introns, i.e. the sequence with introns kept).
output_dir = "raw_data/dmel/transcriptome/mRNA_yes_introns"
# exist_ok=True keeps the cell re-runnable without a separate (racy)
# os.path.exists check.
os.makedirs(output_dir, exist_ok=True)

output_path = os.path.join(output_dir, "mRNA_yes_introns.fasta")

with open(output_path, "w") as output_file:
    # Only the gene objects are needed, so iterate over the mapping's values
    # instead of looking each gene up again by key.
    for gene in tr.genes.values():
        for transcript in gene.transcripts:
            # Header: transcript name first (the token makeblastdb -parse_seqids
            # uses as the sequence ID), followed by key=value annotations.
            header = (
                f">{transcript.name} gene={gene.name} "
                f"location={transcript.chromosome}:{transcript.position[0]}-{transcript.position[1]} "
                f"strand={transcript.strand}"
            )
            # No trailing spaces before the newlines: whitespace at the end of a
            # sequence line can be mistaken for sequence content by strict parsers.
            output_file.write(f"{header}\n{transcript.dna_sequence}\n")


Create BLAST databases

In [11]:
# Check that makeblastdb (from the BLAST package) is installed and on PATH by
# printing its version
!makeblastdb -version

makeblastdb: 2.15.0+
 Package: blast 2.15.0, build Oct 19 2023 15:16:13


In [12]:
# Build a nucleotide BLAST database from the mRNA_no_introns FASTA file.
input_path = "raw_data/dmel/transcriptome/mRNA_no_introns/mRNA_no_introns.fasta"
# Path prefix for the generated database files (passed to -out).
output_path = "raw_data/dmel/transcriptome/mRNA_no_introns/mRNA_no_introns"
# Pass the arguments as a list (no shell) so paths containing spaces or shell
# metacharacters reach makeblastdb verbatim.
command = [
    "makeblastdb",
    "-in", input_path,
    "-dbtype", "nucl",
    "-parse_seqids",
    "-out", output_path,
]
# check=True raises CalledProcessError on a non-zero exit status instead of
# letting a failed database build pass unnoticed.
subprocess.run(command, check=True)



Building a new DB, current time: 11/09/2024 10:58:02
New DB name:   /Users/giacomo.glotzer/Desktop/Drosophila-HCR/raw_data/dmel/transcriptome/mRNA_no_introns/mRNA_no_introns
New DB title:  raw_data/dmel/transcriptome/mRNA_no_introns/mRNA_no_introns.fasta
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 3000000000B
Adding sequences from FASTA; added 35121 sequences in 0.898643 seconds.




CompletedProcess(args='makeblastdb -in raw_data/dmel/transcriptome/mRNA_no_introns/mRNA_no_introns.fasta -dbtype nucl -parse_seqids -out raw_data/dmel/transcriptome/mRNA_no_introns/mRNA_no_introns', returncode=0)

In [13]:
# Build a nucleotide BLAST database from the mRNA_yes_introns FASTA file.
input_path = "raw_data/dmel/transcriptome/mRNA_yes_introns/mRNA_yes_introns.fasta"
# Path prefix for the generated database files (passed to -out).
output_path = "raw_data/dmel/transcriptome/mRNA_yes_introns/mRNA_yes_introns"
# Pass the arguments as a list (no shell) so paths containing spaces or shell
# metacharacters reach makeblastdb verbatim.
command = [
    "makeblastdb",
    "-in", input_path,
    "-dbtype", "nucl",
    "-parse_seqids",
    "-out", output_path,
]
# check=True raises CalledProcessError on a non-zero exit status instead of
# letting a failed database build pass unnoticed.
subprocess.run(command, check=True)



Building a new DB, current time: 11/09/2024 10:58:16
New DB name:   /Users/giacomo.glotzer/Desktop/Drosophila-HCR/raw_data/dmel/transcriptome/mRNA_yes_introns/mRNA_yes_introns
New DB title:  raw_data/dmel/transcriptome/mRNA_yes_introns/mRNA_yes_introns.fasta
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 3000000000B
Adding sequences from FASTA; added 35121 sequences in 2.73018 seconds.




CompletedProcess(args='makeblastdb -in raw_data/dmel/transcriptome/mRNA_yes_introns/mRNA_yes_introns.fasta -dbtype nucl -parse_seqids -out raw_data/dmel/transcriptome/mRNA_yes_introns/mRNA_yes_introns', returncode=0)

# Drosophila yakuba