In [1]:
import time

import pandas as pd

from Bio import Entrez, SeqIO
from Bio.SeqRecord import SeqRecord

from entrez import select_phyla
from fasta_processing import read_fasta, read_single_fasta, dict_align_to_fasta
from data_processing import choose_best_frameshift, save_subset_df_transcripts, find_codon, download_subset_df_datasets
from datasets import download_gene_gb, parse_exon_ranges, create_exons

In [2]:
# Column labels applied to the gene table; the first one (tax_id) becomes the
# index and the two to_delete_* columns are discarded right after loading.
column_names = [
    "tax_id",
    "org_name",
    "gene_id",
    "current_id",
    "status",
    "symbol",
    "aliases",
    "description",
    "other_designations",
    "map_location",
    "chromosome",
    "genomic_nucleotide_accession.version",
    "start_position_on_the_genomic_accession",
    "end_position_on_the_genomic_accession",
    "orientation",
    "exon_count",
    "to_delete_1",
    "to_delete_2",
]

# Read the tab-separated table, skipping the file's first line and using
# column_names as the header; build the final frame in one chained expression.
df_cnidaria = pd.read_csv(
    "../all_nxf1.txt",
    sep="\t",
    skiprows=1,
    names=column_names,
    index_col=0,
).drop(columns=["to_delete_1", "to_delete_2"])
df_cnidaria.head()

Unnamed: 0_level_0,org_name,gene_id,current_id,status,symbol,aliases,description,other_designations,map_location,chromosome,genomic_nucleotide_accession.version,start_position_on_the_genomic_accession,end_position_on_the_genomic_accession,orientation,exon_count
tax_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
8407,Rana temporaria,120917577,0,live,NXF1,,nuclear RNA export factor 1,nuclear RNA export factor 1,,11,NC_053499.1,120752259.0,120809664.0,plus,22.0
8364,Xenopus tropicalis,734058,0,live,nxf1,"mex67, tap",nuclear RNA export factor 1,nuclear RNA export factor 1,,4,NC_030680.2,36493131.0,36532401.0,minus,23.0
9598,Pan troglodytes,451267,0,live,NXF1,CK820_G0030982,nuclear RNA export factor 1,nuclear RNA export factor 1,,9,NC_072407.2,65104305.0,65118449.0,minus,23.0
9913,Bos taurus,512136,0,live,NXF1,,nuclear RNA export factor 1,nuclear RNA export factor 1,,29,NC_037356.1,41099477.0,41110792.0,minus,22.0
9615,Canis lupus familiaris,483780,0,live,NXF1,,nuclear RNA export factor 1,nuclear RNA export factor 1,,18,NC_051822.1,54936964.0,54947631.0,plus,21.0


In [3]:
# NOTE(review): disabled call; the tax IDs appear to be hard-coded in the
# following cell (cnidaria_copy) instead — confirm before removing this line.
# cnidaria = select_phyla(df_cnidaria, "Cnidaria")

In [3]:
# tax_id values of the cnidarian species used below, keyed by phylum name.
cnidaria_copy = dict(
    Cnidaria=[
        50429,  # Stylophora pistillata
        6105,   # Actinia tenebrosa
        6087,   # Hydra vulgaris
        45351,  # Nematostella vectensis
    ]
)

In [4]:
# Pick the rows whose index label (tax_id) is one of the listed Cnidaria IDs;
# rows come back in the order of the ID list.
cnidaria_tax_ids = cnidaria_copy['Cnidaria']
cnidaria_subset = df_cnidaria.loc[cnidaria_tax_ids]

In [5]:
# Display the rows selected for the Cnidaria tax IDs.
cnidaria_subset

Unnamed: 0_level_0,org_name,gene_id,current_id,status,symbol,aliases,description,other_designations,map_location,chromosome,genomic_nucleotide_accession.version,start_position_on_the_genomic_accession,end_position_on_the_genomic_accession,orientation,exon_count
tax_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
50429,Stylophora pistillata,111339613,0,live,LOC111339613,"AWC38_SpisGene17253, NXF1",nuclear RNA export factor 1-like,nuclear RNA export factor 1-like|Nuclear RNA e...,,Un,NW_019218197.1,98177.0,131366.0,plus,21.0
6105,Actinia tenebrosa,116292710,0,live,LOC116292710,,nuclear RNA export factor 1-like,nuclear RNA export factor 1-like,,Un,NW_022259381.1,16092.0,33206.0,plus,24.0
6087,Hydra vulgaris,100214173,0,live,LOC100214173,,nuclear RNA export factor 1,nuclear RNA export factor 1,,12,NC_088975.1,736736.0,739315.0,minus,2.0
45351,Nematostella vectensis,5510737,0,live,LOC5510737,NEMVEDRAFT_v1g244041,nuclear RNA export factor 1,nuclear RNA export factor 1|predicted protein,,7,NC_064040.1,11955766.0,11973360.0,plus,24.0


In [7]:
# NOTE(review): disabled call; the cells below fetch data through
# download_gene_gb(org_names) instead — presumably this path was superseded, confirm.
# download_subset_df_datasets(cnidaria_subset)

In [7]:
# Lower-case, underscore-separated organism names passed to the download and
# exon helpers; the order here is the order in which they are processed.
org_names = [
    "stylophora_pistillata",
    "nematostella_vectensis",
    "actinia_tenebrosa",
    "hydra_vulgaris",
]

In [8]:
# Download a .gb file for every organism in org_names; the helper prints one
# confirmation line per downloaded file.
download_gene_gb(org_names)

stylophora_pistillata.gb has been downloaded
nematostella_vectensis.gb has been downloaded
actinia_tenebrosa.gb has been downloaded
hydra_vulgaris.gb has been downloaded


In [9]:
# Collect exon ranges for each organism, restricted to "mRNA" features.
# NOTE(review): presumably parsed from the <org_name>.gb files downloaded
# earlier — confirm in datasets.parse_exon_ranges.
cnidaria_exon_ranges = parse_exon_ranges(org_names, feature_type="mRNA")

In [10]:
# Build the exons from the parsed ranges; the helper prints one confirmation
# line per organism.
# NOTE(review): where and in what format the exons are stored is decided inside
# datasets.create_exons and is not visible here — confirm.
create_exons(cnidaria_exon_ranges)

stylophora_pistillata exons has been created
nematostella_vectensis exons has been created
actinia_tenebrosa exons has been created
hydra_vulgaris exons has been created
