# Supplementary figure - per-species KDE
This is an accessory notebook for generating the per-species KDE supplementary figure.  
It is not included in the main analysis notebooks since it takes some time and slows down the run.

In [1]:
import os
import numpy as np
import pandas as pd
import scipy
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Plot template
import plotly.io as pio

pio.templates["custom"] = go.layout.Template(
    layout=go.Layout(
        font_size=20,
        xaxis_mirror=True, yaxis_mirror=True,
        xaxis_rangemode='tozero', yaxis_rangemode='tozero',
        height=500, width=800
    )
)

pio.templates.default = "simple_white+custom"
template_colors = pio.templates[pio.templates.default]['layout']['colorway']

In [3]:
figures_dir = '../fig'

In [4]:
species_df = pd.read_csv('Table_S1.tsv', sep='\t', usecols=['species','full_name','phylum'])

In [21]:
def kde(v, steps=1000):
    kde = scipy.stats.gaussian_kde(v)
    vmin = v.min()
    vmax = v.max()
    step = (vmax-vmin)/steps
    x = np.arange(vmin,vmax,step)
    y = kde.evaluate(x)
    y = y/y.sum()
    return x,y

In [79]:
gff_headers = ['seqid','source','ftype','start','end','score','strand','phase','atributes']
phy_order = ['Chordata', 'Arthropoda', 'Mollusca', 'Cnidaria', 'Nematoda', 'Ascomycota', 'Streptophyta']
phyla_colors = dict(zip(phy_order, template_colors))
wd = "../output/species_level_analysis/RESULT_euk/per_species"
figures = []

for phy in phy_order:
    print(f'### {phy} ###')
    phy_species_df = species_df.query('phylum == @phy')
    species = list(phy_species_df['species'])
    cols = 3
    rows = len(species) // cols + 1
    fig = make_subplots(rows=rows, cols=cols, subplot_titles=species, shared_yaxes=True,
                       x_title='Log10 Intron length (bp)')
    r = 1
    c = 1
    min_x, max_x, min_y, max_y = [np.nan]*4
    for row in phy_species_df.iterrows():
        sp = row[1]['species']
        sp_full = row[1]['full_name']
        print(sp)
        gff = os.path.join(wd,sp_full,'annotation.canon.introns.gff3')
        gff_df = pd.read_csv(gff, sep='\t', comment='#', names=gff_headers)
        introns_df = gff_df.query('ftype == "intron"')
        intron_len = np.log10(introns_df['end'] - introns_df['start'] + 1)
        x,y = kde(intron_len)
        fig.add_trace(go.Scatter(x=x,y=y), row=r, col=c)
        x_min = min(x); y_min = min(x); x_max = max(x); y_max = max(y)
        min_x = min(x_min, min_x); min_y = min(y_min, min_y); max_x = max(x_max, max_x); max_y = max(y_max, max_y)
        c += 1
        if c > cols:
            c = 1
            r += 1
    fig.update_layout(title_text=phy, width=1000, height=200*rows,
                     margin=dict(l=150, r=80, t=80, b=80))
    fig.update_traces(marker_color=phyla_colors[phy], showlegend=False)
    fig.update_xaxes(range=[min_x, max_x])
    fig.update_yaxes(range=[min_y, max_y])
    fig.add_annotation(x=-0.12, y=0.5, text='Density', xref="paper", yref="paper", textangle=270)
    figures.append(fig)

### Chordata ###
acanthochromis_polyacanthus
accipiter_nisus
ailuropoda_melanoleuca



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



amazona_collaria
amphilophus_citrinellus
amphiprion_ocellaris
amphiprion_percula



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



anabas_testudineus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



anas_platyrhynchos



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



anas_zonorhyncha
anolis_carolinensis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



anser_brachyrhynchus
anser_cygnoides
aotus_nancymaae
apteryx_haastii
apteryx_owenii
apteryx_rowi
aquila_chrysaetos



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



astatotilapia_calliptera



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



astyanax_mexicanus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



athene_cunicularia
balaenoptera_musculus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



betta_splendens



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



bison_bison
bos_grunniens



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



bos_indicus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



bos_mutus
bos_taurus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



branchiostoma_lanceolatum
bubo_bubo
cairina_moschata
calidris_pugnax
callithrix_jacchus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



callorhinchus_milii
camarhynchus_parvulus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



camelus_dromedarius



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



canis_lupus
capra_hircus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



carassius_auratus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



carlito_syrichta
castor_canadensis
catagonus_wagneri
catharus_ustulatus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



cavia_aperea
cavia_porcellus
cebus_imitator
cercocebus_atys
cervus_hanglu



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



chelonoidis_abingdonii
chelydra_serpentina
chinchilla_lanigera
chlorocebus_sabaeus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



choloepus_hoffmanni
chrysemys_picta
chrysolophus_pictus
ciona_savignyi
clupea_harengus
colobus_angolensis
corvus_moneduloides



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



cottoperca_gobio



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



coturnix_japonica



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



cricetulus_griseus
crocodylus_porosus
cyanistes_caeruleus
cyclopterus_lumpus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



cynoglossus_semilaevis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



cyprinodon_variegatus
cyprinus_carpio
danio_rerio



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



dasypus_novemcinctus
delphinapterus_leucas
denticeps_clupeoides



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



dicentrarchus_labrax
dipodomys_ordii
dromaius_novaehollandiae
echeneis_naucrates



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



echinops_telfairi
electrophorus_electricus
eptatretus_burgeri
equus_asinus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



equus_caballus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



erinaceus_europaeus
erpetoichthys_calabaricus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



esox_lucius
falco_tinnunculus
felis_catus
ficedula_albicollis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



fukomys_damarensis
fundulus_heteroclitus
gadus_morhua



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



gallus_gallus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



gambusia_affinis
gasterosteus_aculeatus
geospiza_fortis
gopherus_agassizii
gorilla_gorilla



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



gouania_willdenowi



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



haplochromis_burtoni
heterocephalus_glaber
hippocampus_comes
homo_sapiens



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



hucho_hucho
ictalurus_punctatus
ictidomys_tridecemlineatus
jaculus_jaculus
junco_hyemalis
kryptolebias_marmoratus
labrus_bergylta
larimichthys_crocea
lates_calcarifer
laticauda_laticaudata
latimeria_chalumnae
lepidothrix_coronata
lepisosteus_oculatus
leptobrachium_leishanense



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



lonchura_striata
loxodonta_africana
lynx_canadensis
macaca_fascicularis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



macaca_mulatta



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



macaca_nemestrina
malurus_cyaneus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



manacus_vitellinus
mandrillus_leucophaeus
marmota_marmota
mastacembelus_armatus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



maylandia_zebra
meleagris_gallopavo
melopsittacus_undulatus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



meriones_unguiculatus
mesocricetus_auratus
microcebus_murinus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



microtus_ochrogaster
mola_mola
monodelphis_domestica



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



monodon_monoceros
monopterus_albus
moschus_moschiferus
mus_caroli



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



mus_musculus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



mus_pahari



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



mus_spicilegus
mus_spretus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



mustela_putorius
myotis_lucifugus
myripristis_murdjan



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



naja_naja



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



nannospalax_galili
neogobius_melanostomus
neolamprologus_brichardi
nomascus_leucogenys



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



notamacropus_eugenii
notechis_scutatus
nothobranchius_furzeri
nothoprocta_perdicaria
numida_meleagris
ochotona_princeps
octodon_degus
oncorhynchus_kisutch
oncorhynchus_mykiss



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



oncorhynchus_tshawytscha



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



oreochromis_aureus
oreochromis_niloticus
ornithorhynchus_anatinus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



oryctolagus_cuniculus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



oryzias_javanicus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



oryzias_latipes



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



oryzias_sinensis
otolemur_garnettii
otus_sunia
ovis_aries



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



pan_paniscus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



pan_troglodytes
panthera_leo
panthera_pardus
panthera_tigris
papio_anubis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



parambassis_ranga



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



paramormyrops_kingsleyae
parus_major



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



pavo_cristatus
pelodiscus_sinensis
pelusios_castaneus
periophthalmus_magnuspinnatus
peromyscus_maniculatus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



petromyzon_marinus
phascolarctos_cinereus
phasianus_colchicus
phocoena_sinus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



physeter_catodon



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



piliocolobus_tephrosceles
podarcis_muralis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



poecilia_formosa
poecilia_latipinna
poecilia_mexicana
poecilia_reticulata
pogona_vitticeps
pongo_abelii
procavia_capensis
prolemur_simus
propithecus_coquereli
pseudonaja_textilis
pteropus_vampyrus
pundamilia_nyererei
pygocentrus_nattereri
rattus_norvegicus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



rhinolophus_ferrumequinum



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



rhinopithecus_bieti
rhinopithecus_roxellana
saimiri_boliviensis
salarias_fasciatus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



salmo_salar



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



salvator_merianae
sander_lucioperca
sarcophilus_harrisii



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



sciurus_vulgaris



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



scleropages_formosus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



scophthalmus_maximus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



serinus_canaria
seriola_dumerili
seriola_lalandi
sinocyclocheilus_anshuiensis
sinocyclocheilus_grahami
sinocyclocheilus_rhinocerous
sorex_araneus
sparus_aurata



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



spermophilus_dauricus
sphaeramia_orbicularis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



sphenodon_punctatus
stegastes_partitus
strigops_habroptila



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



strix_occidentalis
struthio_camelus
suricata_suricatta



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



sus_scrofa



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



taeniopygia_guttata



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



takifugu_rubripes



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



tetraodon_nigroviridis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



theropithecus_gelada



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



tupaia_belangeri
tursiops_truncatus
urocitellus_parryii
ursus_americanus
ursus_maritimus
ursus_thibetanus
varanus_komodoensis
vicugna_pacos
vombatus_ursinus
vulpes_vulpes
xenopus_tropicalis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



xiphophorus_couchianus
xiphophorus_maculatus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



zalophus_californianus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



zonotrichia_albicollis
zosterops_lateralis
### Arthropoda ###
acyrthosiphon_pisum
aedes_aegypti
aedes_albopictus
agrilus_planipennis
amphibalanus_amphitrite
anopheles_arabiensis
anopheles_atroparvus
anopheles_darlingi
anopheles_dirus
anopheles_farauti
anopheles_gambiae
anopheles_melas
anoplophora_glabripennis
aphidius_gifuensis
apis_dorsata
apis_florea
apis_mellifera
athalia_rosae
atta_cephalotes
bactrocera_dorsalis
bactrocera_latifrons
bactrocera_tryoni
belgica_antarctica
bemisia_tabaci
bombus_impatiens
bombus_terrestris
bombyx_mandarina
bombyx_mori
camponotus_floridanus
centruroides_sculpturatus
ceratitis_capitata
cimex_lectularius
culex_quinquefasciatus
culicoides_sonorensis
danaus_plexippus
daphnia_magna
daphnia_pulex
dendroctonus_ponderosae
dermacentor_silvarum
dermatophagoides_pteronyssinus
diabrotica_virgifera
diuraphis_noxia
drosophila_ananassae
drosophila_erecta
drosophila_grimshawi
drosophila_melanogaster
drosophila_mojavensis
drosophila_persimilis
drosophila_pseudoobscura
dr


Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



aspergillus_terreus
beauveria_bassiana
blumeria_graminis
botrytis_cinerea
candida_albicans
candida_parapsilosis
candida_tropicalis
colletotrichum_gloeosporioides
colletotrichum_graminicola
colletotrichum_higginsianum
colletotrichum_orbiculare
dothistroma_septosporum
fusarium_culmorum
fusarium_fujikuroi
fusarium_graminearum



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



fusarium_oxysporum
fusarium_pseudograminearum
fusarium_solani
fusarium_verticillioides



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



histoplasma_capsulatum
komagataella_pastoris
leptosphaeria_maculans
neurospora_crassa
pyrenophora_teres
pyricularia_oryzae
saccharomyces_cerevisiae
schizosaccharomyces_cryophilus
schizosaccharomyces_japonicus
schizosaccharomyces_octosporus
schizosaccharomyces_pombe
sclerotinia_sclerotiorum
trichoderma_reesei
trichoderma_virens
tuber_melanosporum
verticillium_dahliae
yarrowia_lipolytica
zymoseptoria_tritici
### Streptophyta ###
actinidia_chinensis
aegilops_tauschii
amborella_trichopoda
ananas_comosus
arabidopsis_halleri
arabidopsis_lyrata



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



arabidopsis_thaliana



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



arabis_alpina



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



asparagus_officinalis
avena_sativa
beta_vulgaris



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



brachypodium_distachyon



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



brassica_juncea
brassica_napus
cajanus_cajan



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



camelina_sativa



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



cannabis_sativa



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



capsicum_annuum



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



chara_braunii
chenopodium_quinoa
citrullus_lanatus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



citrus_clementina
coffea_canephora



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



corchorus_capsularis
corylus_avellana
cucumis_melo
cucumis_sativus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



cynara_cardunculus
daucus_carota



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



digitaria_exilis
dioscorea_rotundata



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



eragrostis_curvula



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



eragrostis_tef
eucalyptus_grandis
eutrema_salsugineum
ficus_carica



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



fraxinus_excelsior
glycine_max



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



gossypium_raimondii



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



helianthus_annuus



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



hordeum_vulgare
ipomoea_triloba
juglans_regia



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



kalanchoe_fedtschenkoi
lactuca_sativa



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



leersia_perrieri
lolium_perenne



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



lupinus_angustifolius
malus_domestica



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



manihot_esculenta
marchantia_polymorpha
medicago_truncatula



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



musa_acuminata
nicotiana_attenuata



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



nymphaea_colorata



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



olea_europaea
oryza_barthii
oryza_brachyantha



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



oryza_glaberrima



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



oryza_glumipatula
oryza_longistaminata
oryza_meridionalis
oryza_nivara
oryza_punctata
oryza_rufipogon
panicum_hallii



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



papaver_somniferum



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



phaseolus_vulgaris



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



physcomitrium_patens



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



pistacia_vera
pisum_sativum
populus_trichocarpa



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



prunus_avium
prunus_dulcis
prunus_persica
quercus_lobata



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



quercus_suber
rosa_chinensis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



saccharum_spontaneum
secale_cereale
selaginella_moellendorffii
sesamum_indicum
setaria_italica
setaria_viridis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



solanum_lycopersicum



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



solanum_tuberosum
sorghum_bicolor



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



theobroma_cacao



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



trifolium_pratense
triticum_dicoccoides
triticum_spelta
triticum_urartu



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



vigna_angularis



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



vigna_radiata



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



vigna_unguiculata



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



vitis_vinifera



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



zea_mays



Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



In [82]:
fig_name = 'fig_S4'
fig_path = os.path.join(figures_dir, fig_name + '.html')
#os.remove(fig_path)
with open(fig_path, 'a') as fo:
    for fig in figures:
        fo.write(fig.to_html(full_html=False, include_plotlyjs='cdn'))