In [74]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual, Dropdown, Button
from ipywidgets import Output, HBox, Label, Text, RadioButtons, HTML, FileUpload

import ipywidgets as widgets

import pandas as pd
import numpy as np

import os
import re

from IPython.display import clear_output
from IPython.display import display, HTML
from IPython.display import Image

import random
import base64

from sklearn.decomposition import PCA
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.graph_objects as go

from dominate import tags

import codecs
import io

In [75]:
def HTML_with_style(df, style=None, random_id=None):
    """Render a DataFrame as HTML with CSS that is scoped to that single table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to render (via ``df.to_html()``).
    style : str, optional
        CSS rules, one rule per line, optionally wrapped in ``<style>`` tags.
        Every line is prefixed with ``table`` (unless it already starts with it)
        and every ``table`` selector is rewritten to ``table#<random_id>``.
        When omitted, the table text is simply coloured black.
    random_id : str, optional
        HTML id given to the table; a random ``id<number>`` is used when omitted.

    Returns
    -------
    IPython.display.HTML
        The style block followed by the table markup.
    """
    from IPython.display import HTML
    import numpy as np
    import re

    df_html = df.to_html()

    if random_id is None:
        # Draw the number directly instead of materialising a million-element array.
        random_id = 'id%d' % np.random.randint(1000000)

    if style is None:
        style = """
        <style>
            table#{random_id} {{color: black}}
        </style>
        """.format(random_id=random_id)
    else:
        new_style = []
        s = re.sub(r'</?style>', '', style).strip()
        for line in s.split('\n'):
            line = line.strip()
            if not line:
                # A blank line used to become a bare "table#id" selector that
                # merged with the next rule and silently disabled it.
                continue
            if not re.match(r'^table', line):
                line = 'table ' + line
            new_style.append(line)
        new_style = ['<style>'] + new_style + ['</style>']

        style = re.sub(r'table(#\S+)?', 'table#%s' % random_id, '\n'.join(new_style))

    df_html = re.sub(r'<table', r'<table id=%s ' % random_id, df_html)

    return HTML(style + df_html)

# Transcriptome-wide outlier detection and filtering application

### ! Beware: Saving tables will not work when using Internet Explorer !

Based on hisat2 count files, using the (hisat2) reference files listed below, and OUTRIDER normalization
(ref: Brechtmann <em>et al.</em> 2018, **Am J Hum Genet**):

| Fragments | Ref |
| :- | :- |
| Genes | GCF_000001405.25_GRCh37.p13_genomic.chr.genes.bed |
| Exons | GCF_000001405.25_GRCh37.p13_genomic.chr.transcripts.bed |
| Introns | GCF_000001405.25_GRCh37.p13_genomic.chr.introns.bed |


In [76]:
def set_password(password):
    """Store the entered password and the application version as module globals.

    `pwd` is what check_password() compares against; `version` is a fixed
    version string.
    """
    global version, pwd
    version = "v02.2_010222"
    pwd = password
    
# Renders a Text box pre-filled with 'password' (see the widget repr in the cell
# output) and feeds its value to set_password; the trailing ';' hides the return value.
interact(set_password, password='password');

interactive(children=(Text(value='password', description='password'), Output()), _dom_classes=('widget-interac…

In [77]:
def check_password():
    """Return True when the stored global `pwd` is correct.

    Raises
    ------
    ValueError
        If `pwd` does not match the expected password.
    """
    if pwd != 'password':
        raise ValueError('Password incorrect')
    return True

## Load data

### Select experiments and species

In [78]:
# make metadata filelist
# meta_path: folder that holds the metadata CSV file(s).
# NOTE(review): metadata() builds its own './metadata/' prefix and never reads meta_path.
meta_path = './metadata/'
# the single metadata file shipped with this notebook
meta = 'dekker_et_al_rnaseq_metadata.csv'
# options offered by the `meta` dropdown of the metadata loader
metalist = [meta]

In [80]:
# Read the chosen metadata file and keep the rows of the selected species/treatment.
# The selection (species, fragment type, filtered metadata) is published through
# module globals so that the later cells (sample list, PCA, result loading) can use it.

def metadata (meta, species, fragment):
    """Load the metadata file and store the selection in module globals.

    Parameters
    ----------
    meta : str
        File name inside './metadata/'; an empty string skips loading.
    species : str
        'fib_untreated' or 'fib_CHX'; stored as the global `tissue`.
    fragment : str
        Fragment type, stored as the global `fragments`.

    Sets the globals `tissue`, `fragments`, `experiment` and, for a known
    species, `metadata_species` (rows not flagged drop == "y" with the matching
    treatment and species 'fib'). `metadata_sample` is declared global but not
    assigned here.
    """
    global tissue, metadata_species, experiment, fragments, metadata_sample

    tissue, fragments = species, fragment

    check_password()

    if meta == '':
        return

    meta_table = pd.read_csv('./metadata/' + meta, sep = ';')

    # species option -> value expected in the 'treatment' column
    treatment_by_species = {'fib_untreated': 'untreated', 'fib_CHX': 'CHX'}
    if species in treatment_by_species:
        keep = (
            (meta_table['drop'] != "y")
            & (meta_table['treatment'] == treatment_by_species[species])
            & (meta_table['species'] == 'fib')
        )
        metadata_species = meta_table[keep]

    experiment = 'Dekker_et_al_rnaseq'


interact_manual(
    metadata,
    meta = list(metalist),
    species = ['fib_untreated','fib_CHX'],
    fragment = ['genes','exons','introns'],
)

interactive(children=(Dropdown(description='meta', options=('dekker_et_al_rnaseq_metadata.csv',), value='dekke…

<function __main__.metadata(meta, species, fragment)>

In [81]:
# experiment label used to select samples: update_options keeps the metadata rows
# whose 'experiment_GS' equals this value ('all' disables that filter)
GS_exp = 'Dekker_et_al_rnaseq'
    


### Select sample

First select the option 'samples' in the dropdown; only then is the sample list updated. If you choose a new species, press "Run Interact" to refresh the sample list.

In [83]:
ID_dropdown = Dropdown()

def update_options(*args):
    """Refresh the options of `ID_dropdown` from the loaded metadata.

    Accepts and ignores any positional arguments so it can be used both as a
    widget observer and as an `interact_manual` target.

    * If `metadata_species` and `GS_exp` are not both defined yet, the dropdown
      shows the placeholder entries 'refresh' and 'samples'.
    * For tissue 'AFC_0' / 'AFC_CHX' the ids come from `metadata_sample`.
    * Otherwise they come from `metadata_species`, restricted to rows whose
      'experiment_GS' equals `GS_exp` unless `GS_exp` is 'all'.

    Sample ids are listed in descending order without duplicates.
    """
    try:
        if 'metadata_species' in globals() and 'GS_exp' in globals():
            if tissue in ('AFC_0', 'AFC_CHX'):
                source = metadata_sample
            elif GS_exp != 'all':
                source = metadata_species[metadata_species['experiment_GS'] == GS_exp]
            else:
                source = metadata_species

            ID_dropdown.options = (
                source.sort_values('sample_id', ascending=False)['sample_id'].unique().tolist()
            )
        else:
            ID_dropdown.options = ['refresh','samples']

    except Exception:
        # Deliberately best-effort: this runs as an observer on every dropdown
        # change, so a failed refresh leaves the current options untouched.
        # Unlike the former bare `except`, KeyboardInterrupt/SystemExit propagate.
        pass

ID_dropdown.observe(update_options)

interact_manual(update_options)
@interact (ID = ID_dropdown)
def set_current_ID( ID):
    """Store the value picked in `ID_dropdown` in the global `current_sample_ID`."""
    global current_sample_ID
    current_sample_ID = ID
    

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…

interactive(children=(Dropdown(description='ID', options=('Dekker_et_al_individual9_untreated', 'Dekker_et_al_…

### Show PCA plot

Select the dataset you want to use.
The plot shows the count data of the previously selected species before OUTRIDER normalisation.

In [84]:
# PCA plot from countdata
def PCAplot ():
    """Show a PCA (PC1 vs PC2) of the raw count data with the selected sample in red.

    Reads './countdata/dekker_et_al_rnaseq_<fragments>_counts.tsv', keeps the
    columns whose name occurs in `metadata_species['sample_id']`, fits a
    5-component PCA on the transposed table (one row per sample) and plots the
    first two components.

    Relies on the globals `experiment`, `fragments`, `metadata_species` and
    `current_sample_ID`; prints a hint and returns when one of them is missing.
    """
    required = ('experiment', 'fragments', 'metadata_species', 'current_sample_ID')
    missing = [name for name in required if name not in globals()]
    if missing:
        # The file name below needs `fragments`, so the check has to come first
        # (the original built the name before testing for it).
        print("Load the metadata and select a sample first (missing: " + ", ".join(missing) + ")")
        return

    count_path = "./countdata/"
    file = 'dekker_et_al_rnaseq_' + fragments + '_counts.tsv'

    f = pd.read_csv(count_path+file, sep = '\t').set_index('Unnamed: 0')

    # only samples filtered in metadata; `Index & Series` is no longer a set
    # intersection in current pandas, so ask for the intersection explicitly
    f_res = f[f.columns.intersection(metadata_species['sample_id'])]

    pca = PCA(n_components=5)
    pca.fit(f_res.T)
    data_pca = pca.transform(f_res.T)
    data_pca = pd.DataFrame( data_pca , index = f_res.T.index )

    # Formatting for PCA: sample ids move from the index into the 'index' column
    data_pca.reset_index(inplace=True)

    # Percentage of variance
    variance = pca.explained_variance_ratio_

    red = data_pca.loc[data_pca['index'] == current_sample_ID]

    # figure
    fig = make_subplots(rows=1, cols=1)
    # all data
    fig.add_scatter(x=data_pca[0], y=data_pca[1], mode="markers",
                    marker=dict(color="Blue"),
                    row=1, col=1, text=data_pca['index'])
    # sample data
    fig.add_scatter(x=red[0], y=red[1],
                marker=dict(color="Red"),
                row=1, col=1, text=current_sample_ID)

    fig.update_traces(hoverinfo = 'text', selector = dict(type='scatter'),
                       hoverlabel = dict(namelength = -1))
    fig.update_layout(title= "PCA plot before normalisation: "+current_sample_ID, title_x = 0.5, height=550, width = 550,
                      showlegend = False, template = "simple_white")
    fig.update_xaxes(title_text="PC 1 ({}% variance) ".format(int(variance[0] *100)), showgrid=False)
    fig.update_yaxes(title_text="PC 2 ({}% variance) ".format(int(variance[1] *100)), showgrid=False)

    fig.show()

interact_manual(PCAplot)

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…

<function __main__.PCAplot()>

### Select results-file 

Loading the data might take a while (even up to 5 minutes).

In [86]:
# load outrider resultfile
def path_results ():
    """Load the OUTRIDER result table for the selected tissue and fragment type.

    Builds './outrider/dekker_et_al_rnaseq_<tissue>_res_outrider_<fragments>_counts.tsv.gz'
    from the globals `tissue` and `fragments` and publishes:

    * `path` / `filename` - the full path of the result file,
    * `resultfile`        - the bare file name (read by the "Save table" cell),
    * `stats`             - the loaded table.

    Raises ValueError (from check_password) when the password is wrong.
    """
    global stats, filename, path, resultfile

    check_password()

    main_path = './outrider/'
    file = 'dekker_et_al_rnaseq_' + tissue + '_res_outrider_' + fragments + '_counts.tsv.gz'

    path = main_path + file
    filename = path
    # `resultfile` was declared global but never assigned, so building the
    # title in the "Save table" cell failed with a NameError.
    resultfile = file

    # NOTE(review): the name ends in .tsv.gz but the file is parsed with
    # read_csv's default separator - confirm the file really is comma-separated.
    stats = pd.read_csv(path, compression='gzip')

interact_manual(path_results)

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…

<function __main__.path_results()>

In [87]:
# list of genepanels
path_to_gene_panels= './genepanels/'

# panel name (file name without its '.csv' suffix) -> list of the values in the
# first column of that header-less CSV file
gene_panel_with_genes = {}
for gene_panel in os.listdir(path_to_gene_panels):
    if not gene_panel.endswith('.csv'):
        continue
    try:
        panel_table = pd.read_csv(path_to_gene_panels + gene_panel, header = None)
        # strip only the trailing '.csv'; str.replace would also remove a
        # '.csv' occurring in the middle of the name
        gene_panel_with_genes[gene_panel[:-len('.csv')]] = panel_table[0].tolist()
    except Exception as err:
        # keep loading the other panels, but say which one was skipped instead
        # of silently dropping it from the dropdown
        print("Skipping gene panel %s: %s" % (gene_panel, err))

# shared widget style: let descriptions take the width they need
style = {'description_width': 'initial'}

## Filter on Z-scores and p-value, HPO terms, ROH, genomic position

In [88]:
# filtering of zScores
# sort option -> [column to sort on, ascending?]
ascending_sort_dict = {'zScore_abs':['zScore_abs',False],
                       'pValue':['pValue',True],
                       'geneID':['geneID',True],
                       'gene':['gene',True],
                       'zScore_neg':['zScore',True],
                       'zScore_pos':['zScore',False] }


def _read_hpo_table(loc_hpo):
    """Read the HPO phenotype-to-genes file with the column names used by the filters."""
    return pd.read_csv(loc_hpo, sep='\t', header = 0,
                       names = ["HPO_term_id","HPO_term_name","entrez_gene_id","entrez_gene_name",
                                "Frequency_HPO","G-D_source", "disease_id"])


def _with_chr_prefix(chrom):
    """Return the chromosome name as a string that starts with 'chr'."""
    chrom = str(chrom)
    return chrom if chrom.startswith('chr') else 'chr' + chrom


def filter_Z_score (gene_panel, Z_threshold, p_threshold, sort_by, only_significant, search_gene_specific, search_gene_global,
                    search_genes_from_list, hpo_term_name, hpo_term_id, hpo_id_in_list, hpo_id_all_list,
                    chrom, chr_start, chr_end, position_upload):
    """Filter the loaded result table and display the first 50 rows.

    Filters are applied in this order: distribution plot of all Z-scores
    (display only), gene panel, |zScore| >= Z_threshold and pValue <= p_threshold,
    'aberrant' rows only (when only_significant == 'yes'), one gene search
    (exact name, else part of name, else '|'-separated list), one HPO filter
    (id, else name, else "any id of list", else "all ids of list"), chromosome
    region, and uploaded region file (tab-delimited: chr, start, end).

    Parameters
    ----------
    gene_panel : str
        Key of `gene_panel_with_genes`, or 'all'.
    Z_threshold, p_threshold : str or float
        Thresholds; converted with float().
    sort_by : str
        Key of `ascending_sort_dict`.
    only_significant : str
        'yes' keeps only rows with aberrant == True.
    search_gene_specific, search_gene_global, search_genes_from_list : str
        Gene filters; the first non-empty one is used.
    hpo_term_name, hpo_term_id, hpo_id_in_list, hpo_id_all_list : str
        HPO filters; combining hpo_term_id with another HPO field (or both
        list fields) only shows a message and applies no HPO filter.
    chrom, chr_start, chr_end : str
        Region filter; start and end must be given together or not at all.
    position_upload : mapping
        Value of the FileUpload widget (file name -> {'content': bytes, ...}).
        NOTE(review): this is the ipywidgets 7 layout - confirm the installed version.

    Raises
    ------
    ValueError
        Wrong password, or only one of chr_start / chr_end given.

    Publishes the globals s_gene, panel, Z, position, genes, df_hpo,
    df_position, uploaded_filename and df_filtered (the full filtered table).
    """
    check_password()

    global s_gene, df_filtered, panel, Z, df_position, position, uploaded_filename, genes, df_hpo

    s_gene = search_gene_specific
    panel = gene_panel
    Z = Z_threshold
    position = position_upload

    if not ('stats' in globals() and 'current_sample_ID' in globals()):
        print("Load a results file and select a sample first.")
        return

    Z_threshold = float(Z_threshold)
    p_threshold = float(p_threshold)

    # NOTE(review): `stats` is used as loaded, without selecting the rows of
    # current_sample_ID - confirm whether a sampleID filter is missing here.
    df = stats

    # distributionplot (best effort: a failing plot must not block the filtering)
    try:
        Zs = df['zScore'].values
        hist_data = [Zs]
        group_labels = ['distplot']

        fig = ff.create_distplot(hist_data, group_labels, show_rug=False, bin_size=.2)
        fig.update_layout(title= current_sample_ID, title_x = 0.4, height = 500, width = 600,
                      showlegend = False)
        fig.update_xaxes(title_text="Z-Score", showgrid=False)
        fig.update_yaxes(title_text="counts", showgrid=False)
        fig.add_vline( np.median(Zs), line_color="black")

        fig.show()

    except Exception as err:
        print("Distribution plot skipped: %s" % err)

    # Not all? then select gene panel
    if gene_panel != 'all':
        genes = gene_panel_with_genes[gene_panel]
        df = df.loc[ df['gene'].isin(genes) ]
    else:
        genes = []

    # filter results on thresholds
    df = df.loc[ df['zScore'].abs() >= Z_threshold]
    df = df.loc[ df['pValue'] <= p_threshold]

    # significant filtering
    if only_significant == 'yes':
        df = df.loc[ (df['aberrant'] == True) ]

    # filter on specific genes (first non-empty field wins)
    if search_gene_specific != '':
        df = df.loc[ df['gene'] == search_gene_specific ]

    elif search_gene_global != '':
        df = df.loc[ df['gene'].str.contains( search_gene_global,na=False, case=False ) ]

    elif search_genes_from_list != '':
        genes = [ gene.strip(' ') for gene in search_genes_from_list.split('|') ]
        df = df.loc[ df['gene'].isin(genes) ]

    # filtering on HPO terms
    loc_hpo = './hpo/phenotype_to_genes.txt'

    # filter on HPO id
    if hpo_term_id != "":
        if hpo_term_name != "":
            display(widgets.HTML(tags.h5("Fill in only HPO_term_id or HPO_term_name, not both").render()))
        elif hpo_id_in_list != "":
            display(widgets.HTML(tags.h5("Fill in only HPO_term_id or gene of a HPO id in list, not both").render()))
        elif hpo_id_all_list != "":
            display(widgets.HTML(tags.h5("Fill in only HPO_term_id or gene of all HPO ids from list, not both").render()))
        else:
            if not hpo_term_id.startswith('HP'):
                hpo_term_id = 'HP:' + str(hpo_term_id)
            display(widgets.HTML(tags.h5("Filter on HPO_term_id: "+hpo_term_id).render()))
            df_hpo = _read_hpo_table(loc_hpo)
            df_hpo = df_hpo.loc[df_hpo["HPO_term_id"] == hpo_term_id]

            hpo_genes = df_hpo['entrez_gene_name']
            # extra column for HPO term; assign() works on a copy, so no
            # write into a slice of `stats`
            df = df.loc[ df['gene'].isin(hpo_genes) ].assign(Present_HPO_term = hpo_term_id)

    #filter on HPO name
    elif hpo_term_name != "":
        display(widgets.HTML(tags.h5("Filter on HPO_term_name: " + hpo_term_name).render()))
        hpo_term_name = hpo_term_name.lower()
        df_hpo = _read_hpo_table(loc_hpo)
        df_hpo = df_hpo.loc[ df_hpo['HPO_term_name'].str.contains( hpo_term_name, na=False, case=False ) ]

        # only necessary columns
        df_hpo_name = df_hpo[['entrez_gene_name','HPO_term_id','HPO_term_name']]

        # filter on genes
        hpo_genes = df_hpo['entrez_gene_name']
        df = df.loc[ df['gene'].isin(hpo_genes) ]
        # extra column for HPO term; merge df and df_hpo_name
        df_merge = pd.merge(left=df, right=df_hpo_name, how='left', left_on='gene', right_on='entrez_gene_name')
        df = df_merge.drop(['entrez_gene_name'], axis =1).rename(columns={'HPO_term_name':'Present_HPO_name', 'HPO_term_id':'Present_HPO_term'})

    #filter on HPO list (either/or)
    elif hpo_id_in_list != "":
        if hpo_id_all_list != "":
            display(widgets.HTML(tags.h5("Fill in only 'gene of a HPO id in list:' or 'gene of all HPO ids from list:', not both").render()))
        else:
            hpo_ids = [ ids.strip(' ') for ids in hpo_id_in_list.split('|') ]
            display(widgets.HTML(tags.h5("Filter on genes containing at least one HPO id from list: "+ str(hpo_ids)).render()))

            df_hpo = _read_hpo_table(loc_hpo)

            # One block of rows per HPO id, each using only that id's own genes.
            # (The gene set used to be intersected cumulatively across ids, so
            # every id after the first only matched genes shared with all
            # earlier ids - not "at least one".)
            per_term = []
            for ids in hpo_ids:
                term_genes = set(df_hpo.loc[df_hpo["HPO_term_id"] == ids, 'entrez_gene_name'])
                per_term.append(df.loc[ df['gene'].isin(term_genes) ].assign(Present_HPO_term = ids))

            # hpo_ids is never empty (str.split returns at least one item)
            df = pd.concat(per_term, ignore_index=True)
            # number of rows in which each geneID occurs in the stacked result
            df['freq_HPO'] = df.groupby(by='geneID')['geneID'].transform('count')

    #filter on HPO list (and/and)
    elif hpo_id_all_list != "":
        hpo_ids = [ ids.strip(' ') for ids in hpo_id_all_list.split('|') ]
        display(widgets.HTML(tags.h5("Filter on genes containing all HPO id from list: "+ str(hpo_ids)).render()))

        df_hpo = _read_hpo_table(loc_hpo)
        gene_list = set(df_hpo["entrez_gene_name"])

        # keep the genes that are annotated with every id of the list
        for ids in hpo_ids:
            gene_list = gene_list.intersection(
                set(df_hpo.loc[df_hpo["HPO_term_id"] == ids, 'entrez_gene_name']))

        # extra column for present HPO terms
        df = df.loc[ df['gene'].isin(list(gene_list)) ].assign(
            Present_HPO_term = hpo_id_all_list.replace("|",";"))

    # chrom filtering
    if chrom != '':
        chrom = _with_chr_prefix(chrom)
        if chr_start != '':
            chr_start = float(chr_start)
            if chr_end == '':
                raise ValueError("Fill in chr_end position; for whole chromosome no value needed")
            chr_end = float(chr_end)
            df = df.loc[ (df['chr'] == chrom) & (df['start'] >= chr_start) & (df['end'] <= chr_end)]
        elif chr_end == '':
            df = df.loc[ (df['chr'] == chrom) ]
            display(widgets.HTML(tags.h5("Filtering on whole chromosome").render()))
        else:
            raise ValueError("Fill in chr_start position; for whole chromosome no value needed")

    # filtering on ROH areas by uploadfile (tab-delimited; chr start end)
    uploaded_filename = "NA"
    if len(position) != 0:
        uploaded_filename = next(iter(position))
        display(widgets.HTML(tags.h3("Upload file: "+uploaded_filename).render()))
        df_position = pd.read_csv(io.BytesIO(position[uploaded_filename]['content']), sep='\t', header = None)
        df_position.columns = ['search_chrom', 'search_chrom_start', 'search_chrom_end']

        # collect the rows that fall completely inside each uploaded region and
        # stack them once (DataFrame.append no longer exists in pandas 2)
        regions = []
        for region in df_position.itertuples(index=False):
            region_chrom = _with_chr_prefix(region.search_chrom)
            regions.append(df.loc[ (df['chr'] == region_chrom )
                                   & (df['start'] >= region.search_chrom_start)
                                   & (df['end'] <= region.search_chrom_end) ])
        df = pd.concat(regions, ignore_index=True) if regions else df.iloc[0:0]

    else:
        display(widgets.HTML(tags.h5("No Upload file present").render()))

        df_position = ''

    # add column for genomic position
    df = df.assign(genomic_position = df['chr']
                   + ":"
                   + df['start'].values.astype(str)
                   + "-"
                   + df['end'].values.astype(str)
                   )

    # new columns for FoldChange and absolute Zscore
    df = df.assign(FC = 2**df['l2fc'])
    df = df.assign(zScore_abs = df['zScore'].abs() )

    # add column for link
    df = df.assign(IGV_url = "http://localhost:60151/load?sessionURL="
                   + df['link_bam']
                   + "&genome=hg19&locus="
                   + df['chr']
                   + ":"
                   + df['start'].values.astype(str)
                   + "-"
                   + df['end'].values.astype(str)
                   + "&merge=false")

    # delete not necessary columns
    df = df.drop(['link_bam'], axis=1)
    df = df.drop(['theta','padj_rank','AberrantBySample','AberrantByGene'], axis=1)

    # sort
    col, TF = ascending_sort_dict[sort_by]
    df = df.sort_values(by= col, ascending = TF)

    df_filtered = df

    # only the first 50 rows are shown; df_filtered keeps the full result
    display(HTML_with_style(df.iloc[0:50]))

        
# dropdown options: every loaded gene panel plus 'all', sorted case-insensitively
gene_panels = sorted(list(gene_panel_with_genes.keys()) + ['all'], key=str.lower)

# Show the usage hint for the region (ROH) upload above the filter controls.
display(widgets.HTML(tags.h5("For ROH filtering a tab-delimited *.txt file without headers can be "
                             "uploaded (format: chr startpos endpos). For filtering on whole chromosome only fill in chrom.").render()))


# Upload widget for the region file; it is not displayed here but handed to
# interact_manual below as the `position_upload` control.
uploader = FileUpload(accept='.txt', multiple=False)
#display(uploader)

# Button that discards a previously uploaded region file.
button = widgets.Button(description="Delete upload file")
display(button)

def on_button_clicked(b):
    """Reset the upload widget: zero its counter and empty its stored value."""
    # NOTE(review): `_counter` is a private attribute and `value.clear()` needs a
    # mutable mapping - presumably tied to the ipywidgets 7 FileUpload; confirm.
    uploader._counter = 0
    uploader.value.clear()

button.on_click(on_button_clicked)


# Build the controls for filter_Z_score and hand them to interact_manual.
def _filter_text(placeholder, description):
    """Return an empty, enabled Text input that uses the shared description style."""
    return Text(value='', placeholder=placeholder, description=description,
                style = style, disabled=False)


_filter_controls = dict(
    gene_panel = gene_panels,
    Z_threshold = '3',
    p_threshold = '0.01',
    sort_by = list(ascending_sort_dict.keys()),
    only_significant = RadioButtons(
        options=['no','yes'],
        value='no',
        description='only significant?',
        disabled=False, style = style),
    search_gene_specific = _filter_text('specific gene', 'specific genename:'),
    search_gene_global = _filter_text('part of gene', 'part of gene name:'),
    search_genes_from_list = _filter_text('gene1|gene2', 'genes from list:'),
    hpo_term_name = _filter_text('External ear malformation', 'HPO name:'),
    hpo_term_id = _filter_text('HP:0012125', 'HPO id:'),
    hpo_id_in_list = _filter_text('HP:0000001|HP:0000002', 'single HPO id in list:'),
    hpo_id_all_list = _filter_text('HP:0000001|HP:0000002', 'all HPO ids from list:'),
    chrom = '',
    chr_start = '',
    chr_end = '',
    position_upload = uploader,
)

# trailing ';' keeps the returned function out of the cell output
interact_manual(filter_Z_score, **_filter_controls);


HTML(value='<h5>For ROH filtering a tab-delimited *.txt file without headers can be uploaded (format: chr star…

Button(description='Delete upload file', style=ButtonStyle())

interactive(children=(Dropdown(description='gene_panel', options=('all', 'AMYLO_V1', 'ANEU_V10', 'AUT_V5', 'AU…

### Download filtered list

In [58]:
# download filtered list
button = widgets.Button(description='Save table')
out = widgets.Output()

def on_button_clicked(_):
    """Offer the filtered table as a downloadable tab-separated file.

    Reads the globals `df_filtered`, `version`, `tissue`, `panel`, `Z` and
    `uploaded_filename`. A one-line description of the filter settings is used
    as the name of the index column, so it ends up in the first header cell of
    the file. The table is base64-encoded into a data URL behind a
    "Download File" button shown in `out`.

    Raises ValueError (from check_password) when the password is wrong.
    """
    check_password()

    # "linking function with output"
    with out:
        # what happens when we press the button
        if( 'df_filtered' in globals() ):
            if( df_filtered.empty ):
                print("Filtered list is empty")

            else:
                clear_output()

                # `resultfile` is not assigned by the original loader cell; fall
                # back to the result path (`filename`) instead of failing.
                result_name = globals().get('resultfile', globals().get('filename', 'NA'))

                title = ("RNAseq_filtering version: "+version+"; Tissue: "+tissue
                         +"; Resultfile: "+str(result_name)+"; gene_panel: "+panel
                         +"; Z-threshold: "+str(Z)+"; ROH file: "+uploaded_filename)

                # rename_axis works on a copy, so the global table keeps its own
                # index name; to_csv with tabs makes the content match the
                # '.tsv' file name (to_string produced space-aligned text)
                res = df_filtered.rename_axis(title).to_csv(sep='\t')
                #FILE
                filename = 'res.tsv'
                b64 = base64.b64encode(res.encode())
                payload = b64.decode()

                #BUTTONS
                html_buttons = '''<html>
                <head>
                <meta name="viewport" content="width=device-width, initial-scale=1">
                </head>
                <body>
                <a download="{filename}" href="data:text/csv;base64,{payload}" download>
                <button class="p-Widget jupyter-widgets jupyter-button widget-button mod-warning">Download File</button>
                </a>
                </body>
                </html>
                '''
                html_button = html_buttons.format(payload=payload,filename=filename)
                display(HTML(html_button))

        else:
            print("No filtered list available!")

# linking button and function together using a button's method
button.on_click(on_button_clicked)
# displaying button and its output together
widgets.VBox([button,out])

VBox(children=(Button(description='Save table', style=ButtonStyle()), Output()))

## Volcano plot

In [59]:
def volcano_plot(Z_threshold, p_threshold, max_gene_annot, only_panelgenes ):
    """Draw a volcano plot (Z-score vs -log10 p-value) of the loaded results.

    Parameters
    ----------
    Z_threshold, p_threshold : str or float
        Rows with |zScore| >= Z_threshold and pValue <= p_threshold are annotated.
    max_gene_annot : str or int
        Maximum number of annotated rows per highlighted group.
    only_panelgenes : str
        'no' additionally annotates all hits (orange); the panel hits (dark red)
        are always drawn.

    The panel genes come from the global `genes` set by the filter cell; when
    that cell has not run yet, or the list is empty, every hit counts as a
    panel hit. Only the first 10000 rows of the table are drawn as background.

    Raises ValueError (from check_password) when the password is wrong.
    """
    check_password()

    Z_threshold = float(Z_threshold)
    p_threshold = float(p_threshold)
    max_gene_annot = int(max_gene_annot)

    df = stats

    # rows passing both thresholds, in table order
    hits = df.loc[ (df['zScore'].abs() >= Z_threshold) & (df['pValue'] <= p_threshold) ]
    df_orange = hits.iloc[0:max_gene_annot]

    # `genes` only exists after the filter cell ran; df_red used to be left
    # undefined in that case
    panel_genes = globals().get('genes') or []
    if panel_genes:
        df_red = hits.loc[ hits['gene'].isin(panel_genes) ]
    else:
        df_red = hits
    df_red = df_red.iloc[0:max_gene_annot]

    #plotly
    fig = make_subplots(rows=1, cols=1)
    # all data (log10 so the values match the axis label)
    fig.add_scatter(x = df['zScore'].values[0:10000],
                    y = - np.log10( df['pValue'] )[0:10000],
                    mode="markers",
                    marker=dict(color="Blue"),
                    row=1, col=1)


    # sample data all genes
    if (only_panelgenes == "no"):
        gene_names = df_orange['gene']
        fig.add_scatter(x = df_orange['zScore'],
                        y = - np.log10( df_orange['pValue'] ),
                        mode="markers+text",
                        marker=dict(color="orange"),
                        row=1, col=1, text = gene_names, textposition = 'top center', textfont_size = 10)

    # sample data panelgenes
    gene_names = df_red['gene']
    fig.add_scatter(x = df_red['zScore'],
                    y = - np.log10( df_red['pValue'] ),
                    mode="markers+text",
                    marker=dict(color="darkred"),
                    row=1, col=1, text = gene_names, textposition = 'top center', textfont_size = 10)


    fig.update_layout(title= current_sample_ID, title_x = 0.5, height = 750, width = 1000,
                      showlegend = False)
    fig.update_xaxes(title_text="Z-Score", showgrid=False)
    fig.update_yaxes(title_text="-log10(Pvalue)", showgrid=False)


    display(fig)

interact_manual(volcano_plot,
                Z_threshold = Text(
                    value='5',
                    placeholder='5',
                    description='Z score threshold:',
                    style = style, disabled=False),
                p_threshold = Text(
                    value='0.01',
                    placeholder='0.01',
                    description='p-value threshold:',
                    style = style, disabled=False),
                max_gene_annot = Text(
                    value='100',
                    placeholder='100',
                    description='max number of annotated genes:',
                    style = style, disabled=False),
                only_panelgenes = RadioButtons(
                    options=['yes','no'],
                    value='yes',
                    description='show only panelgenes?',
                    disabled=False, style = style)
           );

interactive(children=(Text(value='5', description='Z score threshold:', placeholder='5', style=DescriptionStyl…

## All fragments in Gene of Interest



In [18]:
# show all fragments for a gene of interest (only for exons and introns)
def gene_of_interest_plot(search_gene):
    """Plot the Z-score of every fragment of one gene along its start position.

    Parameters
    ----------
    search_gene : str
        Exact gene name as it appears in the 'gene' column of `stats`.

    The start position is the second '_'-separated field of 'geneID'. Rows are
    ordered by that position as a number, so the line runs left to right. When
    the result file name contains 'intron' the x axis shows the geneID itself,
    otherwise the start position.

    Prints a message instead of plotting when no gene name is given or the
    gene is not in the data. Raises ValueError (from check_password) when the
    password is wrong.
    """
    check_password()

    if search_gene.strip(' ') == '':
        print("Submit gene name.")
        return

    # select the gene first so the start position is parsed for its rows only
    df = stats.loc[ stats['gene'] == search_gene]

    if( df.empty):
        print("Invalid gene name or gene not in data")
        return

    # define chr.start position; non-numeric fields become NaN instead of raising
    df = df.assign(start1 = pd.to_numeric(df['geneID'].str.split('_').str[1], errors='coerce'))
    # numeric order (a string sort puts e.g. '1000' before '200'); sort_values
    # returns a new frame, so nothing is written into a slice of `stats`
    df = df.sort_values(['start1', 'geneID'], ascending=True)

    # plot
    if re.search('intron',filename):
        x = df['geneID']
    else:
        x = df['start1']
    y = df['zScore']

    fig = px.line(x = x, y = y)
    fig.update_xaxes(title_text="chr_start_position", showgrid=True)
    fig.update_yaxes(title_text=current_sample_ID, showgrid=False)
    fig.add_hline(y=0, line_dash="dash", line_color = 'grey')
    fig.update_layout(title= search_gene, title_x = 0.5, height=550,
              showlegend = False)

    fig.show()


interact_manual(gene_of_interest_plot,
               search_gene = Text(
                    value='',
                    placeholder='RALGAPA1',
                    description='search gene:',
                    style = style, disabled=False))

interactive(children=(Text(value='', description='search gene:', placeholder='RALGAPA1', style=DescriptionStyl…

<function __main__.gene_of_interest_plot(search_gene)>

## Exon ranking plot

This may take a while (up to a few minutes).
You can fill in a (unique) part of the geneID

In [19]:
# compare zScore of a fragment: chosen sample vs all samples
def ranking_plot(gene_ID):
    """Plot normalised counts of all samples and highlight the current sample.

    Reads the CSV at the global ``path`` into the global ``df_gene``, sorts it
    by ``normcounts`` and draws every sample in light grey with the row(s) of
    the global ``current_sample_ID`` in red on top.  The sorted
    ``sampleID``/``normcounts`` table is displayed below the figure.

    Parameters
    ----------
    gene_ID : str
        Used as the figure title; an empty string aborts with a message.
        NOTE(review): gene_ID is not used to select the data here - the file
        read is whatever the global ``path`` points to; confirm that ``path``
        is set from the gene ID elsewhere in the notebook.
    """
    check_password()

    global df_gene

    if gene_ID == '':
        print('invalid gene_ID')
        return

    df_gene = pd.read_csv(path)
    df_gene.sort_values('normcounts', axis=0, inplace=True, ascending=True)

    df_red = df_gene.loc[df_gene['sampleID'] == current_sample_ID]

    clear_output()

    # plot
    fig = make_subplots(rows=1, cols=1)
    # all data
    fig.add_scatter(x=df_gene['sampleID'], y=df_gene['normcounts'], mode="markers",
                    marker=dict(color="lightgrey"),
                    row=1, col=1, text=df_gene['sampleID'])
    # sample data: hover text must come from df_red; using df_gene here would
    # label the red point with the first (lowest-count) sample instead.
    fig.add_scatter(x=df_red['sampleID'], y=df_red['normcounts'],
                    marker=dict(color="Red"),
                    row=1, col=1, text=df_red['sampleID'])

    fig.update_traces(hoverinfo='text', selector=dict(type='scatter'),
                      hoverlabel=dict(namelength=-1))
    fig.update_layout(title=gene_ID, title_x=0.3, height=800, width=1200,
                      showlegend=False, template="simple_white")
    fig.update_xaxes(title_text="sampleID", showgrid=False, automargin=True)
    fig.update_yaxes(title_text="norm. counts", showgrid=False)

    fig.show()

    # display table
    display(HTML_with_style(df_gene[["sampleID", "normcounts"]]))
    

# Render a text box for the gene ID; ranking_plot runs only when the
# interact_manual button is clicked.
# NOTE(review): `style` comes from outside this cell - confirm it is defined
# before this cell runs.
interact_manual(ranking_plot,
                gene_ID = Text(
                    value='', 
                    placeholder='chr17_79649179_79650042_ARL16', 
                    description='gene_ID:', 
                    style = style, disabled=False)
        )

interactive(children=(Text(value='', description='gene_ID:', placeholder='chr17_79649179_79650042_ARL16', styl…

<function __main__.ranking_plot(gene_ID)>

## Z-score on chromosome

In [20]:
# global view of zScores (chosen sample) along the chromosomes
def plot_Z_scores_on_chromosome2(N_Z_scores):
    """Plot a random subset of z-scores along each chromosome.

    Draws one subplot per chromosome (chr1..chr22, chrX, chrY in alphabetical
    order, two columns) containing the sampled z-scores as markers plus a
    centred rolling mean (window = 5 % of the points on that chromosome) as a
    black line.

    Side effect: the global ``df`` is overwritten with the sampled and sorted
    subset of the global ``stats`` table.

    Parameters
    ----------
    N_Z_scores : str or int
        Number of rows to sample from ``stats``; capped at the table size.
    """
    global df
    check_password()

    N = int(N_Z_scores)
    df = stats

    # random.sample accepts k == len(population), so cap at the full table
    # instead of silently dropping one row.
    N = min(N, df.shape[0])

    ind = random.sample(range(len(df)), N)
    df = df.iloc[ind, :]

    df = df.sort_values(by=['chr', 'start'])

    chromosomes = df['chr'].unique()

    # color: cycle through the palette so that tables with more distinct
    # `chr` values than palette entries do not raise an IndexError.
    color = px.colors.qualitative.Alphabet
    chr_with_color = {c: color[i % len(color)] for i, c in enumerate(chromosomes)}

    gb = df.groupby('chr')
    chromomes = ['chr' + str(i) for i in range(1, 23)]
    chromomes.extend(['chrX', 'chrY'])
    chromomes.sort()

    fig = make_subplots(rows=13, cols=2, subplot_titles=chromomes)

    for i, chromosome in enumerate(chromomes):
        if chromosome not in gb.groups:
            continue

        df_chrom = gb.get_group(chromosome)

        # define color
        c = chr_with_color[chromosome]

        # define data
        x = df_chrom['start'].sort_values()
        y = df_chrom['zScore']
        y_rol = y.rolling(window=int(len(df_chrom) * 0.05), center=True).mean()

        # Subplot titles are assigned row by row, so the cell belonging to
        # title i is fixed by i itself.  Deriving it from i (rather than from
        # counters that only advance for plotted chromosomes) keeps data under
        # the right title when a chromosome is absent from the sample.
        row, col = i // 2 + 1, i % 2 + 1

        fig.add_scatter(x=x, y=y, mode="markers",
                        marker=dict(color=c),
                        row=row, col=col, name=chromosome)
        fig.add_scatter(x=x, y=y_rol, mode="lines",
                        line_color='black',
                        row=row, col=col)

    # Figure-wide settings are applied once, after all traces exist; repeating
    # add_hline inside the loop stacked duplicate lines on every subplot.
    fig.update_layout(title=current_sample_ID, title_x=0.5, width=1400, height=8000,
                      showlegend=False)
    fig.update_xaxes(title_text="Chromosomal position", showgrid=False)
    fig.update_yaxes(title_text="Z-Score", showgrid=False)
    fig.add_hline(y=0, line_dash="dash", line_color='grey')

    display(fig)


# A string default makes interact_manual render N_Z_scores as a text box
# (the function converts it with int()); the trailing ';' suppresses the
# cell's return-value repr.
interact_manual(plot_Z_scores_on_chromosome2, N_Z_scores = '100000' );
        

interactive(children=(Text(value='100000', description='N_Z_scores'), Button(description='Run Interact', style…

## Panels of filtered genes

In [21]:
# show all genepanels of filtered genes
def panel_filtered_genes():
    """List, for every filtered gene, the gene panels that contain it.

    Takes the unique genes of the global ``df_filtered`` and looks each one up
    in the global ``gene_panel_with_genes`` mapping (panel name -> genes).
    The resulting (gene, panel) pairs are stored in the global ``df_panel``
    and displayed.
    """
    global df_panel

    goi = list(set(df_filtered['gene']))
    heading = tags.h5("Genepanels of all filtered genes: " + str(goi)).render()
    display(widgets.HTML(heading))

    # One [gene, panel] pair per panel membership, genes in `goi` order.
    gene_panel_pairs = [
        [gene, panel_name]
        for gene in goi
        for panel_name, members in gene_panel_with_genes.items()
        if gene in members
    ]

    df_panel = pd.DataFrame(gene_panel_pairs, columns=["gene", "panels"])

    display(df_panel)
    
# No arguments: interact_manual renders only a button that runs
# panel_filtered_genes when clicked.
interact_manual(panel_filtered_genes)

interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…

<function __main__.panel_filtered_genes()>

In [22]:
# download
# Button that triggers the panel-table export, and the Output widget that
# on_button_clicked1 writes its messages / download link into.
button = widgets.Button(description='Save table panels')
out1 = widgets.Output()

def on_button_clicked1(_):
    """Offer the global ``df_panel`` table as a downloadable file.

    Click handler for the 'Save table panels' button.  The table is rendered
    with ``DataFrame.to_string`` (the index name carries a title line built
    from the current filter settings), base64-encoded and embedded in a
    ``data:`` link that is shown inside the ``out1`` output widget.

    Parameters
    ----------
    _ : ipywidgets.Button
        The clicked button (unused).
    """
    check_password()

    # "linking function with output"
    with out1:
        # what happens when we press the button
        if 'df_panel' not in globals():
            print("No panellist available!")
            return

        if df_panel.empty:
            print("Panels of filtered genelist is empty")
            return

        clear_output()

        title = "RNAseq_filtering version: "+version+"; Tissue: "+tissue +"; Resultfile: "+resultfile+"; gene_panel: "+panel+"; Z-threshold: "+Z+"; ROH file: "+uploaded_filename

        # rename_axis returns a new frame, so the title is only attached to
        # the exported copy; assigning df_panel.index.name directly would
        # leave the title on the shared global table after every click.
        res1 = df_panel.rename_axis(title).to_string()
        #FILE
        filename = 'panels.tsv'
        b64 = base64.b64encode(res1.encode())
        payload = b64.decode()

        #BUTTONS
        html_buttons = '''<html>
                <head>
                <meta name="viewport" content="width=device-width, initial-scale=1">
                </head>
                <body>
                <a download="{filename}" href="data:text/csv;base64,{payload}" download>
                <button class="p-Widget jupyter-widgets jupyter-button widget-button mod-warning">Download File</button>
                </a>
                </body>
                </html>
                '''
        html_button = html_buttons.format(payload=payload,filename=filename)
        display(HTML(html_button))

# linking button and function together using a button's method
# Register on_button_clicked1 as the click handler of the button.
button.on_click(on_button_clicked1)
# Stack the button above its output area; as the last expression of the cell
# this VBox is what gets displayed.
widgets.VBox([button,out1])

VBox(children=(Button(description='Save table panels', style=ButtonStyle()), Output()))

## HPO terms of filtered genes

In [23]:
# show all HPO terms of the filtered genes
def hpo_filtered_genes():
    """Show the HPO annotations of every gene in the filtered result.

    Loads './hpo/genes_to_phenotype.txt' into the global ``df_hpo``, keeps the
    rows whose ``entrez_gene_symbol`` occurs among the unique genes of the
    global ``df_filtered`` and displays the remaining table.
    """
    global df_hpo

    # Column names applied to the tab-separated file (its own header row is
    # replaced through header=0).
    hpo_columns = ["entrez_gene_id", "entrez_gene_symbol", "HPO_term_id", "HPO_term_name",
                   "Frequency_Raw", "Frequency_HPO", "Additional_info", "G-D_source", "disease_id"]
    df_hpo = pd.read_csv('./hpo/genes_to_phenotype.txt', sep='\t', header=0,
                         names=hpo_columns)

    # unique genes of the filtered result
    goi = list(set(df_filtered['gene']))
    heading = tags.h5("HPO terms of all filtered genes: " + str(goi)).render()
    display(widgets.HTML(heading))

    in_filtered = df_hpo["entrez_gene_symbol"].isin(goi)
    df_hpo = df_hpo.loc[in_filtered]
    display(HTML_with_style(df_hpo.iloc[:]))

# No arguments: interact_manual renders only a button that runs
# hpo_filtered_genes when clicked.
interact_manual(hpo_filtered_genes)


interactive(children=(Button(description='Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widge…

<function __main__.hpo_filtered_genes()>

In [None]:
# download
# Button that triggers the HPO-table export, and the Output widget that
# on_button_clicked2 writes its messages / download link into.
# Note: this rebinds the name `button` used by the panel-export cell.
button = widgets.Button(description='Save table HPO')
out2 = widgets.Output()

def on_button_clicked2(_):
    """Offer the global ``df_hpo`` table as a downloadable file.

    Click handler for the 'Save table HPO' button.  The table is rendered
    with ``DataFrame.to_string`` (the index name carries a title line built
    from the current filter settings), base64-encoded and embedded in a
    ``data:`` link that is shown inside the ``out2`` output widget.

    Parameters
    ----------
    _ : ipywidgets.Button
        The clicked button (unused).
    """
    check_password()

    # "linking function with output"
    with out2:
        # what happens when we press the button
        if 'df_hpo' not in globals():
            print("No HPO filtered list available!")
            return

        if df_hpo.empty:
            print("HPO filtered list is empty")
            return

        clear_output()

        title = "RNAseq_filtering version: "+version+"; Tissue: "+tissue +"; Resultfile: "+resultfile+"; gene_panel: "+panel+"; Z-threshold: "+Z+"; ROH file: "+uploaded_filename

        # rename_axis returns a new frame, so the title is only attached to
        # the exported copy; assigning df_hpo.index.name directly would leave
        # the title on the shared global table after every click.
        res2 = df_hpo.rename_axis(title).to_string()
        #FILE
        filename = 'hpo.tsv'
        b64 = base64.b64encode(res2.encode())
        payload = b64.decode()

        #BUTTONS
        html_buttons = '''<html>
                <head>
                <meta name="viewport" content="width=device-width, initial-scale=1">
                </head>
                <body>
                <a download="{filename}" href="data:text/csv;base64,{payload}" download>
                <button class="p-Widget jupyter-widgets jupyter-button widget-button mod-warning">Download File</button>
                </a>
                </body>
                </html>
                '''
        html_button = html_buttons.format(payload=payload,filename=filename)
        display(HTML(html_button))

# linking button and function together using a button's method
# Register on_button_clicked2 as the click handler of the button.
button.on_click(on_button_clicked2)
# Stack the button above its output area; as the last expression of the cell
# this VBox is what gets displayed.
widgets.VBox([button,out2])

## HPO terms of gene of interest

In [None]:
# Show HPO terms for a gene of interest
def gene_of_interest_hpo(hpo_gene):
    """Display all HPO annotations of a single gene.

    Reads './hpo/genes_to_phenotype.txt', keeps the rows whose
    ``entrez_gene_symbol`` equals ``hpo_gene`` and shows them sorted by
    ``HPO_term_id``.

    Parameters
    ----------
    hpo_gene : str
        Gene symbol to look up; surrounding whitespace is ignored.
    """
    check_password()

    # Strip once so that the emptiness check and the lookup agree: input such
    # as ' RALGAPA1' previously passed the check but could never match.
    hpo_gene = hpo_gene.strip()
    if hpo_gene == '':
        print("Submit gene name.")
        return

    loc_hpo = './hpo/genes_to_phenotype.txt'
    df_hpo = pd.read_csv(loc_hpo, sep='\t', header=0,
                         names=["entrez_gene_id", "entrez_gene_symbol", "HPO_term_id", "HPO_term_name",
                                "Frequency_Raw", "Frequency_HPO", "Additional_info", "G-D_source",
                                "disease_id"])
    df_hpo = df_hpo.loc[df_hpo['entrez_gene_symbol'] == hpo_gene]

    # sort
    df_hpo = df_hpo.sort_values(by="HPO_term_id")

    display(HTML_with_style(df_hpo))

# Render a text box for the gene symbol; gene_of_interest_hpo runs only when
# the interact_manual button is clicked.
# NOTE(review): `style` comes from outside this cell - confirm it is defined
# before this cell runs.
interact_manual(gene_of_interest_hpo,
               hpo_gene = Text(
                    value='', 
                    placeholder='RALGAPA1', 
                    description='search gene:', 
                    style = style, disabled=False))

## Genes with HPO_term

In [None]:
# Find genes with a certain HPO term (name or id)
def hpo(hpo_term_name, hpo_term_id):
    """Display the genes annotated with an HPO term, selected by id or by name.

    Exactly one of the two arguments must be non-empty.  The selection is made
    in './hpo/phenotype_to_genes.txt' and shown sorted by ``entrez_gene_name``.

    Parameters
    ----------
    hpo_term_name : str
        Case-insensitive pattern searched for in ``HPO_term_name``
        (``Series.str.contains`` semantics, i.e. a regular expression).
    hpo_term_id : str
        HPO term id matched exactly against ``HPO_term_id``; 'HP:' is
        prepended when the value does not start with 'HP'.
    """
    check_password()

    # Validate the input before touching the file.
    if hpo_term_id != "" and hpo_term_name != "":
        display("fill in only HPO_term_id or HPO_term_name, not both")
        return
    if hpo_term_id == "" and hpo_term_name == "":
        # The original reused the "not both" text here, which is misleading
        # when nothing was entered at all.
        display("fill in HPO_term_id or HPO_term_name")
        return

    loc_hpo = './hpo/phenotype_to_genes.txt'
    df_hpo = pd.read_csv(loc_hpo, sep='\t', header=0,
                         names=["HPO_term_id", "HPO_term_name", "entrez_gene_id", "entrez_gene_name",
                                "Frequency_HPO", "G-D_source", "disease_id"])

    if hpo_term_id != "":
        if not hpo_term_id.startswith('HP'):
            hpo_term_id = 'HP:' + str(hpo_term_id)
        df_hpo = df_hpo.loc[df_hpo["HPO_term_id"] == hpo_term_id]
    else:
        # case=False already makes the search case-insensitive; lower-casing
        # the pattern as well would corrupt regex classes such as \S or \D.
        df_hpo = df_hpo.loc[df_hpo['HPO_term_name'].str.contains(hpo_term_name, na=False, case=False)]

    # sort
    df_hpo = df_hpo.sort_values(by="entrez_gene_name")
    display(HTML_with_style(df_hpo))
        
# Render two text boxes (term name and term id); hpo runs only when the
# interact_manual button is clicked and expects exactly one of them filled.
# NOTE(review): `style` comes from outside this cell - confirm it is defined
# before this cell runs.
interact_manual(hpo,
               hpo_term_name = Text(
                    value='', 
                    placeholder='External ear malformation', 
                    description='HPO term name:', 
                    style = style, disabled=False),
                hpo_term_id = Text(
                    value='', 
                    placeholder='HP:0012125', 
                    description='HPO term id:', 
                    style = style, disabled=False)
               )

## Panelgenes without HPO terms
#### Table of genes from the chosen panel without available HPO terms.

In [None]:
# which panelgenes do not have HPO terms available (only works when a panel is chosen at the filtering of zScores)
def hpo_panel():
    """Display the genes of the selected panel that have no HPO terms.

    Uses the global ``panel`` (set when filtering z-scores).  Each line of
    './panels_alias_or/<panel>.csv' is one panel gene, optionally followed by
    '|'-separated aliases.  A gene counts as having HPO terms when any of its
    names occurs in './hpo/HPO_unique_genes.txt'.  For genes without a match
    the first name on the line is listed, sorted alphabetically.
    """
    check_password()

    # Without a panel choice (variable missing or 'all') there is nothing to
    # compare; previously a missing variable produced no output at all.
    if 'panel' not in globals() or panel == 'all':
        display(widgets.HTML(tags.h5("No panel selected").render()))
        return

    display(widgets.HTML(tags.h5("Panel: "+ panel).render()))

    # unique panel entries (one per line, trailing newline removed);
    # `with` closes the file even when reading fails.
    panelfile = './panels_alias_or/' + panel + '.csv'
    with open(panelfile, 'r') as fileObj1:
        panel_genes = {line.split('\n')[0] for line in fileObj1}

    # unique genes that have HPO terms
    loc_hpo = './hpo/HPO_unique_genes.txt'
    with open(loc_hpo, "r") as fileObj_hpo:
        hpo_genes = {line.split('\n')[0] for line in fileObj_hpo}

    # Keep the first name of every panel entry none of whose names is known
    # to HPO.
    no_hpo_list = [
        panelgene.split('|')[0]
        for panelgene in panel_genes
        if not any(name in hpo_genes for name in panelgene.split('|'))
    ]

    # Build the frame in one go: DataFrame.append was removed in pandas 2.0
    # and growing a frame row by row is quadratic.
    df_diff = pd.DataFrame([(panel, g) for g in no_hpo_list],
                           columns=['Panel', 'Genes_no_HPO'])
    display(HTML_with_style(df_diff.sort_values(by="Genes_no_HPO").reset_index(drop=True)))

# No arguments: interact_manual renders only a button that runs hpo_panel
# when clicked.
interact_manual(hpo_panel)


## Low Expressed Genes in ROH
#### Table of genes which are lowly expressed in the regions of the uploaded file.
##### Default cpm-threshold of 1 means at least 20 counts of the fragment in a total library size of 20 million reads.

In [None]:
# Genes which are low expressed in ROH (upload file)

def Low_ROH(cpm_threshold):
    """Show lowly expressed fragments inside the uploaded regions (CPM based).

    Reads './countfiles_all_exp/<experiment>_ht2_<fragments>_counts.tsv',
    keeps the sample columns listed in ``metadata_species['sample_id']``,
    converts the counts to counts per million and selects fragments whose
    median CPM is below ``cpm_threshold`` while their total is above zero.
    Of those, the fragments lying completely inside one of the regions of the
    global ``df_position`` (columns: chromosome, start, end) are displayed.

    Parameters
    ----------
    cpm_threshold : str or float
        Median-CPM cut-off; must not be 0.
    """
    cpm_threshold = float(cpm_threshold)
    main_path_file = './countfiles_all_exp/'

    # The original guard `('df_position', 'fragments', 'experiment' in globals())`
    # is a non-empty tuple and therefore always true; test each name instead.
    required = ('df_position', 'position', 'fragments', 'experiment')
    missing = [name for name in required if name not in globals()]
    if missing:
        display(widgets.HTML(tags.h5("Missing input: " + ", ".join(missing)).render()))
        return

    if len(position) == 0:
        display(widgets.HTML(tags.h5("No Upload file present").render()))
        return

    if cpm_threshold == 0:
        display(widgets.HTML(tags.h5("cpm_threshold could not be 0").render()))
        return

    path = main_path_file + experiment + '_ht2_' + fragments + '_counts.tsv'
    countdata = pd.read_csv(path, sep='\t').set_index('Unnamed: 0')
    countdata = countdata.loc[:, countdata.columns.isin(metadata_species['sample_id'])]

    # counts per million (correction for library size), all columns at once
    # instead of assigning column by column into a slice
    df_cpm = countdata / countdata.sum(axis=0) * 1000000

    # Values above CPM = 1 are better left out (at least 20 counts at a
    # library size of 20 million): keep fragments with a median below the
    # threshold and a total above 0.
    df_low = df_cpm.loc[df_cpm.median(axis=1) < cpm_threshold]
    df_low = df_low.loc[df_low.sum(axis=1) > 0]

    # Which fragments are lowly expressed in the chosen tissue?  Only the
    # fragment id is needed from here on; split it into its four fields.
    df_low = df_low.reset_index()[['Unnamed: 0']]
    # reindex guarantees four columns even when no fragment is left
    parts = df_low['Unnamed: 0'].str.split('_', n=3, expand=True).reindex(columns=range(4))
    parts.columns = ['chr', 'start', 'end', 'gene_fragment']
    df_low = pd.concat([df_low, parts], axis=1)
    df_low[['start', 'end']] = df_low[['start', 'end']].apply(pd.to_numeric)

    # Collect the fragments lying completely inside each uploaded region.
    # itertuples keeps every column's own dtype (iterrows would turn an
    # integer chromosome into e.g. '1.0' if another column holds floats).
    hits = []
    for region in df_position.itertuples(index=False):
        chrom = str(region[0])
        if not chrom.startswith('chr'):
            chrom = 'chr' + chrom
        start, end = region[1], region[2]
        hits.append(df_low.loc[(df_low['chr'] == chrom)
                               & (df_low['start'] >= start)
                               & (df_low['end'] <= end)])

    # DataFrame.append was removed in pandas 2.0; concatenate once.  Without
    # any region the result is an empty table rather than an unbound name.
    df_roh = pd.concat(hits, ignore_index=True) if hits else df_low.iloc[0:0]

    display(df_roh[['chr', 'start', 'end', 'gene_fragment']])
            

# Render a text box for the CPM threshold (default '1'); Low_ROH runs only
# when the interact_manual button is clicked.
# NOTE(review): `style` comes from outside this cell - confirm it is defined
# before this cell runs.
interact_manual(Low_ROH,
               cpm_threshold = Text(
                    value='1', 
                    style = style, disabled=False)
               )

## Low Expressed Genes in ROH
#### Table of genes which are lowly expressed in the regions of the uploaded file.
##### Default tpm-threshold of 0.1875 means 15 reads of the fragment in a total library size of 80 million reads.

In [None]:
# based on TPM (transcripts per million)
# Genes which are low expressed in ROH (upload file)

def Low_ROH(tpm_threshold):
    """Show lowly expressed fragments inside the uploaded regions (TPM based).

    Reads './countfiles_all_exp/<experiment>_ht2_<fragments>_TPM.tsv', keeps
    the sample columns listed in ``metadata_species['sample_id']`` and selects
    fragments whose median TPM is below ``tpm_threshold`` while their total is
    above zero.  Of those, the fragments lying completely inside one of the
    regions of the global ``df_position`` (columns: chromosome, start, end)
    are displayed.

    Note: this definition reuses the name ``Low_ROH`` of the CPM-based cell
    above it; each cell hands its own function object to ``interact_manual``
    right after defining it.

    Parameters
    ----------
    tpm_threshold : str or float
        Median-TPM cut-off; must not be 0.
    """
    tpm_threshold = float(tpm_threshold)
    main_path_file = './countfiles_all_exp/'

    # The original guard `('df_position', 'fragments', 'experiment' in globals())`
    # is a non-empty tuple and therefore always true; test each name instead.
    required = ('df_position', 'position', 'fragments', 'experiment')
    missing = [name for name in required if name not in globals()]
    if missing:
        display(widgets.HTML(tags.h5("Missing input: " + ", ".join(missing)).render()))
        return

    if len(position) == 0:
        display(widgets.HTML(tags.h5("No Upload file present").render()))
        return

    if tpm_threshold == 0:
        display(widgets.HTML(tags.h5("tpm_threshold could not be 0").render()))
        return

    path = main_path_file + experiment + '_ht2_' + fragments + '_TPM.tsv'
    countdata = pd.read_csv(path, sep='\t').set_index('Unnamed: 0')
    df_tpm = countdata.loc[:, countdata.columns.isin(metadata_species['sample_id'])]

    # default threshold tpm = 0.1875, appr. 15 reads per 40 million x2
    # (paired-end) reads: keep fragments with a median below the threshold
    # and a total above 0.
    df_low = df_tpm.loc[df_tpm.median(axis=1) < tpm_threshold]
    df_low = df_low.loc[df_low.sum(axis=1) > 0]

    # Which fragments are lowly expressed in the chosen tissue?  Only the
    # fragment id is needed from here on; split it into its four fields.
    df_low = df_low.reset_index()[['Unnamed: 0']]
    # reindex guarantees four columns even when no fragment is left
    parts = df_low['Unnamed: 0'].str.split('_', n=3, expand=True).reindex(columns=range(4))
    parts.columns = ['chr', 'start', 'end', 'gene_fragment']
    df_low = pd.concat([df_low, parts], axis=1)
    df_low[['start', 'end']] = df_low[['start', 'end']].apply(pd.to_numeric)

    # Collect the fragments lying completely inside each uploaded region.
    # itertuples keeps every column's own dtype (iterrows would turn an
    # integer chromosome into e.g. '1.0' if another column holds floats).
    hits = []
    for region in df_position.itertuples(index=False):
        chrom = str(region[0])
        if not chrom.startswith('chr'):
            chrom = 'chr' + chrom
        start, end = region[1], region[2]
        hits.append(df_low.loc[(df_low['chr'] == chrom)
                               & (df_low['start'] >= start)
                               & (df_low['end'] <= end)])

    # DataFrame.append was removed in pandas 2.0; concatenate once.  Without
    # any region the result is an empty table rather than an unbound name.
    df_roh = pd.concat(hits, ignore_index=True) if hits else df_low.iloc[0:0]

    display(df_roh[['chr', 'start', 'end', 'gene_fragment']])
            
# Render a text box for the TPM threshold (default '0.1875'); Low_ROH (the
# TPM variant defined in this cell) runs only when the button is clicked.
# NOTE(review): `style` comes from outside this cell - confirm it is defined
# before this cell runs.
interact_manual(Low_ROH,
               tpm_threshold = Text(
                    value='0.1875', 
                    style = style, disabled=False)
               )

## Not Expressed Genes in ROH
#### Table of genes which are not expressed in the regions of the uploaded file: no counts are present in any of the samples.

In [None]:
# Genes which are not expressed in ROH (upload file)

def No_ROH():
    """Show fragments without any counts inside the uploaded regions.

    Reads './countfiles_all_exp/<experiment>_ht2_<fragments>_counts.tsv',
    keeps the sample columns listed in ``metadata_species['sample_id']`` and
    selects the fragments whose counts sum to zero over those samples.  Of
    those, the fragments lying completely inside one of the regions of the
    global ``df_position`` (columns: chromosome, start, end) are displayed.
    """
    main_path_file = './countfiles_all_exp/'

    # The original guard `('df_position','fragments', 'experiment' in globals())`
    # is a non-empty tuple and therefore always true; test each name instead.
    required = ('df_position', 'position', 'fragments', 'experiment')
    missing = [name for name in required if name not in globals()]
    if missing:
        display(widgets.HTML(tags.h5("Missing input: " + ", ".join(missing)).render()))
        return

    if len(position) == 0:
        display(widgets.HTML(tags.h5("No Upload file present").render()))
        return

    path = main_path_file + experiment + '_ht2_' + fragments + '_counts.tsv'
    countdata = pd.read_csv(path, sep='\t').set_index('Unnamed: 0')
    countdata = countdata.loc[:, countdata.columns.isin(metadata_species['sample_id'])]

    # total counts == 0.  Counts are non-negative, so the raw row sum is zero
    # exactly when the CPM row sum is zero; the CPM conversion is skipped,
    # which also avoids dividing by the total of an all-zero sample.
    df_no = countdata.loc[countdata.sum(axis=1) == 0]

    # Which fragments are not expressed in the chosen tissue?  Only the
    # fragment id is needed from here on; split it into its four fields.
    df_no = df_no.reset_index()[['Unnamed: 0']]
    # reindex guarantees four columns even when no fragment is left
    parts = df_no['Unnamed: 0'].str.split('_', n=3, expand=True).reindex(columns=range(4))
    parts.columns = ['chr', 'start', 'end', 'gene_fragment']
    df_no = pd.concat([df_no, parts], axis=1)
    df_no[['start', 'end']] = df_no[['start', 'end']].apply(pd.to_numeric)

    # Collect the fragments lying completely inside each uploaded region.
    # itertuples keeps every column's own dtype (iterrows would turn an
    # integer chromosome into e.g. '1.0' if another column holds floats).
    hits = []
    for region in df_position.itertuples(index=False):
        chrom = str(region[0])
        if not chrom.startswith('chr'):
            chrom = 'chr' + chrom
        start, end = region[1], region[2]
        hits.append(df_no.loc[(df_no['chr'] == chrom)
                              & (df_no['start'] >= start)
                              & (df_no['end'] <= end)])

    # DataFrame.append was removed in pandas 2.0; concatenate once.  Without
    # any region the result is an empty table rather than an unbound name.
    df_roh = pd.concat(hits, ignore_index=True) if hits else df_no.iloc[0:0]
    df_roh = df_roh[['chr', 'start', 'end', 'gene_fragment']]

    display(HTML_with_style(df_roh))
            

# No arguments: interact_manual renders only a button that runs No_ROH when
# clicked.
interact_manual(No_ROH)