In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import met_brewer
import pandas as pd
import numpy as np
import seaborn as sns
import sys
import upsetplot

import statsmodels.api as sm
import statsmodels.formula.api as smf

from Bio.Seq import Seq
from scipy.stats import fisher_exact
from scipy.stats import mannwhitneyu
from scipy.stats import pearsonr

import plotting
from plotting import PAPER_PRESET, PAPER_FONTSIZE, nice_boxplot, nice_violinplot, mimic_r_boxplot


# Draw figures inline in the notebook output, using the SVG figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Explicitly switch matplotlib's `figure.autolayout` setting off.
mpl.rcParams['figure.autolayout'] = False

In [3]:
from data_loading import (load_annotated_6k_collection,
                          load_valid_isoform_clones,
                          load_developmental_tissue_expression_remapped)

In [4]:
# Font size shared by the figures in this notebook (project-wide constant).
fontsize = PAPER_FONTSIZE
# Apply the project's seaborn preset as keyword arguments to sns.set.
sns.set(**PAPER_PRESET)

In [5]:
# Fix NumPy's global (legacy) random state so stochastic steps are repeatable.
np.random.seed(seed=2023)

## functions

In [6]:
def calculate_tau(df):
    """Compute a per-row tau score from a numeric table, in two NaN flavours.

    For each row, every value is divided by the row maximum, subtracted from 1,
    and the results are averaged across columns.

    Parameters
    ----------
    df : object exposing ``.values`` as a 2-D numeric array (rows x columns),
        e.g. a pandas DataFrame.

    Returns
    -------
    nonan_taus : 1-D array
        Strict version: uses ``np.max``/``np.sum`` and divides by the total
        number of columns, so any NaN in a row makes that row's tau NaN.
    nan_taus : 1-D array
        NaN-tolerant version: uses ``np.nanmax``/``np.nansum`` and divides by
        the number of non-NaN values in the row.
    array_max : 1-D array
        Per-row maximum computed with ``np.nanmax`` (NaNs ignored).

    Notes
    -----
    A row whose maximum is 0 produces 0/0 in the scaling step: the strict tau
    is NaN, while the tolerant tau is 0 because ``np.nansum`` drops the NaNs.

    NOTE(review): the denominator here is the number of columns (n); the tau
    index is often written with n - 1 -- confirm this is intentional.
    """
    values = df.values
    n_columns = values.shape[1]

    # Strict pass: NaNs propagate through max and sum.
    # Transposing lets the 1-D row maxima broadcast against the rows.
    strict_max = np.max(values, axis=1)
    strict_deficit = 1 - (values.T / strict_max).T
    nonan_taus = np.sum(strict_deficit, axis=1) / n_columns

    # Tolerant pass: NaNs are skipped, and each row is averaged over the
    # values it actually has.
    array_max = np.nanmax(values, axis=1)
    tolerant_deficit = 1 - (values.T / array_max).T
    n_observed = np.count_nonzero(~np.isnan(values), axis=1)
    nan_taus = np.nansum(tolerant_deficit, axis=1) / n_observed

    return nonan_taus, nan_taus, array_max

## variables

In [7]:
# Relative path to the tab-separated category table that is read with
# pd.read_table in the import-data section (it carries a `dn_cat` column).
dn_cats_f = "../data/processed/DN_cats_Joung.tsv"

In [8]:
# Colour lookup keyed by category label.
# Categories drawn from seaborn's "Set2" palette, given as label -> palette slot
# ("ref" and "ref-v-ref" deliberately share slot 0).
pal = {
    label: sns.color_palette("Set2")[slot]
    for label, slot in {"ref": 0, "ref-v-ref": 0, "rewire": 2, "DN": 1}.items()
}
# Remaining categories use fixed named greys.
pal.update({"NA": "lightgray", "likely": "darkgray"})

## 1. import data

In [9]:
# Load the category table (tab-separated) from the path set in `dn_cats_f`.
dn_cats = pd.read_table(dn_cats_f)
# Label rows with a missing category as the literal string "NA".
# The filled column is assigned back rather than calling
# `fillna(..., inplace=True)` on the column selection: that chained in-place
# form does not update `dn_cats` when pandas Copy-on-Write is active, and newer
# pandas versions warn about it (a warning the global filter would hide).
dn_cats["dn_cat"] = dn_cats["dn_cat"].fillna("NA")
# Show how many rows fall into each category.
dn_cats["dn_cat"].value_counts()

NA        278
ref       246
DN         93
rewire     74
likely      2
Name: dn_cat, dtype: int64

In [10]:
# Load the annotated collection through the project's data_loading helper.
# The recorded output shows this run was served from a cache.
tfs = load_annotated_6k_collection()

reading from cache


In [11]:
# Load the remapped developmental tissue expression data via the project helper
# (served from cache in the recorded run). The result is indexable: `dev[0]` is
# the table previewed in the next cell, with a `UID` index and one column per
# ERR... accession.
dev = load_developmental_tissue_expression_remapped()

reading from cache


In [16]:
# Preview the first rows of the first element of `dev`.
# NOTE(review): this cell's execution count (16) does not follow the previous
# cell (11) -- confirm the notebook still runs cleanly top to bottom on a fresh kernel.
dev[0].head()

Unnamed: 0_level_0,ERR2598356,ERR2598060,ERR2598278,ERR2598294,ERR2598268,ERR2598057,ERR2598194,ERR2598317,ERR2598130,ERR2598362,...,ERR2598246,ERR2598076,ERR2598117,ERR2598350,ERR2598133,ERR2598319,ERR2598289,ERR2598099,ERR2598286,ERR2598191
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AEBP2|2/3|05F03 nomatch,0.0,1.608057,0.258097,0.764935,0.162696,3.159794,1.389297,0.0,1.686133,1.372545,...,0.196325,1.298377,0.917712,1.448113,2.616694,0.0,0.0,2.389,0.0,3.467645
AEBP2|3/3|05E07 nomatch,0.407233,0.0,0.0,0.0,0.0,0.0,0.511905,0.0,0.411271,0.0,...,0.0,0.209914,0.0,0.294385,0.560105,0.162574,0.0,0.338601,0.0,0.493397
ARNT2|1/6|08C12 ARNT2-201,5.147588,4.908563,2.770852,4.771093,4.595307,6.597226,3.07863,1.455923,4.724328,3.125368,...,3.781559,3.227964,5.227733,6.901724,3.820393,5.7231,0.444248,3.951187,2.071869,4.477444
ARNT2|2/6|09E01 nomatch,0.044957,0.0,0.0,0.0,0.0,2.549023,0.0,0.0,0.0,0.0,...,0.0,0.0,0.273123,0.0,0.086424,0.099842,0.0,0.0,0.0,0.204975
ARNT2|3/6|10D11 ARNT2-209,0.0,0.0,3.173089,0.0,3.528334,0.0,0.0,0.0,0.0,0.0,...,2.399852,0.0,0.0,0.320557,0.0,2.527281,0.0,0.0,1.880133,0.0
