In [None]:
import scvelo as scv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scAnalysis as scrna
import scanpy as sc
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

# Bare expression: by default Jupyter only echoes the last expression of a cell, so this line displays nothing here
scv.__version__
scv.settings.presenter_view = True  # set max width size for presenter view
scv.settings.set_figure_params('scvelo')  # for beautified visualization

In [None]:
def _convex_hull_volume(coords):
    '''
    Volume of the convex hull enclosing the rows of 'coords'.

    input:
    'coords': 2-D array with exactly three columns (one row per cell)

    output:
    the 'volume' attribute of PyntCloud's "convex_hull" structure built on those points
    '''
    #imported here so that pyntcloud is only needed when a volume is actually computed
    from pyntcloud import PyntCloud

    cloud=PyntCloud(points=pd.DataFrame(data=coords,columns=['x','y','z']))
    hull_id=cloud.add_structure("convex_hull")
    return cloud.structures[hull_id].volume


def entropy(andata,metadata):

    '''
    input:
    'andata': object with the expression and metadata information of the experiment (e.g. an AnnData);
    it must provide obsm['X_pca'] with at least three columns, obs[metadata], len() and boolean-mask indexing
    'metadata': the particular metadata we want to calculate the entropy difference for, eg: louvain

    output:
    'st': overall entropy difference between having all the classes defined by 'metadata' uniformly filling
    the PC1-PC2-PC3 volume versus having them distributed as seen in the experiment; it is the sum of the
    per-class values stored in 'diction' (classes set to NaN are not added)
    'diction': dictionary with the entropy difference for each individual class in 'metadata', computed as
    ni*log(vi/vt) where ni is the number of cells of the class, vi the convex hull volume of those cells and
    vt the convex hull volume of all cells. Classes with 3 cells or fewer get NaN.

    This function uses PyntCloud to estimate the volume occupied by cells in principal component space,
    using the first 3 PCs:
    https://pyntcloud.readthedocs.io/en/latest/
    '''

    emb='X_pca'

    #total volume
    vt=_convex_hull_volume(andata.obsm[emb][:,0:3])

    #entropy difference per category, filled in the order the categories first appear
    diction={}

    st=0
    for i in andata.obs[metadata].unique():
        andataaux=andata[andata.obs[metadata]==i]

        #number of i cells
        ni=len(andataaux)

        #a 3-D convex hull needs at least 4 points
        if ni>3:

            #volume of i cells
            vi=_convex_hull_volume(andataaux.obsm[emb][:,0:3])

            #entropy difference between i cells filling up the whole volume vt and occupying their actual space
            si=ni*np.log(vi/vt)
            st=st+si

            diction[i]=si
        else:
            diction[i]=np.nan

    return st,diction

In [None]:
# Silence all Python warnings; this repeats the filter already installed in the first cell
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Accumulator for the observed per-clone entropy differences; the "LOAD DATA" cells below append to it
entropy_per_expbc_df=pd.DataFrame()

In [None]:
# Accumulator for the entropy differences of randomly assigned classes; the "RANDOM" cell below appends to it
random_entropy_df=pd.DataFrame()

In [None]:
#RANDOM
#Null model for the clone entropy: at every timepoint and for every cell line the cells are
#relabelled with random classes (as many classes as there are clone ids) and the per-class
#entropy difference is recomputed. All timepoints share one loop instead of one copy each.

data_dir='/home/estraja4/BFX_research/Resistance/barcoding/final_anndatasNEW/'

#h5ad file -> cell line name
lines_dict={data_dir+'HCC4006.h5ad':'HCC4006',
            data_dir+'HCC827velo_nolayer.h5ad':'HCC827',
            data_dir+'MGH707.h5ad':'MGH707',
            data_dir+'PC9.h5ad':'PC9'}
files=list(lines_dict)

timepoints=['0hr','24hr','14day','16day']
n_randomizations=50
#seeded generator so that re-running the cell reproduces the same random labels
rng=np.random.default_rng(0)

m='cloneid'
random_frames=[]
for timepoint in timepoints:
    #summary table is rebuilt for every timepoint, so after the loop it describes the last one
    entropy_df=pd.DataFrame(index=list(lines_dict.values()),columns=['entropy_diff','entropy_diff_per_category'])

    for f in files:
        print(f)
        adataall = sc.read(f)
        #keep only the cells sampled at this timepoint
        adata=adataall[adataall.obs['timepoint']==timepoint]

        #remove ribosomal and mitochondrial genes
        #NOTE(review): this drops every gene whose symbol starts with 'RP' or 'MT', which is broader
        #than the RPS/RPL and MT- families - confirm this is intended
        no_rb_genes=[n for n in adata.var_names if not n.startswith(('RP','MT'))]
        adata=adata[:,no_rb_genes]

        #keep only cells with assigned class
        adata=adata[adata.obs[m]!='-']
        adata=adata[adata.obs[m]!='no-bc']

        adata=scrna.dim_reduction(adata,n_neighbors=10,n_pcs=40,umap=True,tsne=False,plot=False,savepdf=False)

        #as many random classes as number of cloneid; does not change between repeats
        N=len(adata.obs[m].unique())

        for kk in range(n_randomizations):#repeat randomization
            adata.obs['random']=rng.integers(N,size=len(adata.obs)).tolist()

            st,diction=entropy(adata,'random')

            #overwritten on every repeat: only the last randomization is kept in the summary
            entropy_df.loc[lines_dict[f],'entropy_diff']=st
            entropy_df.loc[lines_dict[f],'entropy_diff_per_category']=st/len(adata.obs['random'].unique())

            #one row per random class
            dfaux=pd.DataFrame(data=diction, index=[0]).T
            dfaux['cell_line']=lines_dict[f]
            dfaux['timepoint']=timepoint
            dfaux=dfaux.rename(columns={0:'entropy_difference'})
            random_frames.append(dfaux)

#DataFrame.append was removed in pandas 2.0 and copies the whole table on every call;
#collect the pieces and concatenate once, keeping any rows already accumulated
if not random_entropy_df.empty:
    random_frames.insert(0,random_entropy_df)
random_entropy_df=pd.concat(random_frames)
    


In [None]:
#LOAD DATA
#Entropy difference of every clone at one timepoint: computed per cell line, appended to
#entropy_per_expbc_df and drawn on a 2x2 grid with one panel per cell line.
timepoint='0hr'

data_dir='/home/estraja4/BFX_research/Resistance/barcoding/final_anndatasNEW/'

#h5ad file -> cell line name
lines_dict={data_dir+'HCC4006.h5ad':'HCC4006',
            data_dir+'HCC827velo_nolayer.h5ad':'HCC827',
            data_dir+'MGH707.h5ad':'MGH707',
            data_dir+'PC9.h5ad':'PC9'}
files=list(lines_dict)

#per cell line summary for this timepoint
entropy_df=pd.DataFrame(index=list(lines_dict.values()),columns=['entropy_diff','entropy_diff_per_category'])

fig, axs = plt.subplots(2,2, figsize=(10, 7))
axs=axs.ravel()
k=0
m='cloneid'
entropy_col=m+': entropy_diff'
for f in files:
    print(f)
    adataall = sc.read(f)
    #keep only the cells sampled at this timepoint
    adata=adataall[adataall.obs['timepoint']==timepoint]

    #remove ribosomal and mitochondrial genes
    #NOTE(review): this drops every gene whose symbol starts with 'RP' or 'MT', which is broader
    #than the RPS/RPL and MT- families - confirm this is intended
    no_rb_genes=[n for n in adata.var_names if not n.startswith(('RP','MT'))]
    adata=adata[:,no_rb_genes]

    #keep only cells with assigned class
    adata=adata[adata.obs[m]!='-']
    adata=adata[adata.obs[m]!='no-bc']

    adata=scrna.dim_reduction(adata,n_neighbors=10,n_pcs=40,umap=True,tsne=False,plot=False,savepdf=False)

    st,diction=entropy(adata,m)
    n_categories=len(adata.obs[m].unique())
    print('Entropy difference',st)
    print('Average entropy difference per category:',st/n_categories)

    entropy_df.loc[lines_dict[f],'entropy_diff']=st
    entropy_df.loc[lines_dict[f],'entropy_diff_per_category']=st/n_categories

    #one row per clone
    dfaux=pd.DataFrame(data=diction, index=[0]).T
    dfaux['cell_line']=lines_dict[f]
    dfaux['timepoint']=timepoint
    dfaux=dfaux.rename(columns={0:'entropy_difference'})
    #DataFrame.append was removed in pandas 2.0; pd.concat is its replacement
    entropy_per_expbc_df=dfaux if entropy_per_expbc_df.empty else pd.concat([entropy_per_expbc_df,dfaux])

    #give every cell the entropy difference of its clone; a single .loc call writes into
    #adata.obs itself, whereas the chained obs[col].loc[mask]=val may write into a temporary copy
    adata.obs[entropy_col]=0.0
    for key,val in diction.items():
        adata.obs.loc[adata.obs[m]==key,entropy_col]=val
    #colour scale is left automatic for this timepoint (the other timepoints fix it to [-400, 0])
    scv.pl.scatter(adata, color=[entropy_col], size=100,ax=axs[k],add_outline=None,show=False,color_map='winter')
    axs[k].set_title(lines_dict[f])
    k=k+1
plt.show()
fig.savefig('figures/figure2/'+m+'_entropydiff_'+timepoint+'.pdf', bbox_inches='tight')


In [None]:
#LOAD DATA
#Entropy difference of every clone at one timepoint: computed per cell line, appended to
#entropy_per_expbc_df and drawn on a 2x2 grid with one panel per cell line.
timepoint='24hr'

data_dir='/home/estraja4/BFX_research/Resistance/barcoding/final_anndatasNEW/'

#h5ad file -> cell line name
lines_dict={data_dir+'HCC4006.h5ad':'HCC4006',
            data_dir+'HCC827velo_nolayer.h5ad':'HCC827',
            data_dir+'MGH707.h5ad':'MGH707',
            data_dir+'PC9.h5ad':'PC9'}
files=list(lines_dict)

#per cell line summary for this timepoint
entropy_df=pd.DataFrame(index=list(lines_dict.values()),columns=['entropy_diff','entropy_diff_per_category'])

fig, axs = plt.subplots(2,2, figsize=(10, 7))
axs=axs.ravel()
k=0
m='cloneid'
entropy_col=m+': entropy_diff'
for f in files:
    print(f)
    adataall = sc.read(f)
    #keep only the cells sampled at this timepoint
    adata=adataall[adataall.obs['timepoint']==timepoint]

    #remove ribosomal and mitochondrial genes
    #NOTE(review): this drops every gene whose symbol starts with 'RP' or 'MT', which is broader
    #than the RPS/RPL and MT- families - confirm this is intended
    no_rb_genes=[n for n in adata.var_names if not n.startswith(('RP','MT'))]
    adata=adata[:,no_rb_genes]

    #keep only cells with assigned class
    adata=adata[adata.obs[m]!='-']
    adata=adata[adata.obs[m]!='no-bc']

    adata=scrna.dim_reduction(adata,n_neighbors=10,n_pcs=40,umap=True,tsne=False,plot=False,savepdf=False)

    st,diction=entropy(adata,m)
    n_categories=len(adata.obs[m].unique())
    print('Entropy difference',st)
    print('Average entropy difference per category:',st/n_categories)

    entropy_df.loc[lines_dict[f],'entropy_diff']=st
    entropy_df.loc[lines_dict[f],'entropy_diff_per_category']=st/n_categories

    #one row per clone
    dfaux=pd.DataFrame(data=diction, index=[0]).T
    dfaux['cell_line']=lines_dict[f]
    dfaux['timepoint']=timepoint
    dfaux=dfaux.rename(columns={0:'entropy_difference'})
    #DataFrame.append was removed in pandas 2.0; pd.concat is its replacement
    entropy_per_expbc_df=dfaux if entropy_per_expbc_df.empty else pd.concat([entropy_per_expbc_df,dfaux])

    #give every cell the entropy difference of its clone; a single .loc call writes into
    #adata.obs itself, whereas the chained obs[col].loc[mask]=val may write into a temporary copy
    adata.obs[entropy_col]=0.0
    for key,val in diction.items():
        adata.obs.loc[adata.obs[m]==key,entropy_col]=val
    scv.pl.scatter(adata, color=[entropy_col], size=100,ax=axs[k],add_outline=None,show=False,color_map='winter',vmin=-400,vmax=0)
    axs[k].set_title(lines_dict[f])
    k=k+1
plt.show()
fig.savefig('figures/'+m+'_entropydiff_'+timepoint+'.pdf')


In [None]:
#LOAD DATA
#Entropy difference of every clone at one timepoint: computed per cell line, appended to
#entropy_per_expbc_df and drawn on a 2x2 grid with one panel per cell line.
timepoint='14day'

data_dir='/home/estraja4/BFX_research/Resistance/barcoding/final_anndatasNEW/'

#h5ad file -> cell line name
lines_dict={data_dir+'HCC4006.h5ad':'HCC4006',
            data_dir+'HCC827velo_nolayer.h5ad':'HCC827',
            data_dir+'MGH707.h5ad':'MGH707',
            data_dir+'PC9.h5ad':'PC9'}
files=list(lines_dict)

#per cell line summary for this timepoint
entropy_df=pd.DataFrame(index=list(lines_dict.values()),columns=['entropy_diff','entropy_diff_per_category'])

fig, axs = plt.subplots(2,2, figsize=(10, 7))
axs=axs.ravel()
k=0
m='cloneid'
entropy_col=m+': entropy_diff'
for f in files:
    print(f)
    adataall = sc.read(f)
    #keep only the cells sampled at this timepoint
    adata=adataall[adataall.obs['timepoint']==timepoint]

    #remove ribosomal and mitochondrial genes
    #NOTE(review): this drops every gene whose symbol starts with 'RP' or 'MT', which is broader
    #than the RPS/RPL and MT- families - confirm this is intended
    no_rb_genes=[n for n in adata.var_names if not n.startswith(('RP','MT'))]
    adata=adata[:,no_rb_genes]

    #keep only cells with assigned class
    adata=adata[adata.obs[m]!='-']
    adata=adata[adata.obs[m]!='no-bc']

    adata=scrna.dim_reduction(adata,n_neighbors=10,n_pcs=40,umap=True,tsne=False,plot=False,savepdf=False)

    st,diction=entropy(adata,m)
    n_categories=len(adata.obs[m].unique())
    print('Entropy difference',st)
    print('Average entropy difference per category:',st/n_categories)

    entropy_df.loc[lines_dict[f],'entropy_diff']=st
    entropy_df.loc[lines_dict[f],'entropy_diff_per_category']=st/n_categories

    #one row per clone
    dfaux=pd.DataFrame(data=diction, index=[0]).T
    dfaux['cell_line']=lines_dict[f]
    dfaux['timepoint']=timepoint
    dfaux=dfaux.rename(columns={0:'entropy_difference'})
    #DataFrame.append was removed in pandas 2.0; pd.concat is its replacement
    entropy_per_expbc_df=dfaux if entropy_per_expbc_df.empty else pd.concat([entropy_per_expbc_df,dfaux])

    #give every cell the entropy difference of its clone; a single .loc call writes into
    #adata.obs itself, whereas the chained obs[col].loc[mask]=val may write into a temporary copy
    adata.obs[entropy_col]=0.0
    for key,val in diction.items():
        adata.obs.loc[adata.obs[m]==key,entropy_col]=val
    scv.pl.scatter(adata, color=[entropy_col], size=100,ax=axs[k],add_outline=None,show=False,color_map='winter',vmin=-400,vmax=0)
    axs[k].set_title(lines_dict[f])
    k=k+1
plt.show()
fig.savefig('figures/'+m+'_entropydiff_'+timepoint+'.pdf')


In [None]:
#LOAD DATA
#Entropy difference of every clone at one timepoint: computed per cell line, appended to
#entropy_per_expbc_df and drawn on a 2x2 grid with one panel per cell line.
timepoint='16day'

data_dir='/home/estraja4/BFX_research/Resistance/barcoding/final_anndatasNEW/'

#h5ad file -> cell line name
lines_dict={data_dir+'HCC4006.h5ad':'HCC4006',
            data_dir+'HCC827velo_nolayer.h5ad':'HCC827',
            data_dir+'MGH707.h5ad':'MGH707',
            data_dir+'PC9.h5ad':'PC9'}
files=list(lines_dict)

#per cell line summary for this timepoint
entropy_df=pd.DataFrame(index=list(lines_dict.values()),columns=['entropy_diff','entropy_diff_per_category'])

fig, axs = plt.subplots(2,2, figsize=(10, 7))
axs=axs.ravel()
k=0
m='cloneid'
entropy_col=m+': entropy_diff'
for f in files:
    print(f)
    adataall = sc.read(f)
    #keep only the cells sampled at this timepoint
    adata=adataall[adataall.obs['timepoint']==timepoint]

    #remove ribosomal and mitochondrial genes
    #NOTE(review): this drops every gene whose symbol starts with 'RP' or 'MT', which is broader
    #than the RPS/RPL and MT- families - confirm this is intended
    no_rb_genes=[n for n in adata.var_names if not n.startswith(('RP','MT'))]
    adata=adata[:,no_rb_genes]

    #keep only cells with assigned class
    adata=adata[adata.obs[m]!='-']
    adata=adata[adata.obs[m]!='no-bc']

    adata=scrna.dim_reduction(adata,n_neighbors=10,n_pcs=40,umap=True,tsne=False,plot=False,savepdf=False)

    st,diction=entropy(adata,m)
    n_categories=len(adata.obs[m].unique())
    print('Entropy difference',st)
    print('Average entropy difference per category:',st/n_categories)

    entropy_df.loc[lines_dict[f],'entropy_diff']=st
    entropy_df.loc[lines_dict[f],'entropy_diff_per_category']=st/n_categories

    #one row per clone
    dfaux=pd.DataFrame(data=diction, index=[0]).T
    dfaux['cell_line']=lines_dict[f]
    dfaux['timepoint']=timepoint
    dfaux=dfaux.rename(columns={0:'entropy_difference'})
    #DataFrame.append was removed in pandas 2.0; pd.concat is its replacement
    entropy_per_expbc_df=dfaux if entropy_per_expbc_df.empty else pd.concat([entropy_per_expbc_df,dfaux])

    #give every cell the entropy difference of its clone; a single .loc call writes into
    #adata.obs itself, whereas the chained obs[col].loc[mask]=val may write into a temporary copy
    adata.obs[entropy_col]=0.0
    for key,val in diction.items():
        adata.obs.loc[adata.obs[m]==key,entropy_col]=val
    scv.pl.scatter(adata, color=[entropy_col], size=100,ax=axs[k],add_outline=None,show=False,color_map='winter',vmin=-400,vmax=0)
    axs[k].set_title(lines_dict[f])
    k=k+1
plt.show()
fig.savefig('figures/'+m+'_entropydiff_'+timepoint+'.pdf')


In [None]:
#Entropy change of every clone relative to the random baseline of its cell line and timepoint,
#drawn as one barplot per group of cell lines.

#random baseline: mean over the random classes, per cell line and timepoint
random_entropy_df2=random_entropy_df.dropna().assign(Barcode=lambda frame: frame.index)
df2=random_entropy_df2.copy()
#numeric_only=True keeps the averaging working on pandas>=2.0 if a non-numeric column is present
dfnorm=df2.groupby(['cell_line','timepoint']).mean(numeric_only=True).reset_index()
#(cell line, timepoint) -> mean random entropy difference
baseline=dict(zip(zip(dfnorm['cell_line'],dfnorm['timepoint']),dfnorm['entropy_difference']))

entropy_per_expbc_df=entropy_per_expbc_df.dropna()
entropy_per_expbc_df['Barcode']=entropy_per_expbc_df.index

#NOTE(review): the two following cells save different plots to these same two paths and
#therefore overwrite the files written here - confirm which figure each path should hold
panels=[(['HCC4006','HCC827'],'figures/figure2/entropy_diff_barplot.pdf'),
        (['PC9','MGH707'],'figures/figure2/entropy_diff_barplotSI.pdf')]

for cell_lines,pdf_path in panels:
    mask=np.isin(entropy_per_expbc_df.cell_line,cell_lines)
    #explicit copy so that the new column is added to df1 and not to a slice of the full table
    df1=entropy_per_expbc_df.loc[mask].copy()
    #observed minus random baseline; a (cell line, timepoint) without baseline raises KeyError
    df1['entropy change']=[observed-baseline[(line,tp)]
                           for line,tp,observed in zip(df1['cell_line'],df1['timepoint'],df1['entropy_difference'])]
    fig, axs = plt.subplots(1,1, figsize=(6, 4))
    sns.barplot(data=df1,x='cell_line',y='entropy change',hue='timepoint',palette="Set2",ax=axs)
    axs.set_ylim([-85,0])
    fig.savefig(pdf_path)

In [None]:
#Observed entropy difference (coloured bars) with the random-class values drawn on top as
#transparent gray-edged bars, for the two cell lines listed in selected_lines.
selected_lines=['HCC4006','HCC827']

#observed clones
entropy_per_expbc_df=entropy_per_expbc_df.dropna()
entropy_per_expbc_df['Barcode']=entropy_per_expbc_df.index
mask=np.isin(entropy_per_expbc_df['cell_line'],selected_lines)
df1=entropy_per_expbc_df[mask]

#random classes
random_entropy_df2=random_entropy_df.dropna()
random_entropy_df2['Barcode']=random_entropy_df2.index
mask=np.isin(random_entropy_df2['cell_line'],selected_lines)
df2=random_entropy_df2[mask]

fig, axs = plt.subplots(figsize=(6, 4))
#arguments common to both layers
shared=dict(x='cell_line',y='entropy_difference',hue='timepoint',ax=axs)
sns.barplot(data=df1,palette="Set2",**shared)
sns.barplot(data=df2,facecolor=(1, 1, 1, 0),edgecolor='gray',errcolor='gray',**shared)

fig.savefig('figures/figure2/entropy_diff_barplot.pdf')

In [None]:
#Observed entropy difference (coloured bars) with the random-class values drawn on top as
#transparent gray-edged bars, for the two cell lines listed in selected_lines.
selected_lines=['PC9','MGH707']

#observed clones
entropy_per_expbc_df=entropy_per_expbc_df.dropna()
entropy_per_expbc_df['Barcode']=entropy_per_expbc_df.index
mask=np.isin(entropy_per_expbc_df['cell_line'],selected_lines)
observed=entropy_per_expbc_df[mask]

#random classes
random_entropy_df2=random_entropy_df.dropna()
random_entropy_df2['Barcode']=random_entropy_df2.index
mask=np.isin(random_entropy_df2['cell_line'],selected_lines)
df=random_entropy_df2[mask]

fig, axs = plt.subplots(figsize=(6, 4))
#arguments common to both layers
shared=dict(x='cell_line',y='entropy_difference',hue='timepoint',ax=axs)
sns.barplot(data=observed,palette="Set2",**shared)
sns.barplot(data=df,facecolor=(1, 1, 1, 0),edgecolor='gray',errcolor='gray',**shared)

fig.savefig('figures/figure2/entropy_diff_barplotSI.pdf')

In [None]:
# Display the per-clone table (rows with missing values were dropped and a 'Barcode' column added by the cells above)
entropy_per_expbc_df