In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns

import scAnalysis as scrna

import scvelo as scv
# NOTE: a bare expression that is not the last line of a cell is not displayed,
# so this line has no visible effect where it stands.
scv.__version__
# Global scvelo display/figure settings used by every plot below.
scv.settings.presenter_view = True  # set max width size for presenter view
scv.settings.set_figure_params('scvelo')  # for beautified visualization

In [None]:
import warnings
# Silence every warning category for the rest of the session. This also hides
# deprecation and numerical warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

In [None]:
def dotmap(dfin1,dfin2,cmap='seismic',ax='',figsize=(7,7),cbar_kw=None, cbarlabel='', **kwargs):
    """Draw a dot matrix of two equally shaped tables.

    One circle is drawn per cell of ``dfin1``. The circle colour encodes the
    value in ``dfin1``; the circle size encodes the value at the same position
    in ``dfin2``, divided by the maximum of ``dfin2`` and multiplied by 1000
    before being passed to ``Axes.scatter`` as ``s``.

    Parameters
    ----------
    dfin1 : pandas.DataFrame
        Colour values. Columns are laid out along x, rows along y; the names of
        the column and row indexes are used as axis labels.
    dfin2 : pandas.DataFrame
        Size values, paired with ``dfin1`` by position. Must have the same
        shape as ``dfin1``.
    cmap : str or Colormap
        Colormap forwarded to ``Axes.scatter``.
    ax : matplotlib Axes, '' or None
        Axes to draw on. ``''`` (the default) or ``None`` creates a new figure
        of size ``figsize``.
    figsize : tuple
        Figure size used only when a new figure is created.
    cbar_kw : dict, optional
        Extra keyword arguments for ``Figure.colorbar``. ``drawedges`` defaults
        to ``False`` and may be overridden here.
    cbarlabel : str
        Label written next to the colorbar.
    **kwargs
        Forwarded to ``Axes.scatter``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``dfin1`` and ``dfin2`` do not have the same shape.
    """
    if dfin1.shape != dfin2.shape:
        # Sizes are paired with colours by position, so a shape mismatch would
        # silently attach sizes to the wrong dots.
        raise ValueError(
            'dfin1 and dfin2 must have the same shape, got %s and %s'
            % (dfin1.shape, dfin2.shape)
        )

    # '' is the historical "no axes given" sentinel; None is accepted as well.
    # isinstance guards the comparison so that array-like `ax` values are not
    # compared element-wise against a string.
    if ax is None or (isinstance(ax, str) and ax == ''):
        fig, axw = plt.subplots(1, 1, figsize=figsize)
    else:
        axw = ax

    # Caller options win over the drawedges default.
    colorbar_options = {'drawedges': False}
    colorbar_options.update(cbar_kw or {})

    cols = dfin1.columns.tolist()
    idx = dfin1.index.tolist()
    xlabel = dfin1.columns.name
    ylabel = dfin1.index.name

    def _to_long(frame):
        # Rebuild the frame with plain (unnamed) labels so that pd.melt always
        # names its output columns 'variable' and 'value'.
        plain = pd.DataFrame(data=frame.values, index=frame.index.tolist(), columns=frame.columns.tolist())
        plain['yname'] = plain.index.values.tolist()
        return pd.melt(plain, id_vars=['yname'])

    dfa = _to_long(dfin1)
    dfb = _to_long(dfin2)

    # Label -> grid position; dots sit at the centre of their grid cell (+0.5).
    colmap = {label: pos for pos, label in enumerate(cols)}
    idxmap = {label: pos for pos, label in enumerate(idx)}
    dfa['x'] = dfa['variable'].map(colmap) + 0.5
    dfa['y'] = dfa['yname'].map(idxmap) + 0.5

    # Normalise sizes to [.., 1]; Series.max() skips NaN, unlike builtin max().
    dfa['size'] = dfb['value'] / dfb['value'].max()

    size_scale = 1000

    im = axw.scatter(dfa['x'], dfa['y'], c=dfa['value'], s=size_scale*dfa['size'], cmap=cmap,
                     linewidths=0.5, edgecolors='k', **kwargs)

    # Create colorbar
    cbar = axw.figure.colorbar(im, ax=axw, **colorbar_options)
    cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom")

    # One major tick per column/row (cell centres), minor ticks on cell borders.
    x_centres = [pos + 0.5 for pos in range(len(cols))]
    y_centres = [pos + 0.5 for pos in range(len(idx))]
    axw.set_xticks(x_centres)
    axw.set_xticklabels(cols, rotation=90)
    axw.set_yticks(y_centres)
    axw.set_yticklabels(idx)
    axw.set_xlabel(xlabel)
    axw.set_ylabel(ylabel)
    axw.grid(False, 'major')
    axw.grid(True, 'minor')
    axw.set_xticks([c + 0.5 for c in x_centres], minor=True)
    axw.set_yticks([c + 0.5 for c in y_centres], minor=True)
    axw.set_xlim([0, len(cols)])
    axw.set_ylim([0, len(idx)])
    # Apply the aspect to the axes being drawn on; plt.gca() may be a
    # different axes when the caller passed `ax`.
    axw.set_aspect('equal', adjustable='box')
    axw.set_facecolor((0.95, 0.95, 0.95))
    for axis in ['top','bottom','left','right']:
        axw.spines[axis].set_linewidth(0)
    axw.xaxis.set_tick_params(width=0)
    axw.yaxis.set_tick_params(width=0)

In [None]:
#LOAD DATA
# Read the saved AnnData object of each cell line. `results_file` holds the
# path being read and ends on the PC9 file, as before.
h5ad_paths = (
    'final_anndatasNEW/HCC827.h5ad',
    'final_anndatasNEW/HCC4006.h5ad',
    'final_anndatasNEW/MGH707.h5ad',
    'final_anndatasNEW/PC9.h5ad',
)
loaded = []
for results_file in h5ad_paths:
    loaded.append(sc.read(results_file))
adata827, adata4006, adata707, adataPC9 = loaded
# Drop the temporary list so it does not keep the objects alive once the
# per-cell-line names are rebound later in the notebook.
del loaded

In [None]:
# For every cell line, count the non-null 'Barcode' entries per
# (trajectory_class, timepoint) group and write the table to CSV.
# `df` ends as the MGH707 table, as before.
for obs_table, csv_name in (
    (adata827.obs, 'barcode_countsHCC827.csv'),
    (adata4006.obs, 'barcode_countsHCC4006.csv'),
    (adataPC9.obs, 'barcode_countsPC9.csv'),
    (adata707.obs, 'barcode_countsMGH707.csv'),
):
    selected = obs_table[['trajectory_class','timepoint','Barcode']]
    df = selected.groupby(['trajectory_class','timepoint']).count().reset_index()
    df.to_csv(csv_name)

In [None]:
# For every cell line, count the distinct 'cloneid' values per
# (trajectory_class, timepoint) group and write the table to CSV.
# File names are kept exactly as before (note the differing capitalisation of
# the HCC827 file). `df` ends as the MGH707 table.
for obs_table, csv_name in (
    (adata827.obs, 'barcode_UniqueCountsHCC827.csv'),
    (adata4006.obs, 'barcode_UniquecountsHCC4006.csv'),
    (adataPC9.obs, 'barcode_UniquecountsPC9.csv'),
    (adata707.obs, 'barcode_UniquecountsMGH707.csv'),
):
    selected = obs_table[['trajectory_class','timepoint','cloneid']]
    df = selected.groupby(['trajectory_class','timepoint']).nunique().reset_index()
    df.to_csv(csv_name)

In [None]:
# Show the summary representation of the HCC4006 AnnData object.
adata4006

In [None]:
# Distinct clone ids per (trajectory_class, timepoint) for HCC4006.
# `df` is left pointing at adata4006.obs; the next two cells read it.
df=adata4006.obs
df[['trajectory_class','timepoint','cloneid']].groupby(['trajectory_class','timepoint']).nunique().reset_index()#.nunique()


In [None]:
# Rows of `df` with trajectory class '1' at the '0hr' timepoint, selected with
# one combined boolean mask.
df.loc[
    (df['trajectory_class']=='1') & (df['timepoint']=='0hr'),
    ['trajectory_class','timepoint','Barcode'],
]

In [None]:
# Non-null 'Barcode' entries per (trajectory_class, timepoint) for the table
# currently held in `df`.
df[['trajectory_class','timepoint','Barcode']].groupby(['trajectory_class','timepoint']).count()

In [None]:
# Per-cell (trajectory_class, cloneid) table for PC9.
adata = adataPC9.copy()
adata.obs[['trajectory_class','cloneid']].to_csv('figuresNEW/CloneClassMappingPC9.csv')

# Per-cell (cloneid, trajectory_class) table for every cell line.
# NOTE(review): the MGH707 file is named 'MG707' here, unlike the other
# MGH707 outputs in this notebook; kept as is because other code may read it.
for source, csv_name in (
    (adata827, 'CloneIDvsTrajClass_HCC827.csv'),
    (adata4006, 'CloneIDvsTrajClass_HCC4006.csv'),
    (adata707, 'CloneIDvsTrajClass_MG707.csv'),
    (adataPC9, 'CloneIDvsTrajClass_PC9.csv'),
):
    adata = source.copy()
    df = adata.obs[['cloneid','trajectory_class']]
    df.to_csv(csv_name)
# Do not keep an extra reference to the last source object.
del source

In [None]:
#with colors matching cellxgene
# Shared plot settings. `colors` is also read by later cells in this notebook.
colors = ['purple','orangered','gold','turquoise']#['darkred','violet','green','b']
stratify='timepoint'
feat='UMAP'
xs = feat + '1'
ys = feat + '2'


def plot_timepoint_umap(adata_src, out_path, stratify='timepoint', feat='UMAP', palette=None, point_size=50):
    """Scatter an embedding coloured by a grouping column and save the figure.

    Parameters
    ----------
    adata_src : AnnData
        Source object. A copy is handed to ``scrna.andata2df`` so the source is
        not touched.
    out_path : str
        File the figure is written to.
    stratify : str
        Column of the data frame used to split the cells into groups.
    feat : str
        Embedding prefix; the columns ``feat + '1'`` and ``feat + '2'`` are
        plotted. Assumes ``scrna.andata2df`` returns a DataFrame carrying
        these columns together with ``stratify`` -- TODO confirm.
    palette : sequence of colours, optional
        One colour per group, in sorted group order. Defaults to the global
        ``colors``.
    point_size : float
        Marker size passed to ``Axes.scatter``.

    Returns
    -------
    matplotlib.figure.Figure
    """
    palette = colors if palette is None else palette
    x_col = feat + '1'
    y_col = feat + '2'

    expr = scrna.andata2df(adata_src.copy()).sort_values(stratify)

    fig, axw = plt.subplots(1, 1, figsize=(9, 7))
    # zip() stops at the shorter sequence: groups beyond len(palette) are not
    # drawn (unchanged from the original cell).
    for group, group_color in zip(expr[stratify].unique(), palette):
        points = expr.loc[expr[stratify] == group, [x_col, y_col]]
        axw.scatter(points[x_col], points[y_col], c=group_color, edgecolor='lightgray',
                    s=point_size, marker='.', label=group, linewidths=.3)
    axw.set_xlabel(x_col)
    axw.set_ylabel(y_col)
    #axw.legend(loc=4)
    axw.axis('off')
    plt.show()
    fig.savefig(out_path, bbox_inches='tight')
    return fig


for adata_src, out_path in (
    (adata827, 'figuresNEW/HCC827/UMAPS_final/HCC827timepoints.pdf'),
    (adata4006, 'figuresNEW/HCC4006/UMAPS_final/HCC4006timepoints.pdf'),
    (adataPC9, 'figuresNEW/PC9/UMAPS_final/PC9timepoints.pdf'),
    (adata707, 'figuresNEW/MGH707/UMAPS_final/MGH707timepoints.pdf'),
):
    fig = plot_timepoint_umap(adata_src, out_path)
# Do not keep an extra reference to the last source object.
del adata_src


In [None]:
#fixing cloneid naming in MGH707
def pad_clone_id(clone_id, width=5):
    """Return ``clone_id`` with its numeric suffix zero-padded to ``width`` digits.

    ``'clone123'`` becomes ``'clone00123'``. Anything that is not the string
    ``'clone'`` followed only by digits (including non-string values) is
    returned unchanged. Ids that are already padded are left as they are, so
    the cell can be re-run safely.
    """
    prefix = 'clone'
    if isinstance(clone_id, str) and clone_id.startswith(prefix):
        digits = clone_id[len(prefix):]
        if digits.isdigit():
            return prefix + digits.zfill(width)
    return clone_id


# Every id is handled independently. The previous loop left `name` untouched
# for ids whose length was not 6, 7 or 8, so such cells silently received the
# name of the previously processed cell (or raised NameError on a fresh kernel).
adata707.obs['cloneid'] = [pad_clone_id(c) for c in adata707.obs['cloneid']]

In [None]:
# Directory holding the per-cell-line DNA barcode log2 fold-change tables.
LOG2FC_DIR = '/da/onc/BFx/research/krishvi7/barcoding/expressed_barcode/20191219_4cellline_DNA/output'


def annotate_log2fc(adata_src, cell_line, comparator='14_vs_0', threshold=1):
    """Attach clone-level log2 fold changes to the cells and plot them.

    Reads ``<LOG2FC_DIR>/<cell_line>_log2fc.txt``, keeps the rows whose
    ``comparator_group`` equals ``comparator`` and, for every cell, looks up
    the ``log2fc`` of its ``cloneid`` (first matching row). Two columns are
    added to ``obs``:

    * ``log2fc``      -- the looked-up value, NaN when the clone is not listed;
    * ``sensitivity`` -- ``'tolerant'`` if log2fc > ``threshold``,
      ``'sensitive'`` if log2fc < -``threshold``, otherwise ``'-'``.

    The annotated object is written to ``final_anndatasNEW/<cell_line>.h5ad``
    (this overwrites the file the notebook loaded) and a scatter plot coloured
    by ``log2fc`` is saved under ``figuresNEW/<cell_line>/UMAPS_final/``.

    Parameters
    ----------
    adata_src : AnnData
        Object to annotate; it is copied, not modified.
    cell_line : str
        Cell-line name used to build all file names.
    comparator : str
        Value of ``comparator_group`` to keep.
    threshold : float
        Absolute log2fc cut-off for the sensitivity call.

    Returns
    -------
    AnnData
        The annotated copy.
    """
    adata = adata_src.copy()

    df = pd.read_csv(LOG2FC_DIR + '/' + cell_line + '_log2fc.txt', sep='\t')
    df = df.loc[df['comparator_group'] == comparator]

    # clone id -> log2fc, first row per clone. One lookup table replaces a
    # full scan of `df` for every single cell.
    lookup = (
        df.dropna(subset=['clone_id'])
          .drop_duplicates('clone_id')
          .set_index('clone_id')['log2fc']
    )
    # astype(object) so that a categorical column is mapped value by value.
    log2fc = adata.obs['cloneid'].astype(object).map(lookup).to_numpy(dtype=float)

    # Comparisons with NaN are False, so unlisted clones fall through to '-'.
    with np.errstate(invalid='ignore'):
        sensitivity = np.select(
            [log2fc > threshold, log2fc < -threshold],
            ['tolerant', 'sensitive'],
            default='-',
        )

    adata.obs['log2fc'] = log2fc
    adata.obs['sensitivity'] = sensitivity.tolist()
    adata.write('final_anndatasNEW/' + cell_line + '.h5ad')

    fig, ax = plt.subplots(1, 1, figsize=(7, 5))
    scv.pl.scatter(adata, color='log2fc', s=80,show=False,ax=ax)
    plt.show()
    fig.savefig('figuresNEW/' + cell_line + '/UMAPS_final/' + cell_line + 'log2fc.png',
                dpi=300, bbox_inches='tight')
    return adata


# Replace each in-memory object by its annotated version.
adata827 = annotate_log2fc(adata827, 'HCC827')
adata4006 = annotate_log2fc(adata4006, 'HCC4006')
adata707 = annotate_log2fc(adata707, 'MGH707')
adataPC9 = annotate_log2fc(adataPC9, 'PC9')

In [None]:
def plot_log2fc_by_timepoint(adata_src, out_path, color_limits=None):
    """Draw one log2fc scatter panel per timepoint and save the figure.

    Each panel shows the outline of all cells (drawn with ``alpha=0`` and
    ``add_outline=True``) and, on top, the cells of one timepoint coloured by
    ``obs['log2fc']``.

    Parameters
    ----------
    adata_src : AnnData
        Source object; a copy is plotted.
    out_path : str
        File the figure is written to.
    color_limits : dict, optional
        Maps a timepoint to ``(vmin, vmax)`` for that panel. Timepoints not
        listed use the default colour scaling.

    Returns
    -------
    matplotlib.figure.Figure
    """
    adata = adata_src.copy()
    color_limits = color_limits or {}
    timepoints = adata.obs['timepoint'].unique()

    # Two columns; at least the original 2x2 layout, more rows if there are
    # more than four timepoints (the fixed grid raised IndexError then).
    nrows = max(2, -(-len(timepoints) // 2))
    fig, axes = plt.subplots(nrows, 2, figsize=(15, 5.5 * nrows))

    for panel, t in zip(axes.ravel(), timepoints):
        adataaux = adata[adata.obs['timepoint'] == t]
        scv.pl.scatter(adata, color='white', size=300,ax=panel,add_outline=True,alpha=0,show=False)
        limits = {}
        if t in color_limits:
            limits['vmin'], limits['vmax'] = color_limits[t]
        scv.pl.scatter(adataaux, color='log2fc', s=80,show=False,ax=panel, **limits)
        panel.set_title('log2fc '+t,fontsize=28)

    plt.show()
    fig.savefig(out_path, dpi=300, bbox_inches='tight')
    return fig


# HCC827 keeps its fixed colour range for the 0hr panel only, as before.
fig = plot_log2fc_by_timepoint(
    adata827, 'figuresNEW/HCC827/UMAPS_final/HCC827log2fc_timepoints.png',
    color_limits={'0hr': (-5, 5)},
)
fig = plot_log2fc_by_timepoint(adata4006, 'figuresNEW/HCC4006/UMAPS_final/HCC4006log2fc_timepoints.png')
fig = plot_log2fc_by_timepoint(adata707, 'figuresNEW/MGH707/UMAPS_final/MGH707log2fc_timepoints.png')
fig = plot_log2fc_by_timepoint(adataPC9, 'figuresNEW/PC9/UMAPS_final/PC9log2fc_timepoints.png')

In [None]:
def plot_example_clones(adata_src, clone_ids, out_path, stratify='timepoint', feat='UMAP'):
    """Plot selected clones on the embedding, one panel per clone.

    In each panel the cells of one clone are coloured by ``stratify`` using
    the global ``colors`` list (defined in an earlier cell), and the outline of
    all cells is drawn on top with ``scv.pl.scatter``.

    Parameters
    ----------
    adata_src : AnnData
        Source object; a copy is plotted.
    clone_ids : sequence of str
        Values of ``obs['cloneid']`` to show, one panel each.
    out_path : str
        File the figure is written to.
    stratify : str
        Column used to colour the cells of a clone.
    feat : str
        Embedding prefix; columns ``feat + '1'`` and ``feat + '2'`` are
        plotted. Assumes ``scrna.andata2df`` provides them -- TODO confirm.

    Returns
    -------
    matplotlib.figure.Figure
    """
    adata = adata_src.copy()
    xs = feat + '1'
    ys = feat + '2'

    # 10 x 7 inches per panel: (30, 7) for three clones, as before.
    # squeeze=False keeps a 2-D axes array even for a single clone.
    fig, axes = plt.subplots(1, len(clone_ids), figsize=(10 * len(clone_ids), 7), squeeze=False)

    for axs, bc in zip(axes.ravel(), clone_ids):
        adataaux = adata[adata.obs['cloneid']==bc]
        expr = scrna.andata2df(adataaux).sort_values(stratify)
        # zip() pairs sorted groups with colours and stops at the shorter one.
        for group, group_color in zip(expr[stratify].unique(), colors):
            points = expr.loc[expr[stratify] == group, [xs, ys]]
            axs.scatter(points[xs], points[ys], c=group_color, edgecolor='lightgray', s=300,
                        marker='.', label=group, linewidths=.3)
        axs.set_xlabel(xs)
        axs.set_ylabel(ys)

        scv.pl.scatter(adata, color='white', size=300,ax=axs,add_outline=True,alpha=0,show=False)
        axs.set_title(bc,fontsize=30)

    plt.show()
    fig.savefig(out_path, dpi=300, bbox_inches='tight')
    return fig


fig = plot_example_clones(
    adata827, ['clone00134','clone00135','clone00007'],
    'figuresNEW/HCC827/UMAPS_final/HCC827exampleClones.png',
)
fig = plot_example_clones(
    adata4006, ['clone00037','clone00012','clone00003'],
    'figuresNEW/HCC4006/UMAPS_final/HCC4006exampleClones.png',
)
fig = plot_example_clones(
    adataPC9, ['clone00772','clone00720','clone00738'],
    'figuresNEW/PC9/UMAPS_final/PC9exampleClones.png',
)
fig = plot_example_clones(
    adata707, ['clone00041','clone00476','clone00036'],
    'figuresNEW/MGH707/UMAPS_final/MGH707exampleClones.png',
)

In [None]:
# Number of MGH707 clones represented by more than 5 cells.
adata=adata707.copy()
# One value_counts() pass replaces building an AnnData view per clone.
clone_sizes = adata.obs['cloneid'].value_counts()
bcs=[b for b in adata.obs['cloneid'].unique() if clone_sizes.get(b, 0)>5]
len(bcs)

In [None]:
# HCC827: one panel per clone with more than 5 cells, coloured by timepoint.
adata=adata827.copy()

# One value_counts() pass replaces building an AnnData view per clone.
# Ids containing '-' are skipped.
clone_sizes = adata.obs['cloneid'].value_counts()
bcs=[b for b in adata.obs['cloneid'].unique() if clone_sizes.get(b, 0)>5 and '-' not in b]

stratify='timepoint'
feat='UMAP'
xs = feat + '1'
ys = feat + '2'

# Original layout is 14 x 10 on a 40 x 40 inch figure. Rows are added (and the
# height scaled) when there are more clones than panels; the fixed grid raised
# IndexError in that case.
ncols = 10
nrows = max(14, -(-len(bcs) // ncols))
fig, ax = plt.subplots(nrows, ncols, figsize=(40, 40 * nrows / 14))
ax=ax.ravel()

for axs, bc in zip(ax, bcs):
    adataaux=adata[adata.obs['cloneid']==bc]
    expr = scrna.andata2df(adataaux).sort_values(stratify)
    # `colors` comes from an earlier cell; zip() stops at the shorter sequence.
    for c, color in zip(expr[stratify].unique(), colors):
        expraux = expr.loc[expr[stratify] == c, [xs, ys]]
        axs.scatter(expraux[xs], expraux[ys], c=color, edgecolor='lightgray', s=100, marker='.', label=c,
                    linewidths=.3)
    axs.set_xlabel(xs)
    axs.set_ylabel(ys)

    # Outline of all cells as a common background reference.
    scv.pl.scatter(adata, color='white', size=100,ax=axs,add_outline=True,alpha=0,show=False)
    axs.set_title(bc)#,fontsize=30)
fig.savefig('figuresNEW/HCC827/UMAPS_final/HCC827Clones.png',dpi=300, bbox_inches='tight')


In [None]:
# HCC4006: one panel per clone with more than 5 cells, coloured by timepoint.
adata=adata4006.copy()

# One value_counts() pass replaces building an AnnData view per clone.
# NOTE(review): unlike the HCC827, PC9 and MGH707 cells, ids containing '-'
# are not excluded here -- confirm whether that is intended.
clone_sizes = adata.obs['cloneid'].value_counts()
bcs=[b for b in adata.obs['cloneid'].unique() if clone_sizes.get(b, 0)>5]

stratify='timepoint'
feat='UMAP'
xs = feat + '1'
ys = feat + '2'

# Original layout is 9 x 8 on a 30 x 30 inch figure. Rows are added (and the
# height scaled) when there are more clones than panels; the fixed grid raised
# IndexError in that case.
ncols = 8
nrows = max(9, -(-len(bcs) // ncols))
fig, ax = plt.subplots(nrows, ncols, figsize=(30, 30 * nrows / 9))
ax=ax.ravel()

for axs, bc in zip(ax, bcs):
    adataaux=adata[adata.obs['cloneid']==bc]
    expr = scrna.andata2df(adataaux).sort_values(stratify)
    # `colors` comes from an earlier cell; zip() stops at the shorter sequence.
    for c, color in zip(expr[stratify].unique(), colors):
        expraux = expr.loc[expr[stratify] == c, [xs, ys]]
        axs.scatter(expraux[xs], expraux[ys], c=color, edgecolor='lightgray', s=100, marker='.', label=c,
                    linewidths=.3)
    axs.set_xlabel(xs)
    axs.set_ylabel(ys)

    # Outline of all cells as a common background reference.
    scv.pl.scatter(adata, color='white', size=100,ax=axs,add_outline=True,alpha=0,show=False)
    axs.set_title(bc)#,fontsize=30)
fig.savefig('figuresNEW/HCC4006/UMAPS_final/HCC4006Clones.png',dpi=300, bbox_inches='tight')

In [None]:
# PC9: one panel per clone with more than 5 cells, coloured by timepoint.
adata=adataPC9.copy()

# One value_counts() pass replaces building an AnnData view per clone.
# Ids containing '-' are skipped.
clone_sizes = adata.obs['cloneid'].value_counts()
bcs=[b for b in adata.obs['cloneid'].unique() if clone_sizes.get(b, 0)>5 and '-' not in b]

stratify='timepoint'
feat='UMAP'
xs = feat + '1'
ys = feat + '2'

# Original layout is 12 x 10 on a 40 x 40 inch figure. Rows are added (and the
# height scaled) when there are more clones than panels; the fixed grid raised
# IndexError in that case.
ncols = 10
nrows = max(12, -(-len(bcs) // ncols))
fig, ax = plt.subplots(nrows, ncols, figsize=(40, 40 * nrows / 12))
ax=ax.ravel()

for axs, bc in zip(ax, bcs):
    adataaux=adata[adata.obs['cloneid']==bc]
    expr = scrna.andata2df(adataaux).sort_values(stratify)
    # `colors` comes from an earlier cell; zip() stops at the shorter sequence.
    for c, color in zip(expr[stratify].unique(), colors):
        expraux = expr.loc[expr[stratify] == c, [xs, ys]]
        axs.scatter(expraux[xs], expraux[ys], c=color, edgecolor='lightgray', s=100, marker='.', label=c,
                    linewidths=.3)
    axs.set_xlabel(xs)
    axs.set_ylabel(ys)

    # Outline of all cells as a common background reference.
    scv.pl.scatter(adata, color='white', size=100,ax=axs,add_outline=True,alpha=0,show=False)
    axs.set_title(bc)#,fontsize=30)
fig.savefig('figuresNEW/PC9/UMAPS_final/PC9Clones.png',dpi=300, bbox_inches='tight')

In [None]:
# MGH707: one panel per clone with more than 5 cells, coloured by timepoint.
adata=adata707.copy()

# One value_counts() pass replaces building an AnnData view per clone.
# Ids containing '-' are skipped.
clone_sizes = adata.obs['cloneid'].value_counts()
bcs=[b for b in adata.obs['cloneid'].unique() if clone_sizes.get(b, 0)>5 and '-' not in b]

stratify='timepoint'
feat='UMAP'
xs = feat + '1'
ys = feat + '2'

# Original layout is 9 x 8 on a 40 x 40 inch figure. Rows are added (and the
# height scaled) when there are more clones than panels; the fixed grid raised
# IndexError in that case.
ncols = 8
nrows = max(9, -(-len(bcs) // ncols))
fig, ax = plt.subplots(nrows, ncols, figsize=(40, 40 * nrows / 9))
ax=ax.ravel()

for axs, bc in zip(ax, bcs):
    adataaux=adata[adata.obs['cloneid']==bc]
    expr = scrna.andata2df(adataaux).sort_values(stratify)
    # `colors` comes from an earlier cell; zip() stops at the shorter sequence.
    for c, color in zip(expr[stratify].unique(), colors):
        expraux = expr.loc[expr[stratify] == c, [xs, ys]]
        axs.scatter(expraux[xs], expraux[ys], c=color, edgecolor='lightgray', s=100, marker='.', label=c,
                    linewidths=.3)
    axs.set_xlabel(xs)
    axs.set_ylabel(ys)

    # Outline of all cells as a common background reference.
    scv.pl.scatter(adata, color='white', size=100,ax=axs,add_outline=True,alpha=0,show=False)
    axs.set_title(bc)#,fontsize=30)
fig.savefig('figuresNEW/MGH707/UMAPS_final/MGH707Clones.png',dpi=300, bbox_inches='tight')

In [None]:
# HCC827: for every clone seen at both 0hr and 14day, the change in its number
# of cells (14day minus 0hr), grouped by the clone's trajectory class.
adata = adata827.copy()
df = adata.obs[['timepoint','cloneid','trajectory_class']]

bcs, change, trajclass = [], [], []
for bc in df['cloneid'].unique():
    clone_rows = df.loc[df['cloneid']==bc]
    n_0hr = len(clone_rows.loc[clone_rows['timepoint']=='0hr'])
    n_14day = len(clone_rows.loc[clone_rows['timepoint']=='14day'])
    if not (n_0hr > 0 and n_14day > 0):
        continue
    bcs.append(bc)
    change.append(n_14day - n_0hr)
    # The class of the clone's first cell stands for the whole clone.
    trajclass.append(clone_rows['trajectory_class'].values[0])

res = pd.DataFrame()
res['cloneid'] = bcs
res['difference 14day VS 0hr'] = change
res['trajectory_class'] = trajclass
# Drop clones whose class is the '-' placeholder.
res = res.loc[res['trajectory_class']!='-']

sns.swarmplot(data=res,x='trajectory_class',y='difference 14day VS 0hr')


In [None]:
# HCC827: trajectory-class composition over time (four panels)
#   axs[0] stacked bars of class proportions per timepoint
#   axs[1] the same proportions as lines
#   axs[2] change of each proportion relative to the first timepoint
#   axs[3] per-clone change in cell number, 14day minus 0hr, by class
adata=adata827.copy()

category='trajectory_class'
groupby='trajectory_class'
conditions='timepoint'

# look only at actual classes ('-' is excluded)
adataaux=adata[adata.obs['trajectory_class']!='-']

stratify=groupby

expr = scrna.andata2df(adataaux)
if groupby=='trajectory_class':
    expr=expr.loc[expr[category]!='-']
expr = expr.sort_values(stratify)

df = expr[[stratify, conditions, 'Barcode']]
# Distinct barcodes per (class, timepoint) and per timepoint; their ratio is
# the proportion of each class within a timepoint.
per_class = df.groupby([stratify, conditions])['Barcode'].nunique()
per_condition = df.groupby([conditions])['Barcode'].nunique()
cell_numbers = per_class.to_frame().reset_index()
prop_cells = (per_class / per_condition).to_frame().reset_index()

#plot
fig, axs = plt.subplots(2, 2, figsize=(22, 15))
axs=axs.ravel()

df=prop_cells.sort_values(groupby)#('Barcode')

#conds=['0hr','24hr','14day','14day MET amp','14day EMT','16day','16day MET amp','16day EMT']#df.timepoint.unique()
conds=['0hr','24hr','14day','16day']

exp_bcs=[i for i in df[groupby].unique()]
color=plt.cm.Set2(np.linspace(0,1,len(exp_bcs)))#cm.tab10(np.linspace(0,1,len(exp_bcs)))

# One x position per plotted condition. Deriving this from the timepoints
# present in the data broke the plots whenever that number differed from
# len(conds).
r = list(range(len(conds)))

vals=np.zeros(len(r)).tolist()
for k, bc in enumerate(exp_bcs):
    dfaux=df.loc[df[groupby]==bc]

    # Proportion of this class at every condition; 0 where it is absent.
    bars=[]
    for t in conds:
        found = dfaux.loc[dfaux[conditions]==t, 'Barcode']
        bars.append(found.values[0] if len(found)>0 else 0)

    axs[0].bar(r, bars,bottom=vals, width=1,edgecolor=None,color=color[k])
    vals=[b+v for b, v in zip(bars, vals)]

    tendency=[b-bars[0] for b in bars]
    axs[1].plot(r,bars,color=color[k],linewidth=5,label=bc)
    axs[2].plot(r,tendency,color=color[k],linewidth=5,label=bc)
axs[1].legend(fontsize=20)

# Fix the tick positions before labelling them, so each label is guaranteed
# to sit on the value it names.
prop_ticks = [0,0.2,0.4,0.6,0.8,1]

axs[0].set_ylabel(groupby+' proportion',fontsize=20)
axs[0].set_xticks(r)
axs[0].set_xticklabels(conds,fontsize=20)
axs[0].set_yticks(prop_ticks)
axs[0].set_yticklabels(prop_ticks,fontsize=20)
axs[0].set_xlim([-0.5,len(conds)-0.5])
axs[0].set_ylim([0,1])

axs[1].set_ylabel(groupby+' proportion trend',fontsize=20)
axs[1].set_xticks(r)
axs[1].set_yticks(prop_ticks)
axs[1].set_yticklabels(prop_ticks,fontsize=20)
axs[1].set_xticklabels(conds,fontsize=20)
axs[1].set_ylim([0,1])

axs[2].set_ylabel(groupby+' proportion change',fontsize=20)
axs[2].set_xticks(r)
axs[2].set_xticklabels(conds,fontsize=20)
axs[2].set_ylim([-1,1])


#last plot, axs[3]
# For every clone seen at both 0hr and 14day: change in its number of cells.
df=adata.obs[['timepoint','cloneid','trajectory_class']]
bcs=[]
change=[]
trajclass=[]
for bc in df['cloneid'].unique():
    df2=df.loc[df['cloneid']==bc]

    scr=len(df2.loc[df2['timepoint']=='0hr'])
    ontx=len(df2.loc[df2['timepoint']=='14day'])
    if scr>0 and ontx>0:
        bcs.append(bc)
        change.append(ontx-scr)
        # The class of the clone's first cell stands for the whole clone.
        trajclass.append(df2['trajectory_class'].values[0])

res=pd.DataFrame()
res['cloneid']=bcs
res['difference 14day VS 0hr']=change
res['trajectory_class']=trajclass
res=res.loc[res['trajectory_class']!='-']
sns.boxplot(data=res,x='trajectory_class',y='difference 14day VS 0hr',ax=axs[3],color='white')
sns.swarmplot(data=res,x='trajectory_class',y='difference 14day VS 0hr',ax=axs[3],size=10,color='black')
ax=axs[3]
for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
              ax.get_xticklabels() + ax.get_yticklabels()):
    item.set_fontsize(20)

plt.show()
fig.savefig('figuresNEW/HCC827/HCC827trajProportions.pdf', bbox_inches='tight')

#df=prop_cells.pivot(index=groupby, columns='timepoint', values='Barcode')
#sns.clustermap(df.replace(np.nan,0),z_score=None,col_cluster=True,cmap='seismic',figsize=(7,5))
#plt.show()




# `cm` is kept importable at module level, as in the original cell.
from matplotlib.pyplot import cm

# Column names used by this analysis; later cells read `category` as well.
category='trajectory_class'
groupby='trajectory_class'
conditions='timepoint'

# Timepoints shown on the x axis, in plotting order. The data may hold further
# labels (e.g. '14day MET amp'); those are simply not plotted.
conds=['0hr','24hr','14day','16day']


def plot_trajectory_proportions(adata, out_path, conds=('0hr','24hr','14day','16day'),
                                groupby='trajectory_class', conditions='timepoint'):
    """Plot how class proportions change across timepoints and save the figure.

    The 2x2 figure contains:
      [0] stacked bars of the class proportions at every timepoint in ``conds``
      [1] the same proportions as one line per class
      [2] each class's proportion minus its proportion at ``conds[0]``
      [3] per clone, (#cells at '14day') - (#cells at '0hr') grouped by class,
          restricted to clones that have cells at both of those timepoints

    Proportions are unique-'Barcode' counts per (class, timepoint) divided by the
    unique-'Barcode' count of that timepoint, computed on cells whose class is
    not '-'.

    Parameters
    ----------
    adata : AnnData-like
        ``adata.obs`` must hold the columns ``groupby``, ``conditions`` and
        'cloneid'; ``scrna.andata2df`` must yield those plus a 'Barcode' column.
        The object is not modified.
    out_path : str
        File the figure is written to.
    conds : sequence of str
        Timepoint labels to plot, in x-axis order.
    groupby, conditions : str
        Names of the class column and the timepoint column.

    Returns
    -------
    (fig, axs, res) : the figure, the flattened array of its four axes, and the
        per-clone table shown in panel [3].
    """
    conds = list(conds)
    # x positions follow `conds`, not the number of timepoints present in the
    # data; the bar/line values below always have len(conds) entries.
    x_pos = list(range(len(conds)))

    # --- class proportions per timepoint (unclassified cells, '-', excluded) ---
    classified = scrna.andata2df(adata[adata.obs[groupby] != '-'])
    cells = classified[[groupby, conditions, 'Barcode']]
    n_per_class = cells.groupby([groupby, conditions])['Barcode'].nunique()
    n_per_cond = cells.groupby(conditions)['Barcode'].nunique()
    prop_cells = (n_per_class / n_per_cond).to_frame().reset_index()
    prop_cells = prop_cells.sort_values(groupby)

    fig, axs = plt.subplots(2, 2, figsize=(22, 15))
    axs = axs.ravel()

    classes = list(prop_cells[groupby].unique())
    color = cm.Set2(np.linspace(0, 1, len(classes)))

    stack_base = [0.0] * len(conds)  # running top of the stacked bars
    for k, cls in enumerate(classes):
        cls_rows = prop_cells.loc[prop_cells[groupby] == cls]
        bars = []
        for t in conds:
            hit = cls_rows.loc[cls_rows[conditions] == t, 'Barcode']
            # a class that is absent at a timepoint contributes 0
            bars.append(hit.values[0] if len(hit) > 0 else 0)

        axs[0].bar(x_pos, bars, bottom=stack_base, width=1, edgecolor=None, color=color[k])
        stack_base = [b + base for b, base in zip(bars, stack_base)]

        tendency = [b - bars[0] for b in bars]
        axs[1].plot(x_pos, bars, color=color[k], linewidth=5, label=cls)
        axs[2].plot(x_pos, tendency, color=color[k], linewidth=5, label=cls)
    axs[1].legend(fontsize=20)

    # --- axis cosmetics for the first three panels ---
    panel_specs = [(' proportion', [0, 1]),
                   (' proportion trend', [0, 1]),
                   (' proportion change', [-1, 1])]
    for ax, (suffix, ylim) in zip(axs[:3], panel_specs):
        ax.set_ylabel(groupby + suffix, fontsize=20)
        ax.set_xticks(x_pos)
        ax.set_xticklabels(conds, fontsize=20)
        ax.set_ylim(ylim)
    yticks = [0, 0.2, 0.4, 0.6, 0.8, 1]
    for ax in axs[:2]:
        # fix the tick positions before labelling them so labels cannot drift
        ax.set_yticks(yticks)
        ax.set_yticklabels(yticks, fontsize=20)
    axs[0].set_xlim([-0.5, len(conds) - 0.5])

    # --- last panel: change in clone size between 0hr and 14day ---
    obs = adata.obs[[conditions, 'cloneid', groupby]]
    records = []
    for bc in obs['cloneid'].unique():
        clone = obs.loc[obs['cloneid'] == bc]
        scr = len(clone.loc[clone[conditions] == '0hr'])
        ontx = len(clone.loc[clone[conditions] == '14day'])
        if scr > 0 and ontx > 0:
            # the clone's class is taken from its first cell
            records.append((bc, ontx - scr, clone[groupby].values[0]))
    res = pd.DataFrame(records, columns=['cloneid', 'difference 14day VS 0hr', groupby])
    res = res.loc[res[groupby] != '-']

    sns.boxplot(data=res, x=groupby, y='difference 14day VS 0hr', ax=axs[3], color='white')
    sns.swarmplot(data=res, x=groupby, y='difference 14day VS 0hr', ax=axs[3], size=10, color='black')
    last = axs[3]
    for item in ([last.title, last.xaxis.label, last.yaxis.label] +
                 last.get_xticklabels() + last.get_yticklabels()):
        item.set_fontsize(20)

    plt.show()
    fig.savefig(out_path, bbox_inches='tight')
    return fig, axs, res


adata=adata4006.copy()
fig, axs, res = plot_trajectory_proportions(
    adata, 'figuresNEW/HCC4006/HCC4006trajProportions.pdf',
    conds=conds, groupby=groupby, conditions=conditions)

adata=adataPC9.copy()
fig, axs, res = plot_trajectory_proportions(
    adata, 'figuresNEW/PC9/PC9trajProportions.pdf',
    conds=conds, groupby=groupby, conditions=conditions)

adata=adata707.copy()
fig, axs, res = plot_trajectory_proportions(
    adata, 'figuresNEW/MGH707/MGH707trajProportions.pdf',
    conds=conds, groupby=groupby, conditions=conditions)

In [None]:
def heatmap_with_size(x, y, size):
    """Draw a scatter-based heatmap whose square markers are scaled by ``size``.

    Parameters
    ----------
    x, y : Series-like (must provide ``.unique()`` and ``.map()``)
        Label of each point along the horizontal / vertical axis. The sorted
        unique labels become the tick labels of the respective axis.
    size : array-like
        Per-point weight; multiplied by 500 to obtain the scatter ``s`` value.

    A new figure is created on every call and nothing is returned.
    """
    fig, axis = plt.subplots()

    # Sorted unique labels, each assigned its integer position on the axis
    col_names = sorted(x.unique())
    row_names = sorted(y.unique())
    col_pos = {name: pos for pos, name in enumerate(col_names)}
    row_pos = {name: pos for pos, name in enumerate(row_names)}

    marker_scale = 500
    axis.scatter(
        x=x.map(col_pos),
        y=y.map(row_pos),
        s=size * marker_scale,  # marker area grows linearly with `size`
        marker='s',             # squares, to mimic heatmap cells
    )

    # Put the original labels back on the integer tick positions
    axis.set_xticks([col_pos[name] for name in col_names])
    axis.set_xticklabels(col_names, rotation=45, horizontalalignment='right')
    axis.set_yticks([row_pos[name] for name in row_names])
    axis.set_yticklabels(row_names)

In [None]:
adata=adata707.copy()
timepoints=['0hr','24hr','14day','16day']

# Expression table used for both signature scores. It is converted once: the
# scores only read gene columns (NOTE(review): assumes the gene columns returned
# by andata2df do not depend on adata.obs -- confirm against scAnalysis).
expr=scrna.andata2df(adata)

#Look into YAP signature: per-cell mean over the signature genes present in the data
YAP=pd.read_excel('YAP_73gene_signature_and_CHiP_targets_from_Laurent_Sansregret.xlsx',header=None)
genes=[g for g in YAP[0].tolist() if g in expr.columns]
adata.obs['YAP_Sig'] = expr.loc[:, genes].mean(axis=1)

#Mesenchymal signature https://www.nature.com/articles/s41598-018-21061-1
# 'ITGB6' was previously misspelled 'ITBG6', so the var_names filter silently dropped it.
mes_sig=['VIM','CDH2','FOXC2','SNAI1','SNAI2','TWIST1','GSC','FN1','ITGB6','MMP2','MMP3','MMP9','SOX10']
mes_sig=[g for g in mes_sig if g in adata.var_names]
adata.obs['Mesenchymal_Sig'] = expr.loc[:, mes_sig].mean(axis=1)

# Convert again so the table carries the new score columns, then drop cells
# without a class ('-'). `category` is the class column name set in an earlier cell.
df=scrna.andata2df(adata)
df=df.loc[df[category]!='-']

fig, axs = plt.subplots(2,4, figsize=(30, 12))
axs=axs.ravel()

# Top row: embedding scatter coloured by the feature; bottom row: its dot plot.
for k, g in enumerate(['YAP_Sig','MET','CDH1','VIM']):
    scv.pl.scatter(adata, color=g, s=50,ax=axs[k],show=False)

    per_group=df.groupby(['timepoint',category])[g]
    #average expression (rows: class, columns: timepoint)
    df2=per_group.mean().unstack('timepoint')[timepoints].dropna(how='all')
    #number of cells, forced onto the same rows/columns as df2: dotmap pairs the
    #two tables by position, so they must not be filtered independently
    df2_2=per_group.count().unstack('timepoint').reindex(index=df2.index,columns=df2.columns)
    dotmap(df2,df2_2,cmap='seismic',ax=axs[k+4],cbarlabel='',marker='o')

#set font size
for ax in axs:
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                  ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(20)
plt.show()
fig.savefig('figuresNEW/MGH707/UMAPS_final/MGH707resistMechan.png',dpi=300, bbox_inches='tight')

In [None]:
adata=adata707.copy()
timepoints=['0hr','24hr','14day','16day']

# One column per signature; the non-empty cells of a column are its gene symbols.
EGF=pd.read_excel('Gene_signatures_for_EGF816x2101_RNAseq_data.xlsx')

# Score every non-empty signature as the per-cell mean over its genes found in
# the data. The expression table is converted once instead of once per signature
# (NOTE(review): assumes andata2df's gene columns do not depend on adata.obs -- confirm).
expr=scrna.andata2df(adata)
sig_cols=[]
for c in EGF.columns:
    genes=EGF[c].dropna().tolist()
    if len(genes)>0:
        genes=[g for g in genes if g in expr.columns]
        adata.obs[c+'_Sig'] = expr.loc[:, genes].mean(axis=1)
        sig_cols.append(c+'_Sig')

# Figure 1: embedding scatter coloured by each signature score.
fig, axs = plt.subplots(3,4, figsize=(30, 15))
axs=axs.ravel()
for k, g in enumerate(sig_cols):
    scv.pl.scatter(adata, color=g, s=50,ax=axs[k],show=False)
plt.show()
fig.savefig('figuresNEW/MGH707/UMAPS_final/MGH707Various_signaturesUMAP.png',dpi=300, bbox_inches='tight')


# Figure 2: dot plot per signature (colour: mean score, size: number of cells)
# by class and timepoint. `category` is the class column name set in an earlier cell.
fig, axs = plt.subplots(3,4, figsize=(30, 15))
axs=axs.ravel()
df=scrna.andata2df(adata)
df=df.loc[df[category]!='-']
for k, g in enumerate(sig_cols):
    per_group=df.groupby(['timepoint',category])[g]
    #average expression (rows: class, columns: timepoint)
    df2=per_group.mean().unstack('timepoint')[timepoints].dropna(how='all')
    #number of cells, forced onto the same rows/columns as df2: dotmap pairs the
    #two tables by position, so they must not be filtered independently
    df2_2=per_group.count().unstack('timepoint').reindex(index=df2.index,columns=df2.columns)
    dotmap(df2,df2_2,cmap='seismic',ax=axs[k],cbarlabel=g,marker='o')
plt.tight_layout()
plt.show()
fig.savefig('figuresNEW/MGH707/DOTPLOT_final/MGH707Various_signaturesDOTPLOT.png',dpi=300, bbox_inches='tight')

In [None]:
adata=adata707.copy()
timepoints=['0hr','24hr','14day','16day']

# Long-format table: one row per (Geneset, symbol) pair.
EGF=pd.read_excel('Metacore_gene_sets_from_Tina.xlsx')

# Score every gene set as the per-cell mean over its genes found in the data.
# The expression table is converted once instead of once per gene set
# (NOTE(review): assumes andata2df's gene columns do not depend on adata.obs -- confirm).
expr=scrna.andata2df(adata)
sig_cols=[]
for c in EGF['Geneset'].unique():
    genes=EGF.loc[EGF['Geneset']==c,'symbol'].tolist()
    if len(genes)>0:
        genes=[g for g in genes if g in expr.columns]
        adata.obs[c+'_Sig'] = expr.loc[:, genes].mean(axis=1)
        sig_cols.append(c+'_Sig')

# Figure 1: embedding scatter coloured by each gene-set score.
fig, axs = plt.subplots(4,4, figsize=(30, 20))
axs=axs.ravel()
for k, g in enumerate(sig_cols):
    scv.pl.scatter(adata, color=g, s=50,ax=axs[k],show=False)
plt.show()
fig.savefig('figuresNEW/MGH707/UMAPS_final/MGH707Various_signaturesTina_UMAP.png',dpi=300, bbox_inches='tight')


# Figure 2: dot plot per gene set (colour: mean score, size: number of cells)
# by class and timepoint. `category` is the class column name set in an earlier cell.
fig, axs = plt.subplots(4,4, figsize=(30, 24))
axs=axs.ravel()
df=scrna.andata2df(adata)
df=df.loc[df[category]!='-']
for k, g in enumerate(sig_cols):
    per_group=df.groupby(['timepoint',category])[g]
    #average expression (rows: class, columns: timepoint)
    df2=per_group.mean().unstack('timepoint')[timepoints].dropna(how='all')
    #number of cells, forced onto the same rows/columns as df2: dotmap pairs the
    #two tables by position, so they must not be filtered independently
    df2_2=per_group.count().unstack('timepoint').reindex(index=df2.index,columns=df2.columns)
    dotmap(df2,df2_2,cmap='seismic',ax=axs[k],cbarlabel=g,marker='o')
plt.tight_layout()

plt.show()
fig.savefig('figuresNEW/MGH707/DOTPLOT_final/MGH707Various_signaturesTina_DOTPLOT.png',dpi=300, bbox_inches='tight')

In [None]:
adata=adataPC9.copy()
timepoints=['0hr','24hr','14day','16day']

# Expression table used for both signature scores. It is converted once: the
# scores only read gene columns (NOTE(review): assumes the gene columns returned
# by andata2df do not depend on adata.obs -- confirm against scAnalysis).
expr=scrna.andata2df(adata)

#Look into YAP signature: per-cell mean over the signature genes present in the data
YAP=pd.read_excel('YAP_73gene_signature_and_CHiP_targets_from_Laurent_Sansregret.xlsx',header=None)
genes=[g for g in YAP[0].tolist() if g in expr.columns]
adata.obs['YAP_Sig'] = expr.loc[:, genes].mean(axis=1)

#Mesenchymal signature https://www.nature.com/articles/s41598-018-21061-1
# 'ITGB6' was previously misspelled 'ITBG6', so the var_names filter silently dropped it.
mes_sig=['VIM','CDH2','FOXC2','SNAI1','SNAI2','TWIST1','GSC','FN1','ITGB6','MMP2','MMP3','MMP9','SOX10']
mes_sig=[g for g in mes_sig if g in adata.var_names]
adata.obs['Mesenchymal_Sig'] = expr.loc[:, mes_sig].mean(axis=1)

# Convert again so the table carries the new score columns, then drop cells
# without a class ('-'). `category` is the class column name set in an earlier cell.
df=scrna.andata2df(adata)
df=df.loc[df[category]!='-']

fig, axs = plt.subplots(2,4, figsize=(30, 12))
axs=axs.ravel()

# Top row: embedding scatter coloured by the feature; bottom row: its dot plot.
for k, g in enumerate(['YAP_Sig','MET','CDH1','VIM']):
    scv.pl.scatter(adata, color=g, s=50,ax=axs[k],show=False)

    per_group=df.groupby(['timepoint',category])[g]
    #average expression (rows: class, columns: timepoint)
    df2=per_group.mean().unstack('timepoint')[timepoints].dropna(how='all')
    #number of cells, forced onto the same rows/columns as df2: dotmap pairs the
    #two tables by position, so they must not be filtered independently
    df2_2=per_group.count().unstack('timepoint').reindex(index=df2.index,columns=df2.columns)
    dotmap(df2,df2_2,cmap='seismic',ax=axs[k+4],cbarlabel='',marker='o')

#set font size
for ax in axs:
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                  ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(20)
plt.show()
fig.savefig('figuresNEW/PC9/PC9resistMechan.png',dpi=300, bbox_inches='tight')

In [None]:
adata=adataPC9.copy()
timepoints=['0hr','24hr','14day','16day']

# One column per signature; the non-empty cells of a column are its gene symbols.
EGF=pd.read_excel('Gene_signatures_for_EGF816x2101_RNAseq_data.xlsx')

# Score every non-empty signature as the per-cell mean over its genes found in
# the data. The expression table is converted once instead of once per signature
# (NOTE(review): assumes andata2df's gene columns do not depend on adata.obs -- confirm).
expr=scrna.andata2df(adata)
sig_cols=[]
for c in EGF.columns:
    genes=EGF[c].dropna().tolist()
    if len(genes)>0:
        genes=[g for g in genes if g in expr.columns]
        adata.obs[c+'_Sig'] = expr.loc[:, genes].mean(axis=1)
        sig_cols.append(c+'_Sig')

# Figure 1: embedding scatter coloured by each signature score.
fig, axs = plt.subplots(3,4, figsize=(30, 15))
axs=axs.ravel()
for k, g in enumerate(sig_cols):
    scv.pl.scatter(adata, color=g, s=50,ax=axs[k],show=False)
plt.show()
fig.savefig('figuresNEW/PC9/UMAPS_final/PC9Various_signaturesUMAP.png',dpi=300, bbox_inches='tight')


# Figure 2: dot plot per signature (colour: mean score, size: number of cells)
# by class and timepoint. `category` is the class column name set in an earlier cell.
fig, axs = plt.subplots(3,4, figsize=(30, 15))
axs=axs.ravel()
df=scrna.andata2df(adata)
df=df.loc[df[category]!='-']
for k, g in enumerate(sig_cols):
    per_group=df.groupby(['timepoint',category])[g]
    #average expression (rows: class, columns: timepoint)
    df2=per_group.mean().unstack('timepoint')[timepoints].dropna(how='all')
    #number of cells, forced onto the same rows/columns as df2: dotmap pairs the
    #two tables by position, so they must not be filtered independently
    df2_2=per_group.count().unstack('timepoint').reindex(index=df2.index,columns=df2.columns)
    dotmap(df2,df2_2,cmap='seismic',ax=axs[k],cbarlabel=g,marker='o')
plt.tight_layout()
plt.show()
fig.savefig('figuresNEW/PC9/DOTPLOT_final/PC9Various_signaturesDOTPLOT.png',dpi=300, bbox_inches='tight')

In [None]:
adata=adataPC9.copy()
timepoints=['0hr','24hr','14day','16day']

# Long-format table: one row per (Geneset, symbol) pair.
EGF=pd.read_excel('Metacore_gene_sets_from_Tina.xlsx')

# Score every gene set as the per-cell mean over its genes found in the data.
# The expression table is converted once instead of once per gene set
# (NOTE(review): assumes andata2df's gene columns do not depend on adata.obs -- confirm).
expr=scrna.andata2df(adata)
sig_cols=[]
for c in EGF['Geneset'].unique():
    genes=EGF.loc[EGF['Geneset']==c,'symbol'].tolist()
    if len(genes)>0:
        genes=[g for g in genes if g in expr.columns]
        adata.obs[c+'_Sig'] = expr.loc[:, genes].mean(axis=1)
        sig_cols.append(c+'_Sig')

# Figure 1: embedding scatter coloured by each gene-set score.
fig, axs = plt.subplots(4,4, figsize=(30, 20))
axs=axs.ravel()
for k, g in enumerate(sig_cols):
    scv.pl.scatter(adata, color=g, s=50,ax=axs[k],show=False)
plt.show()
fig.savefig('figuresNEW/PC9/UMAPS_final/PC9Various_signaturesTina_UMAP.png',dpi=300, bbox_inches='tight')


# Figure 2: dot plot per gene set (colour: mean score, size: number of cells)
# by class and timepoint. `category` is the class column name set in an earlier cell.
fig, axs = plt.subplots(4,4, figsize=(30, 24))
axs=axs.ravel()
df=scrna.andata2df(adata)
df=df.loc[df[category]!='-']
for k, g in enumerate(sig_cols):
    per_group=df.groupby(['timepoint',category])[g]
    #average expression (rows: class, columns: timepoint)
    df2=per_group.mean().unstack('timepoint')[timepoints].dropna(how='all')
    #number of cells, forced onto the same rows/columns as df2: dotmap pairs the
    #two tables by position, so they must not be filtered independently
    df2_2=per_group.count().unstack('timepoint').reindex(index=df2.index,columns=df2.columns)
    dotmap(df2,df2_2,cmap='seismic',ax=axs[k],cbarlabel=g,marker='o')
plt.tight_layout()

plt.show()
fig.savefig('figuresNEW/PC9/DOTPLOT_final/PC9Various_signaturesTina_DOTPLOT.png',dpi=300, bbox_inches='tight')

In [None]:
adata=adata827.copy()
timepoints=['0hr','24hr','14day','16day']

# Expression table used for both signature scores. It is converted once: the
# scores only read gene columns (NOTE(review): assumes the gene columns returned
# by andata2df do not depend on adata.obs -- confirm against scAnalysis).
expr=scrna.andata2df(adata)

#Look into YAP signature: per-cell mean over the signature genes present in the data
YAP=pd.read_excel('YAP_73gene_signature_and_CHiP_targets_from_Laurent_Sansregret.xlsx',header=None)
genes=[g for g in YAP[0].tolist() if g in expr.columns]
adata.obs['YAP_Sig'] = expr.loc[:, genes].mean(axis=1)

#Mesenchymal signature https://www.nature.com/articles/s41598-018-21061-1
# 'ITGB6' was previously misspelled 'ITBG6', so the var_names filter silently dropped it.
mes_sig=['VIM','CDH2','FOXC2','SNAI1','SNAI2','TWIST1','GSC','FN1','ITGB6','MMP2','MMP3','MMP9','SOX10']
mes_sig=[g for g in mes_sig if g in adata.var_names]
adata.obs['Mesenchymal_Sig'] = expr.loc[:, mes_sig].mean(axis=1)

# Convert again so the table carries the new score columns, then drop cells
# without a class ('-'). `category` is the class column name set in an earlier cell.
df=scrna.andata2df(adata)
df=df.loc[df[category]!='-']

fig, axs = plt.subplots(2,4, figsize=(30, 12))
axs=axs.ravel()

# Top row: embedding scatter coloured by the feature; bottom row: its dot plot.
for k, g in enumerate(['YAP_Sig','MET','CDH1','VIM']):
    scv.pl.scatter(adata, color=g, s=50,ax=axs[k],show=False)

    per_group=df.groupby(['timepoint',category])[g]
    #average expression (rows: class, columns: timepoint)
    df2=per_group.mean().unstack('timepoint')[timepoints].dropna(how='all')
    #number of cells, forced onto the same rows/columns as df2: dotmap pairs the
    #two tables by position, so they must not be filtered independently
    df2_2=per_group.count().unstack('timepoint').reindex(index=df2.index,columns=df2.columns)
    dotmap(df2,df2_2,cmap='seismic',ax=axs[k+4],cbarlabel='',marker='o')

#set font size
for ax in axs:
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                  ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(20)
plt.show()
fig.savefig('figuresNEW/HCC827/HCC827resistMechan.png',dpi=300, bbox_inches='tight')

In [None]:
adata=adata827.copy()
timepoints=['0hr','24hr','14day','16day']

# One column per signature; the non-empty cells of a column are its gene symbols.
EGF=pd.read_excel('Gene_signatures_for_EGF816x2101_RNAseq_data.xlsx')

# Score every non-empty signature as the per-cell mean over its genes found in
# the data. The expression table is converted once instead of once per signature
# (NOTE(review): assumes andata2df's gene columns do not depend on adata.obs -- confirm).
expr=scrna.andata2df(adata)
sig_cols=[]
for c in EGF.columns:
    genes=EGF[c].dropna().tolist()
    if len(genes)>0:
        genes=[g for g in genes if g in expr.columns]
        adata.obs[c+'_Sig'] = expr.loc[:, genes].mean(axis=1)
        sig_cols.append(c+'_Sig')

# Figure 1: embedding scatter coloured by each signature score.
fig, axs = plt.subplots(3,4, figsize=(30, 15))
axs=axs.ravel()
for k, g in enumerate(sig_cols):
    scv.pl.scatter(adata, color=g, s=50,ax=axs[k],show=False)
plt.show()
fig.savefig('figuresNEW/HCC827/UMAPS_final/HCC827Various_signaturesUMAP.png',dpi=300, bbox_inches='tight')


# Figure 2: dot plot per signature (colour: mean score, size: number of cells)
# by class and timepoint. `category` is the class column name set in an earlier cell.
fig, axs = plt.subplots(3,4, figsize=(30, 15))
axs=axs.ravel()
df=scrna.andata2df(adata)
df=df.loc[df[category]!='-']
for k, g in enumerate(sig_cols):
    per_group=df.groupby(['timepoint',category])[g]
    #average expression (rows: class, columns: timepoint)
    df2=per_group.mean().unstack('timepoint')[timepoints].dropna(how='all')
    #number of cells, forced onto the same rows/columns as df2: dotmap pairs the
    #two tables by position, so they must not be filtered independently
    df2_2=per_group.count().unstack('timepoint').reindex(index=df2.index,columns=df2.columns)
    dotmap(df2,df2_2,cmap='seismic',ax=axs[k],cbarlabel=g,marker='o')
plt.tight_layout()
plt.show()
fig.savefig('figuresNEW/HCC827/DOTPLOT_final/HCC827Various_signaturesDOTPLOT.png',dpi=300, bbox_inches='tight')

In [None]:
adata=adata827.copy()
timepoints=['0hr','24hr','14day','16day']

# Long-format table: one row per (Geneset, symbol) pair.
EGF=pd.read_excel('Metacore_gene_sets_from_Tina.xlsx')

# Score every gene set as the per-cell mean over its genes found in the data.
# The expression table is converted once instead of once per gene set
# (NOTE(review): assumes andata2df's gene columns do not depend on adata.obs -- confirm).
expr=scrna.andata2df(adata)
sig_cols=[]
for c in EGF['Geneset'].unique():
    genes=EGF.loc[EGF['Geneset']==c,'symbol'].tolist()
    if len(genes)>0:
        genes=[g for g in genes if g in expr.columns]
        adata.obs[c+'_Sig'] = expr.loc[:, genes].mean(axis=1)
        sig_cols.append(c+'_Sig')

# Figure 1: embedding scatter coloured by each gene-set score.
fig, axs = plt.subplots(4,4, figsize=(30, 20))
axs=axs.ravel()
for k, g in enumerate(sig_cols):
    scv.pl.scatter(adata, color=g, s=50,ax=axs[k],show=False)
plt.show()
fig.savefig('figuresNEW/HCC827/UMAPS_final/HCC827Various_signaturesTina_UMAP.png',dpi=300, bbox_inches='tight')


# Figure 2: dot plot per gene set (colour: mean score, size: number of cells)
# by class and timepoint. `category` is the class column name set in an earlier cell.
fig, axs = plt.subplots(4,4, figsize=(30, 24))
axs=axs.ravel()
df=scrna.andata2df(adata)
df=df.loc[df[category]!='-']
for k, g in enumerate(sig_cols):
    per_group=df.groupby(['timepoint',category])[g]
    #average expression (rows: class, columns: timepoint)
    df2=per_group.mean().unstack('timepoint')[timepoints].dropna(how='all')
    #number of cells, forced onto the same rows/columns as df2: dotmap pairs the
    #two tables by position, so they must not be filtered independently
    df2_2=per_group.count().unstack('timepoint').reindex(index=df2.index,columns=df2.columns)
    dotmap(df2,df2_2,cmap='seismic',ax=axs[k],cbarlabel=g,marker='o')
plt.tight_layout()

plt.show()
fig.savefig('figuresNEW/HCC827/DOTPLOT_final/HCC827Various_signaturesTina_DOTPLOT.png',dpi=300, bbox_inches='tight')

In [None]:
adata=adata827.copy()
timepoints=['0hr','24hr','14day','16day']

# One column per signature; the non-empty cells of a column are its gene symbols.
EGF=pd.read_excel('Epithelial_Mesenchymal_and_YAP_signatures_for_Javi.xlsx')

# Score every non-empty signature as the per-cell mean over its genes found in
# the data. The expression table is converted once instead of once per signature
# (NOTE(review): assumes andata2df's gene columns do not depend on adata.obs -- confirm).
expr=scrna.andata2df(adata)
sig_cols=[]
for c in EGF.columns:
    genes=EGF[c].dropna().tolist()
    if len(genes)>0:
        genes=[g for g in genes if g in expr.columns]
        adata.obs[c+'_Sig'] = expr.loc[:, genes].mean(axis=1)
        sig_cols.append(c+'_Sig')

# Figure 1: embedding scatter coloured by each signature score.
fig, axs = plt.subplots(2,2, figsize=(15, 10))
axs=axs.ravel()
for k, g in enumerate(sig_cols):
    scv.pl.scatter(adata, color=g, s=50,ax=axs[k],show=False)
plt.show()
fig.savefig('figuresNEW/HCC827/UMAPS_final/HCC827_EMTandYAP_UMAP.png',dpi=300, bbox_inches='tight')


# Figure 2: dot plot per signature (colour: mean score, size: number of cells)
# by class and timepoint. `category` is the class column name set in an earlier cell.
fig, axs = plt.subplots(2,2, figsize=(15, 12))
axs=axs.ravel()
df=scrna.andata2df(adata)
df=df.loc[df[category]!='-']
for k, g in enumerate(sig_cols):
    per_group=df.groupby(['timepoint',category])[g]
    #average expression (rows: class, columns: timepoint)
    df2=per_group.mean().unstack('timepoint')[timepoints].dropna(how='all')
    #number of cells, forced onto the same rows/columns as df2: dotmap pairs the
    #two tables by position, so they must not be filtered independently
    df2_2=per_group.count().unstack('timepoint').reindex(index=df2.index,columns=df2.columns)
    dotmap(df2,df2_2,cmap='seismic',ax=axs[k],cbarlabel=g,marker='o')
# lay out once, after every panel has been drawn (was re-run inside the loop)
plt.tight_layout()

plt.show()
fig.savefig('figuresNEW/HCC827/DOTPLOT_final/HCC827_EMTandYAP_DOTPLOT.png',dpi=300, bbox_inches='tight')

In [None]:
adata=adata4006.copy()
timepoints=['0hr','24hr','14day','16day']

# Expression table used for both signature scores. It is converted once: the
# scores only read gene columns (NOTE(review): assumes the gene columns returned
# by andata2df do not depend on adata.obs -- confirm against scAnalysis).
expr=scrna.andata2df(adata)

#Look into YAP signature: per-cell mean over the signature genes present in the data
YAP=pd.read_excel('YAP_73gene_signature_and_CHiP_targets_from_Laurent_Sansregret.xlsx',header=None)
genes=[g for g in YAP[0].tolist() if g in expr.columns]
adata.obs['YAP_Sig'] = expr.loc[:, genes].mean(axis=1)

#Mesenchymal signature https://www.nature.com/articles/s41598-018-21061-1
# 'ITGB6' was previously misspelled 'ITBG6', so the var_names filter silently dropped it.
mes_sig=['VIM','CDH2','FOXC2','SNAI1','SNAI2','TWIST1','GSC','FN1','ITGB6','MMP2','MMP3','MMP9','SOX10']
mes_sig=[g for g in mes_sig if g in adata.var_names]
adata.obs['Mesenchymal_Sig'] = expr.loc[:, mes_sig].mean(axis=1)

# Convert again so the table carries the new score columns, then drop cells
# without a class ('-'). `category` is the class column name set in an earlier cell.
df=scrna.andata2df(adata)
df=df.loc[df[category]!='-']

fig, axs = plt.subplots(2,4, figsize=(30, 12))
axs=axs.ravel()

# Top row: embedding scatter coloured by the feature; bottom row: its dot plot.
for k, g in enumerate(['YAP_Sig','MET','CDH1','VIM']):
    scv.pl.scatter(adata, color=g, s=50,ax=axs[k],show=False)

    per_group=df.groupby(['timepoint',category])[g]
    #average expression (rows: class, columns: timepoint)
    df2=per_group.mean().unstack('timepoint')[timepoints].dropna(how='all')
    #number of cells, forced onto the same rows/columns as df2: dotmap pairs the
    #two tables by position, so they must not be filtered independently
    df2_2=per_group.count().unstack('timepoint').reindex(index=df2.index,columns=df2.columns)
    dotmap(df2,df2_2,cmap='seismic',ax=axs[k+4],cbarlabel='',marker='o')

#set font size
for ax in axs:
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                  ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(20)
plt.show()
fig.savefig('figuresNEW/HCC4006/HCC4006resistMechan.png',dpi=300, bbox_inches='tight')

In [None]:
# HCC4006: score every gene list of the EGF816 spreadsheet (one list per column)
# and plot each score twice: on the embedding (first figure) and as a dot plot
# per timepoint and `category` group (second figure).
# NOTE(review): `category` is not defined in this cell; it must come from an earlier cell.
adata=adata4006.copy()

EGF=pd.read_excel('Gene_signatures_for_EGF816x2101_RNAseq_data.xlsx')
# 3x4 grid: axs[k] is indexed once per non-empty column, so at most 12 signatures fit.
fig, axs = plt.subplots(3,4, figsize=(30, 15))
axs=axs.ravel()
k=0
for c in EGF.columns:
    genes=EGF[c].dropna().tolist()
    if len(genes)>0:
        # score = per-cell mean over the listed genes that exist as columns of the table
        expr=scrna.andata2df(adata)
        genes=[g for g in genes if g in expr.columns]
        score = expr.loc[:, genes].mean(axis=1)
        adata.obs[c+'_Sig'] = score

        scv.pl.scatter(adata, color=c+'_Sig', s=50,ax=axs[k],show=False)
        k=k+1
plt.show()
fig.savefig('figuresNEW/HCC4006/UMAPS_final/HCC4006Various_signaturesUMAP.png',dpi=300, bbox_inches='tight')


# Second figure: dot plots of the scores stored in adata.obs by the loop above.
fig, axs = plt.subplots(3,4, figsize=(30, 15))
axs=axs.ravel()
k=0
df=scrna.andata2df(adata)
# drop cells whose category is the placeholder '-'
df=df.loc[df[category]!='-']
for c in EGF.columns:
    genes=EGF[c].dropna().tolist()
    if len(genes)>0:
        g=c+'_Sig'
        #average expression (rows: category groups, columns: timepoints)
        df2=df[[g,'timepoint',category]]
        df2=df2.groupby(['timepoint',category]).mean().reset_index().pivot(index='timepoint', columns=category, values=g)
        df2=df2.T
        df2=df2[['0hr','24hr','14day','16day']].dropna(how='all')

        #number of cells (same layout; dotmap uses it for the dot sizes)
        df2_2=df[[g,'timepoint',category]]
        df2_2=df2_2.groupby(['timepoint',category]).count().reset_index().pivot(index='timepoint', columns=category, values=g)
        df2_2=df2_2.T
        df2_2=df2_2[['0hr','24hr','14day','16day']].dropna(how='all')
        dotmap(df2,df2_2,cmap='seismic',ax=axs[k],cbarlabel=g,marker='o')
        k=k+1
plt.tight_layout()
plt.show()
fig.savefig('figuresNEW/HCC4006/DOTPLOT_final/HCC4006Various_signaturesDOTPLOT.png',dpi=300, bbox_inches='tight')

In [None]:
# HCC4006: same two figures as for the EGF816 signatures, but the gene sets come
# from a long-format table with one row per gene ('Geneset' and 'symbol' columns).
# NOTE(review): `category` is not defined in this cell; it must come from an earlier cell.
adata=adata4006.copy()

# The name EGF is reused here for the Metacore table.
EGF=pd.read_excel('Metacore_gene_sets_from_Tina.xlsx')
# 4x4 grid: axs[k] is indexed once per gene set, so at most 16 gene sets fit.
fig, axs = plt.subplots(4,4, figsize=(30, 20))
axs=axs.ravel()
k=0
for c in EGF['Geneset'].unique():
    dfaux=EGF.loc[EGF['Geneset']==c]
    genes=dfaux['symbol'].tolist()
    if len(genes)>0:
        # score = per-cell mean over the listed genes that exist as columns of the table
        expr=scrna.andata2df(adata)
        genes=[g for g in genes if g in expr.columns]
        score = expr.loc[:, genes].mean(axis=1)
        adata.obs[c+'_Sig'] = score

        scv.pl.scatter(adata, color=c+'_Sig', s=50,ax=axs[k],show=False)
        k=k+1
plt.show()
fig.savefig('figuresNEW/HCC4006/UMAPS_final/HCC4006Various_signaturesTina_UMAP.png',dpi=300, bbox_inches='tight')


# Second figure: dot plots of the scores stored in adata.obs by the loop above.
fig, axs = plt.subplots(4,4, figsize=(30, 24))
axs=axs.ravel()
k=0
df=scrna.andata2df(adata)
# drop cells whose category is the placeholder '-'
df=df.loc[df[category]!='-']
for c in EGF['Geneset'].unique():
    dfaux=EGF.loc[EGF['Geneset']==c]
    genes=dfaux['symbol'].tolist()
    if len(genes)>0:
        g=c+'_Sig'
        #average expression (rows: category groups, columns: timepoints)
        df2=df[[g,'timepoint',category]]
        df2=df2.groupby(['timepoint',category]).mean().reset_index().pivot(index='timepoint', columns=category, values=g)
        df2=df2.T
        df2=df2[['0hr','24hr','14day','16day']].dropna(how='all')

        #number of cells (same layout; dotmap uses it for the dot sizes)
        df2_2=df[[g,'timepoint',category]]
        df2_2=df2_2.groupby(['timepoint',category]).count().reset_index().pivot(index='timepoint', columns=category, values=g)
        df2_2=df2_2.T
        df2_2=df2_2[['0hr','24hr','14day','16day']].dropna(how='all')
        dotmap(df2,df2_2,cmap='seismic',ax=axs[k],cbarlabel=g,marker='o')
        k=k+1
plt.tight_layout()

plt.show()
fig.savefig('figuresNEW/HCC4006/DOTPLOT_final/HCC4006Various_signaturesTina_DOTPLOT.png',dpi=300, bbox_inches='tight')

In [None]:
import plotly.express as px

# Table of whichever `adata` the previously executed cell left behind (project helper).
# NOTE(review): the trailing comment names plotly's sample dataset, presumably what this line replaced.
df = scrna.andata2df(adata)#px.data.election()

In [None]:
#dfaux=df.copy()#df.loc[df['trajectory_class']!='-']
#dfaux=dfaux.loc[dfaux.timepoint=='14day']
#fig = px.scatter_ternary(dfaux, a="VIM", b="CDH1", c="MET", size_max=15,color='timepoint',
#    color_discrete_map = {"VIM": "blue", "Yap_Sig": "green", "MET":"red"} )#hover_name="district",#color="winner", size="total",
#fig.show()

# DE analysis

In [None]:
#DE genes between trajectory classes at each timepoint
# The analysis is identical for every cell line, so it runs once per
# (label, AnnData) pair instead of being copy-pasted four times.
# For each timepoint it writes the ranked gene names to a CSV and saves a
# dot plot of the top 5 genes per class.
for cl, adata_src in [('HCC827',adata827),('HCC4006',adata4006),('MGH707',adata707),('PC9',adataPC9)]:
    adata=adata_src.copy()
    for t in adata.obs['timepoint'].unique():
        # cells of this timepoint, looking only at actual classes ('-' is the placeholder)
        keep=(adata.obs['timepoint']==t)&(adata.obs['trajectory_class']!='-')
        # explicit copy: rank_genes_groups stores its result in .uns, which
        # should be written to a real object rather than to a view of `adata`
        adataaux=adata[keep].copy()

        sc.tl.rank_genes_groups(adataaux, 'trajectory_class',use_raw=True)
        ranked_genes = pd.DataFrame(adataaux.uns['rank_genes_groups']['names'])
        ranked_genes.to_csv('figures/datasets/'+cl+'_DEgenes_trajClass'+t+'.csv')
        sc.pl.rank_genes_groups_dotplot(adataaux, n_genes=5,save='/traj_class_comparison/'+cl+'_DEgenes_trajClass'+t+'.pdf',show=False)#, vmin=-3,vmax=3,use_raw=False)#, cmap='bwr')

In [None]:
#DE genes between sensitive and tolerant
# Same analysis for every cell line, run once per (label, AnnData) pair.
# For each timepoint it writes the ranked gene names to a CSV and saves a
# dot plot of the top 10 genes per group.
for cl, adata_src in [('HCC827',adata827),('HCC4006',adata4006),('MGH707',adata707),('PC9',adataPC9)]:
    adata=adata_src.copy()
    for t in adata.obs['timepoint'].unique():
        obs=adata.obs
        # cells of this timepoint with an actual trajectory class ('-' is the
        # placeholder) that are labelled either sensitive or tolerant
        keep=((obs['timepoint']==t)
              &(obs['trajectory_class']!='-')
              &obs['sensitivity'].isin(['sensitive','tolerant']))
        # explicit copy: rank_genes_groups stores its result in .uns, which
        # should be written to a real object rather than to a view of `adata`
        adataaux=adata[keep].copy()

        if cl=='MGH707' and t=='16day':
            # The original analysis skips the DE test for this one combination and
            # only shows the embedding coloured by sensitivity.
            # NOTE(review): presumably one of the two groups is missing here -- confirm.
            sc.pl.umap(adataaux, color="sensitivity")
            continue

        sc.tl.rank_genes_groups(adataaux, 'sensitivity',use_raw=True)#,reference='sensitive')
        ranked_genes = pd.DataFrame(adataaux.uns['rank_genes_groups']['names'])
        ranked_genes.to_csv('figures/datasets/'+cl+'_DEgenes_sensitivity'+t+'.csv')
        sc.pl.rank_genes_groups_dotplot(adataaux, n_genes=10,figsize=(10,3),dendrogram=None,save='/sensitive_tolerant_comparison/'+cl+'_DEgenes_sensitivity'+t+'.pdf',show=False)#, vmin=-3,vmax=3,use_raw=False)#, cmap='bwr')


In [None]:
# Debug inspection: shows the value of the loop variable `t` left over from the last executed loop.
t

## Heatmaps 

In [None]:
#heatmaps
def custom_heatmap(df,genelist,sortby='log2fc',figsize=(10,5),
               z_score=None,col_cluster=False,row_cluster=True,colormap='seismic',centercolor=False,title='',vmin=None,vmax=None):
    """Draw a heatmap of `df` next to an annotated strip of its `sortby` column.

    The rows of `df` are sorted by `sortby` and rows containing NaN are dropped.
    The `sortby` column is drawn as a narrow annotated 'viridis' strip on the
    left; every other column is drawn as the main heatmap on the right.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to plot; must contain the column named by `sortby`.
    genelist : list
        Not used by the function; kept so that existing calls keep working.
    sortby : str
        Column used to sort the rows and shown in the left strip.
    figsize : tuple
        Size of the returned figure.
    z_score, col_cluster, row_cluster :
        Passed to ``sns.clustermap`` when `row_cluster` is true; the rows are
        then reordered as in the clustered result.
    colormap : str
        Colormap of the main heatmap.
    centercolor : bool
        When true, the main heatmap uses the fixed colour limits `vmin`/`vmax`;
        otherwise seaborn chooses the limits from the data.
    title : str
        Title of the main heatmap.
    vmin, vmax : float or None
        Colour limits used when `centercolor` is true. ``None`` (or the former
        default, an empty list) lets seaborn choose the limit.

    Returns
    -------
    matplotlib.figure.Figure
    """
    from matplotlib import gridspec

    def _unset_to_none(limit):
        # The defaults used to be empty lists meaning "not set"; accept them still.
        if isinstance(limit, (list, tuple)) and len(limit) == 0:
            return None
        return limit

    vminin=_unset_to_none(vmin)
    vmaxin=_unset_to_none(vmax)

    # Use the `df` argument. The previous version read the notebook global `df2`
    # here, so the function silently ignored whatever table was passed in.
    plotdf=df.sort_values(sortby).dropna()

    # main heatmap: every column except the sort column; strip: the sort column
    dfa=plotdf[[col for col in plotdf.columns if col != sortby]]
    dfb=plotdf[[sortby]]

    if row_cluster:
        # the tiny clustermap figure is only used to obtain the clustered row order
        gden=sns.clustermap(dfa,z_score=z_score,col_cluster=col_cluster,row_cluster=row_cluster,figsize=(1,1),method='single', metric='euclidean')
        dfa=gden.data2d
        dfb=dfb.loc[dfa.index]

    fig = plt.figure(figsize=figsize)
    gs = gridspec.GridSpec(1, 2, width_ratios=[1,20],wspace=0.005)

    # --- left strip: annotated values of the sort column ---
    # scalar limits (the previous version passed one-element Series)
    strip_min=dfb[sortby].min()
    strip_max=dfb[sortby].max()
    axs = fig.add_subplot(gs[0])
    sns.heatmap(dfb,ax=axs,cmap='viridis',linewidths=1,linecolor='k',vmin=strip_min,vmax=strip_max,annot=True,cbar=False)
    axs.set_xticklabels(axs.get_xticklabels(), rotation=90)

    # --- right panel: main heatmap ---
    axs = fig.add_subplot(gs[1])
    if centercolor:
        sns.heatmap(dfa,ax=axs,cmap=colormap,vmin=vminin,vmax=vmaxin,yticklabels=False,linewidths=0.5,linecolor='k')
    else:
        sns.heatmap(dfa,ax=axs,cmap=colormap,yticklabels=False,linewidths=1,linecolor='k',annot=False)
    axs.set_ylabel('')

    # the colorbar of the main heatmap is the axes added last to the figure
    cax = fig.axes[-1]
    cax.tick_params(labelsize=12)

    axs.set_title(title,fontsize=14)

    return fig

In [None]:
# Heatmap of the mean signature scores per trajectory class at 0hr and 24hr,
# one figure per cell line, with the rows sorted by the mean log2fc of the class.
names=['HCC4006','HCC827','PC9','MGH707']
# symmetric colour limit per cell line (vmin=-limit, vmax=+limit)
color_limit={'HCC4006':0.6,'HCC827':0.25,'PC9':0.45,'MGH707':0.35}
# the gene-set table does not depend on the cell line: read it once
EGF=pd.read_excel('Gene sets for heatmaps and UMAPs.xlsx',header=0)

for name, adata in zip(names,[adata4006,adata827,adataPC9,adata707]):
    # one score per non-empty spreadsheet column: per-cell mean over the listed
    # genes found in the table; stored in adata.obs as '<column>_Sig'
    genestoplot=[]
    for c in EGF.columns:
        genes=EGF[c].dropna().tolist()
        if len(genes)>0:
            expr=scrna.andata2df(adata)
            genes=[g for g in genes if g in expr.columns]
            score = expr.loc[:, genes].mean(axis=1)
            adata.obs[c+'_Sig'] = score
            genestoplot.append(c+'_Sig')

    df=scrna.andata2df(adata)
    df=df[genestoplot+['trajectory_class','timepoint','log2fc']]
    # mean log2fc per trajectory class over all timepoints
    FCs=df[['log2fc','trajectory_class']].groupby('trajectory_class').mean()

    Ts=['0hr','24hr']
    DFs={}
    for t in Ts:
        # Average the signature columns only. The 'timepoint' column used to be
        # part of the grouped frame; averaging a non-numeric column raises a
        # TypeError on pandas >= 2.0 (older versions dropped it silently).
        DFs[t]=df.loc[df['timepoint']==t, genestoplot+['trajectory_class']].groupby('trajectory_class').mean()
        DFs[t]=DFs[t].merge(FCs,left_index=True,right_index=True)

    # NOTE: the name `df2` is kept on purpose; other cells read it as a global.
    df2=DFs['0hr'].merge(DFs['24hr'],left_index=True,right_index=True,suffixes=('_0hr','_24hr'))

    df2=df2[df2.columns.sort_values()]
    # both halves carry the same per-class log2fc; keep a single copy
    df2['log2fc']=df2['log2fc_0hr']
    df2=df2.drop(columns=['log2fc_0hr','log2fc_24hr'])

    # drop the placeholder class
    df2=df2.loc[df2.index!='-']

    vmax=color_limit[name]
    vmin=-vmax

    fig=custom_heatmap(df2,genestoplot,sortby='log2fc',figsize=(20,5),
                            z_score=None,col_cluster=False,row_cluster=False,
                            colormap='seismic',centercolor=True,title=name,vmin=vmin,vmax=vmax)

    fig.savefig('figuresNEW/'+name+'/'+name+'_0-24Heatmap.pdf',bbox_inches='tight')

In [None]:
# Heatmap of the mean signature scores per trajectory class at 14day and 16day,
# one figure per cell line, with the rows sorted by the mean log2fc of the class.
names=['HCC4006','HCC827','PC9','MGH707']
# symmetric colour limit per cell line (vmin=-limit, vmax=+limit)
color_limit={'HCC4006':0.5,'HCC827':0.25,'PC9':0.25,'MGH707':0.25}
# the gene-set table does not depend on the cell line: read it once
EGF=pd.read_excel('Gene sets for heatmaps and UMAPs.xlsx',header=0)

for name, adata in zip(names,[adata4006,adata827,adataPC9,adata707]):
    # one score per non-empty spreadsheet column: per-cell mean over the listed
    # genes found in the table; stored in adata.obs as '<column>_Sig'
    genestoplot=[]
    for c in EGF.columns:
        genes=EGF[c].dropna().tolist()
        if len(genes)>0:
            expr=scrna.andata2df(adata)
            genes=[g for g in genes if g in expr.columns]
            score = expr.loc[:, genes].mean(axis=1)
            adata.obs[c+'_Sig'] = score
            genestoplot.append(c+'_Sig')

    df=scrna.andata2df(adata)
    df=df[genestoplot+['trajectory_class','timepoint','log2fc']]
    # mean log2fc per trajectory class over all timepoints
    FCs=df[['log2fc','trajectory_class']].groupby('trajectory_class').mean()

    Ts=['14day','16day']
    DFs={}
    for t in Ts:
        # Average the signature columns only. The 'timepoint' column used to be
        # part of the grouped frame; averaging a non-numeric column raises a
        # TypeError on pandas >= 2.0 (older versions dropped it silently).
        DFs[t]=df.loc[df['timepoint']==t, genestoplot+['trajectory_class']].groupby('trajectory_class').mean()
        DFs[t]=DFs[t].merge(FCs,left_index=True,right_index=True)

    # NOTE: the name `df2` is kept on purpose; other cells read it as a global.
    df2=DFs['14day'].merge(DFs['16day'],left_index=True,right_index=True,suffixes=('_14day','_16day'))

    df2=df2[df2.columns.sort_values()]
    # both halves carry the same per-class log2fc; keep a single copy
    df2['log2fc']=df2['log2fc_14day']
    df2=df2.drop(columns=['log2fc_14day','log2fc_16day'])

    # drop the placeholder class
    df2=df2.loc[df2.index!='-']

    vmax=color_limit[name]
    vmin=-vmax

    fig=custom_heatmap(df2,genestoplot,sortby='log2fc',figsize=(20,5),
                            z_score=None,col_cluster=False,row_cluster=False,
                            colormap='seismic',centercolor=True,title=name,vmin=vmin,vmax=vmax)

    fig.savefig('figuresNEW/'+name+'/'+name+'_14-16Heatmap.pdf',bbox_inches='tight')

In [None]:
# Heatmap of the mean signature scores per sensitivity group at 0hr and 24hr,
# one figure per cell line.
names=['HCC4006','HCC827','PC9','MGH707']
# the same colour limits are used for every cell line
vmin=-0.3
vmax=0.3
# the gene-set table does not depend on the cell line: read it once
EGF=pd.read_excel('Gene sets for heatmaps and UMAPs.xlsx',header=0)

for name, adata in zip(names,[adata4006,adata827,adataPC9,adata707]):
    # one score per non-empty spreadsheet column: per-cell mean over the listed
    # genes found in the table; stored in adata.obs as '<column>_Sig'
    genestoplot=[]
    for c in EGF.columns:
        genes=EGF[c].dropna().tolist()
        if len(genes)>0:
            expr=scrna.andata2df(adata)
            genes=[g for g in genes if g in expr.columns]
            score = expr.loc[:, genes].mean(axis=1)
            adata.obs[c+'_Sig'] = score
            genestoplot.append(c+'_Sig')

    df=scrna.andata2df(adata)
    df=df[genestoplot+['sensitivity','timepoint']]

    Ts=['0hr','24hr']
    DFs={}
    for t in Ts:
        # Average the signature columns only. The 'timepoint' column used to be
        # part of the grouped frame; averaging a non-numeric column raises a
        # TypeError on pandas >= 2.0 (older versions dropped it silently).
        DFs[t]=df.loc[df['timepoint']==t, genestoplot+['sensitivity']].groupby('sensitivity').mean()

    # NOTE: the name `df2` is kept on purpose; other cells read it as a global.
    df2=DFs['0hr'].merge(DFs['24hr'],left_index=True,right_index=True,suffixes=('_0hr','_24hr'))

    df2=df2[df2.columns.sort_values()]

    # drop the placeholder group
    df2=df2.loc[df2.index!='-']

    fig = plt.figure(figsize=(20,2))
    # `linewidths` is seaborn's parameter for the cell borders; the former
    # `linewidth=` keyword was only forwarded to matplotlib next to seaborn's
    # own `linewidths` default
    sns.heatmap(df2,cmap='seismic',linewidths=1,linecolor='k',vmin=vmin,vmax=vmax)
    plt.title(name)

    fig.savefig('figuresNEW/'+name+'/'+name+'_0-24SensitivityHeatmap.pdf',bbox_inches='tight')

In [None]:
# For every cell line: score the gene sets, then save (1) a grid of embedding
# scatter plots coloured by each variable and (2) a grid of dot plots of the same
# variables per timepoint and `category` group.
# NOTE(review): `category` is not defined in this cell; it must come from an earlier cell.
# NOTE: the scores are written into the shared adata* objects (no copy is made here).
names=['HCC4006','HCC827','PC9','MGH707']
kk=0
for adata in [adata4006,adata827,adataPC9,adata707]:

    # one score per non-empty spreadsheet column: per-cell mean over the listed
    # genes found in the table; stored in adata.obs as '<column>_Sig'
    genestoplot=[]
    EGF=pd.read_excel('Gene sets for heatmaps and UMAPs.xlsx',header=0)
    for c in EGF.columns:
        genes=EGF[c].dropna().tolist()
        if len(genes)>0:
            expr=scrna.andata2df(adata)
            genes=[g for g in genes if g in expr.columns]
            score = expr.loc[:, genes].mean(axis=1)
            adata.obs[c+'_Sig'] = score
            genestoplot.append(c+'_Sig')

    # three single genes followed by all signature scores
    VARS=['VIM','CDH1','MET']+genestoplot

    #plot
    from matplotlib.pyplot import cm

    
    # 4x6 grid: axs[kkk] is indexed once per variable, so at most 24 variables fit
    fig, axs = plt.subplots(4, 6, figsize=(45, 20))
    axs=axs.ravel()
    kkk=0
    for VAR in VARS:
        scv.pl.scatter(adata, color=VAR, s=50,ax=axs[kkk],show=False)
        kkk=kkk+1

    #set font size
    for ax in axs:
        for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                      ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(20)
    #plt.tight_layout()
    fig.savefig('figuresNEW/'+names[kk]+'/UMAPS_final/all_timepoints/'+names[kk]+'_UMAP'+'.png',dpi=300, bbox_inches='tight')
    
    
    
    
    # second figure: dot plots of the same variables
    fig, axs = plt.subplots(4,6, figsize=(45,20))
    axs=axs.ravel()
    k=0
    df=scrna.andata2df(adata)
    # drop cells whose category is the placeholder '-'
    df=df.loc[df[category]!='-']
    for VAR in VARS:
        g=VAR
        #average expression (rows: category groups, columns: timepoints)
        df2=df[[g,'timepoint',category]]
        df2=df2.groupby(['timepoint',category]).mean().reset_index().pivot(index='timepoint', columns=category, values=g)
        df2=df2.T
        df2=df2[['0hr','24hr','14day','16day']].dropna(how='all')

        #number of cells (same layout; dotmap uses it for the dot sizes)
        df2_2=df[[g,'timepoint',category]]
        df2_2=df2_2.groupby(['timepoint',category]).count().reset_index().pivot(index='timepoint', columns=category, values=g)
        df2_2=df2_2.T
        df2_2=df2_2[['0hr','24hr','14day','16day']].dropna(how='all')
        # figsize is only used by dotmap when no axes is passed, so it has no effect here
        dotmap(df2,df2_2,cmap='seismic',ax=axs[k],cbarlabel=g,marker='o',figsize=(7,15))
        axs[k].set_title(VAR)
        k=k+1
    for ax in axs:
        for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                      ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(20)
    plt.tight_layout()

    plt.show()
    fig.savefig('figuresNEW/'+names[kk]+'/DOTPLOT_final/'+names[kk]+'.png',dpi=300, bbox_inches='tight')
    kk=kk+1

In [None]:
# For every cell line: embedding scatter plots of the 0hr and 24hr cells, coloured
# by each variable, drawn on top of an outline of all cells.
from matplotlib.pyplot import cm

names=['HCC4006','HCC827','PC9','MGH707']
# the gene-set table does not depend on the cell line: read it once
EGF=pd.read_excel('Gene sets for heatmaps and UMAPs.xlsx',header=0)

kk=0
for adata in [adata4006,adata827,adataPC9,adata707]:

    # one score per non-empty spreadsheet column: per-cell mean over the listed
    # genes found in the table; stored in adata.obs as '<column>_Sig'
    genestoplot=[]
    for c in EGF.columns:
        genes=EGF[c].dropna().tolist()
        if len(genes)>0:
            expr=scrna.andata2df(adata)
            genes=[g for g in genes if g in expr.columns]
            score = expr.loc[:, genes].mean(axis=1)
            adata.obs[c+'_Sig'] = score
            genestoplot.append(c+'_Sig')

    VARS=['VIM','CDH1','MET']+genestoplot

    # Subset only AFTER the scores were written to adata.obs. The subset was
    # previously taken first, so the '<column>_Sig' columns were only available
    # in it when an earlier cell had already added them to the same object.
    mask=np.isin(adata.obs['timepoint'],['0hr','24hr'])
    adataaux=adata[mask]

    # 4x6 grid: axs[kkk] is indexed once per variable, so at most 24 variables fit
    fig, axs = plt.subplots(4, 6, figsize=(45, 20))
    axs=axs.ravel()
    kkk=0
    for VAR in VARS:
        # fully transparent points with an outline: background silhouette of all cells
        scv.pl.scatter(adata, color='white', size=100,ax=axs[kkk],add_outline=True,alpha=0,show=False)
        scv.pl.scatter(adataaux, color=VAR, s=50,show=False,ax=axs[kkk])
        kkk=kkk+1

    #set font size
    for ax in axs:
        for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                      ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(20)
    # NOTE(review): the file name ends with the LAST variable of VARS (left over
    # from the loop above) although the figure contains all of them. It is kept
    # unchanged so that existing output paths stay valid.
    fig.savefig('figuresNEW/'+names[kk]+'/UMAPS_final/0-24/'+names[kk]+'_UMAP'+VAR.replace(' ','_').replace('/','-')+'.png',dpi=300, bbox_inches='tight')
    kk=kk+1

In [None]:
# For every cell line: embedding scatter plots of the 14day and 16day cells,
# coloured by each variable, drawn on top of an outline of all cells.
from matplotlib.pyplot import cm

names=['HCC4006','HCC827','PC9','MGH707']
# the gene-set table does not depend on the cell line: read it once
EGF=pd.read_excel('Gene sets for heatmaps and UMAPs.xlsx',header=0)

kk=0
for adata in [adata4006,adata827,adataPC9,adata707]:

    # one score per non-empty spreadsheet column: per-cell mean over the listed
    # genes found in the table; stored in adata.obs as '<column>_Sig'
    genestoplot=[]
    for c in EGF.columns:
        genes=EGF[c].dropna().tolist()
        if len(genes)>0:
            expr=scrna.andata2df(adata)
            genes=[g for g in genes if g in expr.columns]
            score = expr.loc[:, genes].mean(axis=1)
            adata.obs[c+'_Sig'] = score
            genestoplot.append(c+'_Sig')

    VARS=['VIM','CDH1','MET']+genestoplot

    # Subset only AFTER the scores were written to adata.obs. The subset was
    # previously taken first, so the '<column>_Sig' columns were only available
    # in it when an earlier cell had already added them to the same object.
    mask=np.isin(adata.obs['timepoint'],['14day','16day'])
    adataaux=adata[mask]

    # 4x6 grid: axs[kkk] is indexed once per variable, so at most 24 variables fit
    fig, axs = plt.subplots(4, 6, figsize=(45, 20))
    axs=axs.ravel()
    kkk=0
    for VAR in VARS:
        # fully transparent points with an outline: background silhouette of all cells
        scv.pl.scatter(adata, color='white', size=100,ax=axs[kkk],add_outline=True,alpha=0,show=False)
        scv.pl.scatter(adataaux, color=VAR, s=50,show=False,ax=axs[kkk])
        kkk=kkk+1

    #set font size
    for ax in axs:
        for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                      ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(20)
    # NOTE(review): the file name ends with the LAST variable of VARS (left over
    # from the loop above) although the figure contains all of them. It is kept
    # unchanged so that existing output paths stay valid.
    fig.savefig('figuresNEW/'+names[kk]+'/UMAPS_final/14-16/'+names[kk]+'_UMAP'+VAR.replace(' ','_').replace('/','-')+'.png',dpi=300, bbox_inches='tight')
    kk=kk+1

In [None]:
# HCC4006: score the signatures of the spreadsheet (one gene list per column) and
# show violin plots of two of the scores and of MET, grouped by timepoint.
adata=adata4006.copy()

EGF=pd.read_excel('Epithelial_Mesenchymal_and_YAP_signatures_for_Javi.xlsx')
for c in EGF.columns:#['Epithelial', 'Mesenchymal']:
    genes=EGF[c].dropna().tolist()
    if len(genes)>0:
        # score = per-cell mean over the listed genes that exist as columns of the table
        expr=scrna.andata2df(adata)
        genes=[g for g in genes if g in expr.columns]
        score = expr.loc[:, genes].mean(axis=1)
        # print the column name of every signature that was scored
        print(c)
        adata.obs[c+'_Sig'] = score

# The plotted keys require spreadsheet columns named 'Epithelial' and 'Mesenchymal'
# (the scores are stored as '<column>_Sig').
sc.pl.violin(adata, ['Epithelial_Sig','Mesenchymal_Sig','MET'], groupby='timepoint', log=False, use_raw=None, stripplot=True, jitter=True, size=1, scale='width', 
             order=['0hr','24hr','14day','16day'], multi_panel=None, xlabel='', rotation=None, show=None, save=None, ax=None)

In [None]:
# For every cell line and every variable: line plot of the mean value per
# trajectory class over the four timepoints, with the standard error of the
# mean as error bars. One PDF is written per (cell line, variable).
from matplotlib.pyplot import cm

names=['HCC4006','HCC827','PC9','MGH707']
conds=['0hr','24hr','14day','16day']   # order of the x axis
r = list(range(len(conds)))
# the gene-set table does not depend on the cell line: read it once
EGF=pd.read_excel('Gene sets for heatmaps and UMAPs.xlsx',header=0)

kk=0
for adata in [adata4006,adata827,adataPC9,adata707]:

    # one score per non-empty spreadsheet column: per-cell mean over the listed
    # genes found in the table; stored in adata.obs as '<column>_Sig'
    genestoplot=[]
    for c in EGF.columns:
        genes=EGF[c].dropna().tolist()
        if len(genes)>0:
            expr=scrna.andata2df(adata)
            genes=[g for g in genes if g in expr.columns]
            score = expr.loc[:, genes].mean(axis=1)
            adata.obs[c+'_Sig'] = score
            genestoplot.append(c+'_Sig')

    VARS=['VIM','CDH1','MET']+genestoplot

    dfraw=scrna.andata2df(adata)
    # Aggregate the plotted columns only: averaging the whole table also hits the
    # non-numeric annotation columns, which raises a TypeError on pandas >= 2.0.
    grouped=dfraw[['trajectory_class','timepoint']+VARS].groupby(['trajectory_class','timepoint'])
    df=grouped.mean().reset_index()
    dfstd=grouped.sem().reset_index()   # standard error of the mean
    # Keep the classes whose name contains neither '_' nor '-'; this also removes
    # the placeholder class '-'. The mask has exactly one entry per row.
    df=df.loc[[('_' not in tc) and ('-' not in tc) for tc in df['trajectory_class']]]
    dfstd=dfstd.loc[[('_' not in tc) and ('-' not in tc) for tc in dfstd['trajectory_class']]]

    #plot
    for VAR in VARS:
        fig, axs = plt.subplots(1, 1, figsize=(11, 7))
        # One colour per plotted class. The palette used to be sized from the
        # column named by the global `groupby`, which is not defined in this cell,
        # while the colours are indexed per trajectory class below.
        classes=df['trajectory_class'].unique()
        color=cm.Set2(np.linspace(0,1,len(classes)))

        for k, bc in enumerate(classes):
            dfaux=df.loc[df['trajectory_class']==bc]
            dfauxstd=dfstd.loc[dfstd['trajectory_class']==bc]
            bars=[]
            barsstd=[]
            for t in conds:
                mean_t=dfaux.loc[dfaux['timepoint']==t, VAR]
                sem_t=dfauxstd.loc[dfauxstd['timepoint']==t, VAR]
                # a class without cells at this timepoint is drawn at 0 (as before)
                bars.append(mean_t.values[0] if len(mean_t)>0 else 0)
                barsstd.append(sem_t.values[0] if len(sem_t)>0 else 0)

            axs.errorbar(r,bars,barsstd,color=color[k],linewidth=5,label=bc,elinewidth=3,capsize=10,capthick=3)
        axs.legend(fontsize=20,loc='center left', bbox_to_anchor=(1, 0.5))

        axs.set_ylabel(VAR,fontsize=20)
        axs.set_xticks(r)
        axs.set_xticklabels(conds,fontsize=20)
        axs.set_xlim([0,3])

        for item in ([axs.title, axs.xaxis.label, axs.yaxis.label] +
                      axs.get_xticklabels() + axs.get_yticklabels()):
            item.set_fontsize(20)

        fig.savefig('figuresNEW/timeseries/'+names[kk]+'_'+VAR.replace(' ','_').replace('/','-')+'.pdf',bbox_inches='tight')
        # One figure is created per variable and per cell line; close each one
        # after saving so that they do not pile up in memory.
        plt.close(fig)
    kk=kk+1

In [None]:
from statannot import add_stat_annotation

# Box plots of every signature score (and VIM, CDH1, MET) for sensitive versus
# tolerant cells at timepoint `tp`, one figure per cell line, with a t-test
# annotation drawn on each panel.
names=['HCC4006','HCC827','PC9','MGH707']
tp='0hr'

k=0
# only the first three cell lines are processed (adata707 is commented out), so
# names[k] still matches the loop order
for adata in [adata4006,adata827,adataPC9]:#,adata707]:
    # keep the cells whose sensitivity is not the placeholder '-'
    adata=adata[adata.obs['sensitivity']!='-']
    # one score per non-empty spreadsheet column: per-cell mean over the listed
    # genes found in the table; stored in adata.obs as '<column>_Sig'
    genestoplot=[]
    EGF=pd.read_excel('Gene sets for heatmaps and UMAPs.xlsx',header=0)
    for c in EGF.columns:
        genes=EGF[c].dropna().tolist()
        if len(genes)>0:
            expr=scrna.andata2df(adata)
            genes=[g for g in genes if g in expr.columns]
            score = expr.loc[:, genes].mean(axis=1)
            adata.obs[c+'_Sig'] = score
            genestoplot.append(c+'_Sig')
    df=scrna.andata2df(adata)
    # restrict to the selected timepoint
    dfaux=df.loc[df['timepoint']==tp]
    print(names[k]+' '+tp)
    # 4x6 grid: axs[kk] is indexed once per variable, so at most 24 variables fit
    fig, axs = plt.subplots(4,6, figsize=(20,15))
    axs=axs.ravel()
    kk=0
    for g in genestoplot+['VIM','CDH1','MET']:
        sns.boxplot(data=dfaux,x='sensitivity',y=g,showfliers=False,ax=axs[kk],order=['sensitive','tolerant'],palette="Set3")
        
        # independent-samples t-test between the two groups, annotated inside the axes
        test_results = add_stat_annotation(axs[kk], data=dfaux, x='sensitivity', y=g, order=['sensitive','tolerant'],
                                   box_pairs=[('sensitive','tolerant')],test='t-test_ind', show_test_name=False,text_format='simple', loc='inside', verbose=0)
        
        kk=kk+1
        
        
    fig.tight_layout()
    plt.show()
    fig.savefig('figuresNEW/boxplots/sensitivityVSTrajClass'+names[k]+'_'+tp+'.pdf',bbox_inches='tight')
    k=k+1




In [None]:
# Box plots of every signature score (and VIM, CDH1, MET) per trajectory class at
# timepoint `tp`, one figure per cell line.
names=['HCC4006','HCC827','PC9','MGH707']
tp='0hr'

k=0
for adata in [adata4006,adata827,adataPC9,adata707]:
    # keep the cells whose trajectory class is not the placeholder '-'
    adata=adata[adata.obs['trajectory_class']!='-']
    # one score per non-empty spreadsheet column: per-cell mean over the listed
    # genes found in the table; stored in adata.obs as '<column>_Sig'
    genestoplot=[]
    # NOTE(review): this cell reads 'Gene sets for heatmaps.xlsx', whereas several other
    # cells read 'Gene sets for heatmaps and UMAPs.xlsx' -- confirm this is intended.
    EGF=pd.read_excel('Gene sets for heatmaps.xlsx',header=0)
    for c in EGF.columns:
        genes=EGF[c].dropna().tolist()
        if len(genes)>0:
            expr=scrna.andata2df(adata)
            genes=[g for g in genes if g in expr.columns]
            score = expr.loc[:, genes].mean(axis=1)
            adata.obs[c+'_Sig'] = score
            genestoplot.append(c+'_Sig')
    df=scrna.andata2df(adata)
    # restrict to the selected timepoint
    dfaux=df.loc[df['timepoint']==tp]
    print(names[k]+' '+tp)
    # 4x6 grid: axs[kk] is indexed once per variable, so at most 24 variables fit
    fig, axs = plt.subplots(4,6, figsize=(45,20))
    axs=axs.ravel()
    kk=0
    for g in genestoplot+['VIM','CDH1','MET']:
        sns.boxplot(data=dfaux,x='trajectory_class',y=g,showfliers=False,ax=axs[kk])
        kk=kk+1
    
    plt.show()
    fig.savefig('figuresNEW/boxplots/SigsVSTrajClass'+names[k]+'_'+tp+'.pdf',bbox_inches='tight')
    k=k+1

In [None]:
# Debug inspection: first rows of the signature columns of the `dfaux` left by the last executed cell.
dfaux[genestoplot].head()

In [None]:
# Debug inspection: names of the obs columns of the current `adata`.
adata.obs.head().columns

In [None]:
# One panel per cell line: mean 0hr value of every gene (x) against the change
# of its mean between 0hr and 14day (y), computed over all cells.
names=['HCC4006','HCC827','PC9','MGH707']
tp='0hr'

fig, axs = plt.subplots(2,2, figsize=(7,7))
axs=axs.ravel()

k=0
for adata in (adata4006,adata827,adataPC9,adata707):

    df=scrna.andata2df(adata)
    genelist=adata.var_names.tolist()

    # per-timepoint mean of every gene, transposed: genes as rows, timepoints as columns
    dfaux=df[['timepoint']+genelist].groupby(['timepoint']).mean().T

    # difference of the means; the axis label calls it log2fc
    # NOTE(review): this is a log fold change only if the values are log-scaled -- not visible here
    log2fc=dfaux['14day']-dfaux['0hr']
    df2=pd.DataFrame({'gene':dfaux.index,
                      'log2fc':log2fc.values,
                      '0hr':dfaux['0hr'].values})

    ax=axs[k]
    x=df2['0hr']
    y=df2['log2fc']
    ax.scatter(x,y,edgecolor='gray',c='cyan',s=50)
    # reference lines through the origin
    ax.axvline(x=0,color='k',linewidth=1)
    ax.axhline(y=0,color='k',linewidth=1)
    ax.set(xlabel='0hr expression',ylabel='0hr VS 14day log2fc',title=names[k])
    k=k+1

    fig.tight_layout()
plt.show()
fig.savefig('figuresNEW/adatpationVSselection.pdf',bbox_inches='tight')

# Same scatter as above, restricted to the cells labelled 'tolerant'. Only three
# cell lines are processed, so the fourth panel stays empty.
names=['HCC4006','HCC827','PC9','MGH707']
tp='0hr'

fig, axs = plt.subplots(2,2, figsize=(7,7))
axs=axs.ravel()

k=0
for adata in (adata4006,adata827,adataPC9):#,adata707]:

    adataaux=adata[adata.obs['sensitivity']=='tolerant']

    df=scrna.andata2df(adataaux)
    genelist=adata.var_names.tolist()

    # per-timepoint mean of every gene, transposed: genes as rows, timepoints as columns
    dfaux=df[['timepoint']+genelist].groupby(['timepoint']).mean().T

    # difference of the means; the axis label calls it log2fc
    # NOTE(review): this is a log fold change only if the values are log-scaled -- not visible here
    log2fc=dfaux['14day']-dfaux['0hr']
    df2=pd.DataFrame({'gene':dfaux.index,
                      'log2fc':log2fc.values,
                      '0hr':dfaux['0hr'].values})

    ax=axs[k]
    x=df2['0hr']
    y=df2['log2fc']
    ax.scatter(x,y,edgecolor='gray',c='cyan',s=50)
    # reference lines through the origin
    ax.axvline(x=0,color='k',linewidth=1)
    ax.axhline(y=0,color='k',linewidth=1)
    ax.set(xlabel='0hr expression',ylabel='0hr VS 14day log2fc',title=names[k])
    k=k+1

    fig.tight_layout()
plt.show()
fig.savefig('figuresNEW/adatpationVSselectionTolerant.pdf',bbox_inches='tight')

In [None]:
# Unfinished cell: it creates an empty 2x2 figure and, for HCC4006 only, builds a
# table of the tolerant cells with the timepoint, the sensitivity and every gene.
# Nothing is plotted or saved.
names=['HCC4006','HCC827','PC9','MGH707']
tp='0hr'

fig, axs = plt.subplots(2,2, figsize=(7,7))
axs=axs.ravel()

k=0
for adata in [adata4006]:#,adata827,adataPC9,adata707]:
    
    adataaux=adata[adata.obs['sensitivity']=='tolerant']

    df=scrna.andata2df(adataaux)
    genelist=adata.var_names.tolist()


    dfaux=df[['timepoint','sensitivity']+genelist]

In [None]:
# Debug inspection: first rows of the `df2` left by the last executed cell.
df2.head()

In [None]:
# One panel per cell line: for every gene, the 0hr -> 14day change within the
# tolerant cells (x) against the tolerant-minus-sensitive difference over all
# cells (y). Only three cell lines are processed, so the fourth panel stays empty.
names=['HCC4006','HCC827','PC9','MGH707']
tp='0hr'

fig, axs = plt.subplots(2,2, figsize=(7,7))
axs=axs.ravel()

k=0
for adata in (adata4006,adata827,adataPC9):#,adata707]:

    # change of the mean between 0hr and 14day, tolerant cells only
    adataaux=adata[adata.obs['sensitivity']=='tolerant']
    df=scrna.andata2df(adataaux)
    genelist=adata.var_names.tolist()
    dfaux=df[['timepoint']+genelist].groupby(['timepoint']).mean().T
    log2fc=dfaux['14day']-dfaux['0hr']
    dff1=pd.DataFrame({'gene':dfaux.index,
                       'log2fc 0hrVS14day':log2fc.values})

    # difference of the mean between tolerant and sensitive cells, all timepoints
    df=scrna.andata2df(adata)
    genelist=adata.var_names.tolist()
    dfaux=df[['sensitivity']+genelist].groupby(['sensitivity']).mean().T
    log2fc=dfaux['tolerant']-dfaux['sensitive']
    dff2=pd.DataFrame({'gene':dfaux.index,
                       'log2fc sensitiveVStolerant':log2fc.values})

    ax=axs[k]

    # one row per gene with both differences
    df2=dff2.merge(dff1,on='gene')

    y=df2['log2fc sensitiveVStolerant']
    x=df2['log2fc 0hrVS14day']
    ax.scatter(x,y,edgecolor='gray',c='cyan',s=50)
    # reference lines through the origin
    ax.axvline(x=0,color='k',linewidth=1)
    ax.axhline(y=0,color='k',linewidth=1)
    ax.set(ylabel='sensitive VS tolerant log2fc',
           xlabel='0hr VS 14day log2fc\n tolerant clones',
           title=names[k])
    k=k+1

    fig.tight_layout()
plt.show()
fig.savefig('figuresNEW/adatpationVSselection3.pdf',bbox_inches='tight')

In [None]:
# Adaptation vs. selection restricted to gene-set signatures and three marker
# genes, one labelled panel per cell line:
#   x = 14day-minus-0hr difference in mean value within tolerant clones
#   y = tolerant-minus-sensitive difference in mean value over all cells
names=['HCC4006','HCC827','PC9','MGH707']
tp='0hr'  # not read below; kept in case other cells rely on this global
markers=['VIM','CDH1','MET']

# The workbook does not depend on the dataset, so it is read once.
EGF=pd.read_excel('Gene sets for heatmaps.xlsx',header=0)

fig, axs = plt.subplots(2,2, figsize=(17,13))
axs=axs.ravel()

for ax,name,adata in zip(axs,names,[adata4006,adata827,adataPC9,adata707]):

    # One signature per workbook column: the per-cell mean over the listed
    # genes that exist in this dataset. Scores are written into adata.obs, so
    # the '<column>_Sig' columns stay on the dataset after this cell.
    genestoplot=[]
    expr=scrna.andata2df(adata)  # converted once per dataset, not once per gene set
    for c in EGF.columns:
        genes=[g for g in EGF[c].dropna().tolist() if g in expr.columns]
        if len(genes)==0:
            continue  # no listed gene is measured here; avoids an all-NaN signature
        adata.obs[c+'_Sig'] = expr.loc[:, genes].mean(axis=1)
        genestoplot.append(c+'_Sig')

    genelist=adata.var_names.tolist()

    # change between timepoints, restricted to tolerant clones
    adataaux=adata[adata.obs['sensitivity']=='tolerant']
    df=scrna.andata2df(adataaux)
    dfaux=df[['timepoint']+genelist+genestoplot].groupby(['timepoint']).mean().T
    dff1=pd.DataFrame({'gene':dfaux.index,
                       'log2fc 0hrVS14day':(dfaux['14day']-dfaux['0hr']).values})

    # tolerant vs. sensitive, computed over every cell of the dataset
    df=scrna.andata2df(adata)
    dfaux=df[['sensitivity']+genelist+genestoplot].groupby(['sensitivity']).mean().T
    dff2=pd.DataFrame({'gene':dfaux.index,
                       'log2fc sensitiveVStolerant':(dfaux['tolerant']-dfaux['sensitive']).values})

    df2=dff2.merge(dff1,on='gene')

    # Only the highlighted signatures/markers are drawn (no per-gene cloud).
    ax.axvline(x=0,color='k',linewidth=1)
    ax.axhline(y=0,color='k',linewidth=1)
    ax.set_ylabel('sensitive VS tolerant log2fc')
    ax.set_xlabel('0hr VS 14day log2fc\n tolerant clones')
    ax.set_title(name)

    for g in genestoplot+markers:
        hit=df2.loc[df2.gene==g]
        if hit.empty:
            continue  # marker absent from this dataset: nothing to draw or label
        X=hit['log2fc 0hrVS14day']
        Y=hit['log2fc sensitiveVStolerant']
        ax.scatter(X,Y,edgecolor='gray',c='red',s=50)
        # Axes.text needs scalar coordinates, not a pandas Series
        ax.text(float(X.iloc[0]),float(Y.iloc[0]),g)

fig.tight_layout()  # once, after all panels exist, instead of once per iteration
plt.show()
fig.savefig('figuresNEW/adatpationVSselection3pathways.pdf',bbox_inches='tight')

In [None]:
# Gene-set signatures and three marker genes, one labelled panel per cell line:
#   x = mean value at 0hr
#   y = 14day-minus-0hr difference in mean value (all cells of the dataset)
names=['HCC4006','HCC827','PC9','MGH707']
tp='0hr'  # not read below; kept in case other cells rely on this global
markers=['VIM','CDH1','MET']

# The workbook does not depend on the dataset, so it is read once.
EGF=pd.read_excel('Gene sets for heatmaps.xlsx',header=0)

fig, axs = plt.subplots(2,2, figsize=(17,13))
axs=axs.ravel()

for ax,name,adata in zip(axs,names,[adata4006,adata827,adataPC9,adata707]):

    # One signature per workbook column: the per-cell mean over the listed
    # genes that exist in this dataset. Scores are written into adata.obs, so
    # the '<column>_Sig' columns stay on the dataset after this cell.
    genestoplot=[]
    expr=scrna.andata2df(adata)  # converted once per dataset, not once per gene set
    for c in EGF.columns:
        genes=[g for g in EGF[c].dropna().tolist() if g in expr.columns]
        if len(genes)==0:
            continue  # no listed gene is measured here; avoids an all-NaN signature
        adata.obs[c+'_Sig'] = expr.loc[:, genes].mean(axis=1)
        genestoplot.append(c+'_Sig')

    # converted again so the table includes the signature columns just added
    df=scrna.andata2df(adata)
    genelist=adata.var_names.tolist()

    # features x timepoint table of mean values
    dfaux=df[['timepoint']+genelist+genestoplot].groupby(['timepoint']).mean().T

    df2=pd.DataFrame({'gene':dfaux.index,
                      'log2fc':(dfaux['14day']-dfaux['0hr']).values,
                      '0hr':dfaux['0hr'].values})

    # Only the highlighted signatures/markers are drawn (no per-gene cloud).
    ax.axvline(x=0,color='k',linewidth=1)
    ax.axhline(y=0,color='k',linewidth=1)
    ax.set_xlabel('0hr expression')
    ax.set_ylabel('0hr VS 14day log2fc')
    ax.set_title(name)

    for g in genestoplot+markers:
        hit=df2.loc[df2.gene==g]
        if hit.empty:
            continue  # marker absent from this dataset: nothing to draw or label
        X=hit['0hr']
        Y=hit['log2fc']
        ax.scatter(X,Y,edgecolor='gray',c='red',s=50)
        # Axes.text needs scalar coordinates, not a pandas Series
        ax.text(float(X.iloc[0]),float(Y.iloc[0]),g)

fig.tight_layout()  # once, after all panels exist, instead of once per iteration
plt.show()

In [None]:
from statannot import add_stat_annotation

In [None]:
# Box plots of selected gene-set signatures and three marker genes per
# trajectory class at timepoint `tp`, annotated with pairwise t-tests.
# One figure (4x6 grid of panels) is shown and saved per cell line.
names=['HCC4006','HCC827','PC9','MGH707']
tp='0hr'

# Number of trajectory classes ('0', '1', ...) compared for each cell line.
n_classes={'HCC4006':5,'HCC827':5,'PC9':7,'MGH707':4}
# Only gene sets whose column header contains one of these terms are scored.
wanted=('YAP','Epithelial','Mesenchymal','Type I IFN','NRF2')

# The workbook does not depend on the dataset, so it is read once.
EGF=pd.read_excel('Gene sets for heatmaps and UMAPs.xlsx',header=0)

k=0
for adata in [adata4006,adata827,adataPC9,adata707]:
    # drop cells whose trajectory class is the placeholder '-'
    adata=adata[adata.obs['trajectory_class']!='-']
    #adata=adata[~np.isin(adata.obs['trajectory_class'],['earlier_0','earlier_1','earlier_2','earlier_3','earlier_4','later_0','later_1','later_2','later_3','later_4'])]
    genestoplot=[]
    expr=scrna.andata2df(adata)  # converted once per dataset, not once per gene set
    for c in EGF.columns:
        # The earlier test `'YAP' or 'Epithelial' or ... in c` was always true:
        # a non-empty string literal is truthy, so `or` returned 'YAP' without
        # ever looking at c. Each term is now checked against the header.
        if not any(term in str(c) for term in wanted):
            continue
        genes=[g for g in EGF[c].dropna().tolist() if g in expr.columns]
        if len(genes)==0:
            continue  # no listed gene is measured here; avoids an all-NaN signature
        # signature = per-cell mean over the measured genes of the set
        adata.obs[c] = expr.loc[:, genes].mean(axis=1)
        genestoplot.append(c)
    df=scrna.andata2df(adata)
    dfaux=df.loc[df['timepoint']==tp]
    print(names[k]+' '+tp)

    # Class order and every unordered pair of classes; both depend only on the
    # cell line, so they are built once per figure rather than once per panel.
    order=[str(i) for i in range(n_classes[names[k]])]
    tests=[(a,b) for i,a in enumerate(order) for b in order[i+1:]]

    # PC9 has more pairwise annotations stacked per panel, hence the taller figure
    size=(45,35) if 'PC9' in names[k] else (45,20)
    fig, axs = plt.subplots(4,6, figsize=size)
    axs=axs.ravel()
    kk=0
    for g in genestoplot+['VIM','CDH1','MET']:
        sns.boxplot(data=dfaux,x='trajectory_class',y=g,showfliers=False,ax=axs[kk])
        test_results = add_stat_annotation(axs[kk], data=dfaux, x='trajectory_class', y=g, order=order,
                                   box_pairs=tests,test='t-test_ind', show_test_name=False,text_format='simple', loc='inside', verbose=0)
        kk=kk+1

    # hide the grid cells that received no plot
    for unused in axs[kk:]:
        unused.set_visible(False)

    plt.show()
    fig.savefig('figuresNEW/'+names[k]+'/SigsVSTrajClassWithTtest'+names[k]+'_'+tp+'.pdf',bbox_inches='tight')
    k=k+1

In [None]:
# Box plots of selected gene-set signatures and three marker genes per
# trajectory class at timepoint `tp`, annotated with pairwise t-tests.
# One figure (4x6 grid of panels) is shown and saved per cell line.
names=['HCC4006','HCC827','PC9','MGH707']
tp='14day'

# Number of trajectory classes ('0', '1', ...) compared for each cell line.
n_classes={'HCC4006':5,'HCC827':5,'PC9':7,'MGH707':4}
# Only gene sets whose column header contains one of these terms are scored.
wanted=('YAP','Epithelial','Mesenchymal','Type I IFN','NRF2')

# The workbook does not depend on the dataset, so it is read once.
EGF=pd.read_excel('Gene sets for heatmaps and UMAPs.xlsx',header=0)

k=0
for adata in [adata4006,adata827,adataPC9,adata707]:
    # drop cells whose trajectory class is the placeholder '-'
    adata=adata[adata.obs['trajectory_class']!='-']
    #adata=adata[~np.isin(adata.obs['trajectory_class'],['earlier_0','earlier_1','earlier_2','earlier_3','earlier_4','later_0','later_1','later_2','later_3','later_4'])]
    genestoplot=[]
    expr=scrna.andata2df(adata)  # converted once per dataset, not once per gene set
    for c in EGF.columns:
        # The earlier test `'YAP' or 'Epithelial' or ... in c` was always true:
        # a non-empty string literal is truthy, so `or` returned 'YAP' without
        # ever looking at c. Each term is now checked against the header.
        if not any(term in str(c) for term in wanted):
            continue
        genes=[g for g in EGF[c].dropna().tolist() if g in expr.columns]
        if len(genes)==0:
            continue  # no listed gene is measured here; avoids an all-NaN signature
        # signature = per-cell mean over the measured genes of the set
        adata.obs[c] = expr.loc[:, genes].mean(axis=1)
        genestoplot.append(c)
    df=scrna.andata2df(adata)
    dfaux=df.loc[df['timepoint']==tp]
    print(names[k]+' '+tp)

    # Class order and every unordered pair of classes; both depend only on the
    # cell line, so they are built once per figure rather than once per panel.
    order=[str(i) for i in range(n_classes[names[k]])]
    tests=[(a,b) for i,a in enumerate(order) for b in order[i+1:]]

    # PC9 has more pairwise annotations stacked per panel, hence the taller figure
    size=(45,35) if 'PC9' in names[k] else (45,20)
    fig, axs = plt.subplots(4,6, figsize=size)
    axs=axs.ravel()
    kk=0
    for g in genestoplot+['VIM','CDH1','MET']:
        sns.boxplot(data=dfaux,x='trajectory_class',y=g,showfliers=False,ax=axs[kk])
        test_results = add_stat_annotation(axs[kk], data=dfaux, x='trajectory_class', y=g, order=order,
                                   box_pairs=tests,test='t-test_ind', show_test_name=False,text_format='simple', loc='inside', verbose=0)
        kk=kk+1

    # hide the grid cells that received no plot
    for unused in axs[kk:]:
        unused.set_visible(False)

    plt.show()
    fig.savefig('figuresNEW/'+names[k]+'/SigsVSTrajClassWithTtest'+names[k]+'_'+tp+'.pdf',bbox_inches='tight')
    k=k+1

In [None]:
# Clone table from Vivek used by the resistant-clone UMAP cells (tab-separated text).
# NOTE(review): absolute path on a shared filesystem; this cell can only run
# where that location is reachable.
clone_table_path='/da/onc/BFx/research/krishvi7/barcoding/expressed_barcode/20200415_long_term_resistance_invivo_DNA/output/resistant_clones_for_umap_20220720.txt'
df=pd.read_csv(clone_table_path,delimiter='\t')
# last expression of the cell: displays the distinct trajectory_class labels
df['trajectory_class'].unique()

In [None]:
#df['cloneid_cell_line']=df['cloneid']+'_'+df['cell_line']

In [None]:
#df2=scrna.andata2df(adata827)

In [None]:
#df2['cloneid_cell_line']=df2['cloneid'].astype(str)+'_HCC827'

In [None]:
#df2[['cloneid_cell_line','trajectory_class']]
#diction={df2.iloc[i]['cloneid_cell_line']:df2.iloc[i]['trajectory_class'] for i in list(range(len(df2)))}

In [None]:
#df2['trajectory_class'].unique()

In [None]:
#df=df[['cloneid','cell_line','group','cloneid_cell_line']]

#df['trajectory_class']=df['cloneid_cell_line'].map(diction)
#df.head()

In [None]:
#df['trajectory_class'].unique()

In [None]:
# UMAP of HCC827: the full dataset as background, with the cells of the clones
# listed for HCC827 in `df` coloured by trajectory class.
# Expects `df` to be the resistant-clone table loaded by an earlier cell.

# The next five names are not read in this cell; they are kept in case other
# cells rely on these globals.
stratify='timepoint'
feat='UMAP'
colors = ['purple','orangered','greenyellow','turquoise']#['darkred','violet','green','b']
xs = feat + '1'
ys = feat + '2'


fig, axw = plt.subplots(1,1, figsize=(5, 5))

dfaux=df.loc[df['cell_line']=='HCC827']

# cells whose clone id is in the table, minus those with placeholder class '-'
adata=adata827[np.isin(adata827.obs['cloneid'],dfaux['cloneid'].unique())]
adata=adata[adata.obs['trajectory_class']!='-']

# background layer: all cells, fully transparent points with an outline
scv.pl.scatter(adata827, color='white', size=60,ax=axw,add_outline=True,alpha=0,show=False)
# show=False here: with show=True the figure was rendered before the axis was
# switched off, and plt.axis() acts on whichever axes is current at that moment.
scv.pl.scatter(adata, color='trajectory_class',edgecolor='gray',linewidths=0.5, size=50,ax=axw,add_outline=None,show=False,legend_loc='right margin'
               ,palette=['lightblue','steelblue','sandybrown','yellow','brown'],title='EGFRi')
axw.axis('off')  # explicit axes, so the displayed and the saved figure agree

plt.show()
fig.savefig('figuresNEW/HCC827resistantclonesUMAP.png',dpi=300, bbox_inches='tight')

In [None]:
# UMAP of HCC827: the full dataset as background, with only the cells of
# clones from group 'EGF816-INC280_7' coloured by trajectory class.
# Expects `df` to be the resistant-clone table loaded by an earlier cell.

# The next five names are not read in this cell; they are kept in case other
# cells rely on these globals.
stratify='timepoint'
feat='UMAP'
colors = ['purple','orangered','greenyellow','turquoise']#['darkred','violet','green','b']
xs = feat + '1'
ys = feat + '2'


fig, axw = plt.subplots(1,1, figsize=(5, 5))

dfaux=df.loc[df['cell_line']=='HCC827']
# all listed HCC827 clones; only used by the commented-out alternative below
adata=adata827[np.isin(adata827.obs['cloneid'],dfaux['cloneid'].unique())]
adata=adata[adata.obs['trajectory_class']!='-']

# restrict the clone table to the EGF816-INC280_7 group, then pick its cells
mask=dfaux['group']=='EGF816-INC280_7'
dfauxnot=dfaux.loc[mask]
adatanot=adata827[np.isin(adata827.obs['cloneid'],dfauxnot['cloneid'].unique())]
adatanot=adatanot[adatanot.obs['trajectory_class']!='-']

# background layer: all cells, fully transparent points with an outline
scv.pl.scatter(adata827, color='white', size=60,ax=axw,add_outline=True,alpha=0,show=False)
#scv.pl.scatter(adata, color='trajectory_class',edgecolor='gray',linewidths=0.5, size=50,ax=axw,add_outline=None,show=False,legend_loc='right margin'
#               ,palette='Paired',color_map='Paired')
# show=False here: with show=True the figure was rendered before the title was
# cleared and the axis switched off, so the displayed figure and the saved file
# could differ.
scv.pl.scatter(adatanot, color='trajectory_class',edgecolor='gray',linewidths=0.5, size=50,ax=axw,add_outline=None,show=False,legend_loc='right margin'
               ,palette=['lightblue','steelblue'],color_map='Paired',title='EGFRi/METi')

axw.set_title('')
axw.axis('off')  # explicit axes instead of pyplot's "current axes"

plt.show()
fig.savefig('figuresNEW/HCC827resistant_toINC280_clonesUMAP.png',dpi=300, bbox_inches='tight')

In [None]:
# UMAP of HCC4006: the full dataset as background, with the cells of the clones
# listed for HCC4006 in `df` coloured by trajectory class.
# Expects `df` to be the resistant-clone table loaded by an earlier cell.

# stratify='timepoint'   (already disabled in this cell)
# The next four names are not read in this cell; they are kept in case other
# cells rely on these globals.
feat='UMAP'
colors = ['purple','orangered','greenyellow','turquoise']#['darkred','violet','green','b']
xs = feat + '1'
ys = feat + '2'


fig, axw = plt.subplots(1,1, figsize=(5, 5))

dfaux=df.loc[df['cell_line']=='HCC4006']

# cells whose clone id is in the table, minus those with placeholder class '-'
adata=adata4006[np.isin(adata4006.obs['cloneid'],dfaux['cloneid'].unique())]
adata=adata[adata.obs['trajectory_class']!='-']

# background layer: all cells, fully transparent points with an outline
scv.pl.scatter(adata4006, color='white', size=60,ax=axw,add_outline=True,alpha=0,show=False)
# show=False here: with show=True the figure was rendered before the axis was
# switched off, and plt.axis() acts on whichever axes is current at that moment.
scv.pl.scatter(adata, color='trajectory_class',edgecolor='gray',linewidths=0.5, size=50,ax=axw,add_outline=None,show=False,legend_loc='right margin'
               ,palette=['lightblue','steelblue','sandybrown','yellow','brown'],title='EGFRi')
axw.axis('off')  # explicit axes, so the displayed and the saved figure agree

plt.show()
fig.savefig('figuresNEW/HCC4006resistantclonesUMAP.png',dpi=300, bbox_inches='tight')

In [None]:
# UMAP of PC9: the full dataset as background, with the cells of the clones
# listed for PC9 in `df` coloured by trajectory class.
# Expects `df` to be the resistant-clone table loaded by an earlier cell.

# The next five names are not read in this cell; they are kept in case other
# cells rely on these globals.
stratify='timepoint'
feat='UMAP'
colors = ['purple','orangered','greenyellow','turquoise']#['darkred','violet','green','b']
xs = feat + '1'
ys = feat + '2'


fig, axw = plt.subplots(1,1, figsize=(5, 5))

dfaux=df.loc[df['cell_line']=='PC9']

# cells whose clone id is in the table, minus those with placeholder class '-'
adata=adataPC9[np.isin(adataPC9.obs['cloneid'],dfaux['cloneid'].unique())]
adata=adata[adata.obs['trajectory_class']!='-']

# background layer: all cells, fully transparent points with an outline
scv.pl.scatter(adataPC9, color='white', size=60,ax=axw,add_outline=True,alpha=0,show=False)
# show=False here: with show=True the figure was rendered before the axis was
# switched off, and plt.axis() acts on whichever axes is current at that moment.
# NOTE(review): the palette lists five colours -- confirm the selected PC9
# cells span at most five trajectory classes.
scv.pl.scatter(adata, color='trajectory_class',edgecolor='gray',linewidths=0.5, size=50,ax=axw,add_outline=None,show=False,legend_loc='right margin'
               ,palette=['lightblue','steelblue','sandybrown','yellow','brown'],title='EGFRi')
axw.axis('off')  # explicit axes, so the displayed and the saved figure agree

plt.show()
fig.savefig('figuresNEW/PC9resistantclonesUMAP.png',dpi=300, bbox_inches='tight')