In [None]:
import scvelo as scv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scAnalysis as scrna
import scanpy as sc
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

# Bare expression in the middle of a cell: evaluated but not displayed.
scv.__version__
scv.settings.presenter_view = True  # set max width size for presenter view
scv.settings.set_figure_params('scvelo')  # for beautified visualization

In [None]:
def do_pca(dfin,features,Npca=10,number_genes=None,zscore=True):
    """Run PCA on selected columns of ``dfin`` and append the scores as columns.

    Parameters
    ----------
    dfin : pandas.DataFrame
        One row per sample; must contain every column named in ``features``.
    features : sequence of str
        Candidate columns to feed into the PCA.
    Npca : int
        Number of principal components to compute.
    number_genes : int, None or empty list
        When an int, only the ``number_genes`` features with the largest
        coefficient of variation (population std / mean) are used.  ``None``
        or an empty list (the historical default) means "use all features".
    zscore : bool
        Standardise each feature (mean 0, population std 1) before the PCA.

    Returns
    -------
    dfout : pandas.DataFrame
        Copy of ``dfin`` with added columns ``PC1`` ... ``PC<Npca>``.  Rows that
        had a missing value in a selected feature are excluded from the fit and
        receive NaN scores.
    loads : pandas.DataFrame
        Loadings (components scaled by sqrt of explained variance), indexed by
        feature, one column per PC, sorted by ``PC1`` descending.
    """
    from sklearn.decomposition import PCA

    use_all = number_genes is None or (
        isinstance(number_genes, (list, tuple)) and len(number_genes) == 0)
    if use_all:
        features2 = features
    else:
        number_genes = min(number_genes, len(features))
        # Rank on the feature columns only, so that no helper row pollutes the
        # statistic and non-feature columns can never be selected.
        values = dfin.loc[:, features]
        variation = values.std(axis=0, ddof=0) / values.mean(axis=0)
        features2 = variation.sort_values(ascending=False).index[:number_genes]

    #Compute actual PCs using all samples
    print('Computing PCs...')
    X = dfin.loc[:,features2].dropna().astype(float)
    if zscore:
        X = (X - X.mean(axis=0)) / X.std(axis=0, ddof=0)
    # Constant features give 0/0 after z-scoring; treat them as uninformative.
    X = X.fillna(0)
    pca = PCA(n_components=Npca)
    PCs = pca.fit_transform(X)
    perc = 100*pca.explained_variance_ratio_.sum()
    print('Done!')

    print('We use '+str(Npca)+' components to explain '+str(perc)+'% of the variability')

    #create DF with PCA results
    dfout = dfin.copy()
    pclist = ['PC'+str(i+1) for i in range(PCs.shape[1])]
    if len(X) == len(dfout):
        # No row was dropped: positional assignment, as before.
        for i, name in enumerate(pclist):
            dfout[name] = PCs[:, i]
    else:
        # Some rows were dropped by dropna(): align the scores on the index so
        # the remaining rows keep their own values and dropped rows get NaN.
        scores = pd.DataFrame(PCs, index=X.index, columns=pclist)
        for name in pclist:
            dfout[name] = scores[name]

    print(pca.explained_variance_ratio_)
    print(pca.explained_variance_ratio_.sum())

    loadings = pca.components_.T * np.sqrt(pca.explained_variance_)
    loads = pd.DataFrame(loadings,index=features2,columns = pclist)
    loads = loads.sort_values('PC1',ascending=False)

    return dfout, loads

In [None]:
# Repeats the warning suppression already performed in the first cell.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the AnnData object for the dataset under study.
# NOTE(review): the directory is a hard-coded absolute path on one machine;
# consider a configurable data directory.
results_file = 'MGH707.h5ad'  # alternative dataset: 'PC9.h5ad'
adata = sc.read('/home/estraja4/BFX_research/Resistance/barcoding/final_anndatasNEW/'+results_file)


In [None]:
# Display the summary of the loaded AnnData object.
adata

In [None]:
# UMAP of all cells, stratified by the 'louvain' annotation.
scrna.plot_label(adata, feat='UMAP', stratify='louvain', legend_inside=True, figsize=(9, 7), ax='',savepdf=False)
plt.show()

In [None]:
# UMAP of all cells, stratified by the 'timepoint' annotation.
scrna.plot_label(adata, feat='UMAP', stratify='timepoint', legend_inside=False, figsize=(9, 7), ax='',savepdf=False)
plt.show()

# Transitions based on cloneid
For each cloneid, compute the median position of its cells in the UMAP embedding, and the number of cells, at each timepoint. Use that information to compute likely transitions between timepoints.

In [None]:
# Display the AnnData summary again before building the per-clone table.
adata

In [None]:
# Build one row per clone with, for every timepoint:
#   '<t>'        -> median embedding position of the clone's cells (list)
#   '<t>_<dim>'  -> the same median, one column per embedding dimension
#   '<t>_count'  -> number of cells of the clone at that timepoint
embedding='umap'
coords='X_'+embedding
n_dims=adata.obsm[coords].shape[1]
timepoints=[t for t in adata.obs['timepoint'].unique()]

columns=list(timepoints)
for t in timepoints:
    columns.extend(t+'_'+str(j) for j in range(n_dims))
    columns.append(t+'_count')

# Skip cells without a barcode and any clone id containing '-'.
clone_ids=[bc for bc in adata.obs['cloneid'].unique() if 'no-bc' not in bc and '-' not in bc]

transitions=pd.DataFrame(index=clone_ids,columns=columns)
for bc in clone_ids:
    aux=adata[adata.obs['cloneid']==bc]
    for t in timepoints:
        aux2=aux[aux.obs['timepoint']==t]
        # A clone absent at this timepoint yields a median of NaNs.
        val=list(np.median(aux2.obsm[coords],axis=0))
        # .at writes straight into the frame; the previous chained form
        # (transitions[col].loc[bc] = ...) assigns into a temporary under
        # pandas Copy-on-Write and would leave the table empty.
        transitions.at[bc,t]=val
        for j in range(n_dims):
            transitions.at[bc,t+'_'+str(j)]=val[j]
        transitions.at[bc,t+'_count']=len(aux2)

In [None]:
# Turn any literal 'nan' strings into real NaNs, then drop every clone (row)
# that has a missing value in any column.
transitions2=transitions.replace('nan',np.nan).dropna()


In [None]:
# UMAP coloured by timepoint (palette matching cellxgene), with the per-clone
# median positions overlaid and joined across timepoints.

stratify='timepoint'
feat='UMAP'
colors = ['purple','orangered','greenyellow','turquoise']
xs, ys = feat + '1', feat + '2'

expr = scrna.andata2df(adata).sort_values(stratify)

fig, axs = plt.subplots(1, 1, figsize=(7, 7))
axw = axs
for c, color in zip(expr[stratify].unique(), colors):
    expraux = expr.loc[expr[stratify] == c, [xs, ys, stratify]]
    axw.scatter(expraux[xs], expraux[ys], c=color, edgecolor='lightgray', s=50, marker='.', label=c,
                linewidths=.3)
    # Label each timepoint at the centroid of its cells.
    axw.text(np.mean(expraux[xs]), np.mean(expraux[ys]), expraux[stratify].values[0],
             fontsize=10, verticalalignment='center', horizontalalignment='center',
             bbox=dict(facecolor='white', edgecolor='black', boxstyle='round'))
axw.set_xlabel(xs)
axw.set_ylabel(ys)

# Clone medians: one row of XX/YY per timepoint, one entry per clone.
timepoint_order = ['0hr','24hr','14day','16day']
colors=['violet','b','darkred','green']
XX=[[i[0] for i in transitions2[t]] for t in timepoint_order]
YY=[[i[1] for i in transitions2[t]] for t in timepoint_order]
for x, y, color in zip(XX, YY, colors):
    axs.scatter(x,y,color=color)
axs.plot(XX,YY,'-',color='lightgray',alpha=0.6)
plt.show()

# Same overlay on top of the library's own timepoint plot.
fig, axs = plt.subplots(1, 1, figsize=(12, 12))
scrna.plot_label(adata, feat='UMAP', stratify='timepoint', legend_inside=True, figsize=(9, 7), ax=axs,savepdf=False)

colors=['darkred','violet','green','b']
XX=[[i[0] for i in transitions2[t]] for t in timepoint_order]
YY=[[i[1] for i in transitions2[t]] for t in timepoint_order]
for x, y, color in zip(XX, YY, colors):
    axs.scatter(x,y,color=color)
axs.plot(XX,YY,'-',color='lightgray',alpha=0.6)
plt.show()

In [None]:
def add_kmeans(df,n_clusters=4,random_state=None):
    """Cluster rows with k-means on their PC columns and plot PC1 vs PC2.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'PC1' and 'PC2'; every column whose name contains 'PC'
        is used as a clustering feature.
    n_clusters : int
        Number of k-means clusters.
    random_state : int or None
        Seed forwarded to ``KMeans``.  ``None`` keeps the previous unseeded
        behaviour; pass an int to make the cluster labels reproducible.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with a string column 'clustering' holding the labels.
    """
    import sklearn.cluster as cluster
    import seaborn as sns

    features=[col for col in df.columns if 'PC' in col]

    clustering = cluster.KMeans(n_clusters=n_clusters,random_state=random_state)
    clustering.fit(df.loc[:,features])

    df2=df.copy()
    df2['clustering']=[str(label) for label in clustering.labels_]

    sns.lmplot(data=df2,x='PC1',y='PC2',hue='clustering',fit_reg=False)
    # Annotate every point with its row position so clones can be identified.
    for i, (pc1, pc2) in enumerate(zip(df2['PC1'], df2['PC2'])):
        plt.text(pc1,pc2,str(i))
    plt.show()

    return df2

In [None]:
# Features: every column whose name contains '_' but not 'count'.
# Run a 4-component PCA on them and split the clones into 5 k-means clusters.
feats=[t for t in transitions2.columns if '_' in t and 'count' not in t]
transitions3,load = do_pca(transitions2,feats,Npca=4,number_genes=[],zscore=True)
transitions3=add_kmeans(transitions3,n_clusters=5)

In [None]:
# List the cluster labels assigned to the clones.
transitions3['clustering'].unique()

In [None]:
#with cellxgene colors
# One panel per clone (ordered by cluster): the clone's cells coloured by
# timepoint, its median position per timepoint joined by a line, and the
# outline of the full dataset in the background.

bcs=transitions3.sort_values('clustering').index.unique()

stratify='timepoint'
feat='UMAP'
xs = feat + '1'
ys = feat + '2'
timepoints=['0hr','24hr','14day','16day']

# Fixed timepoint -> colour mapping taken once from the full dataset, so a
# timepoint keeps its colour even in clones that lack some timepoints.
expr_all = scrna.andata2df(adata).sort_values(stratify)
cell_colors = dict(zip(expr_all[stratify].unique(), ['purple','orangered','greenyellow','turquoise']))
colors=['violet','b','darkred','green']  # colours of the median markers

# Keep the original 7x10 layout, but grow it when there are more clones.
n_cols = 10
n_rows = max(7, -(-len(bcs)//n_cols))
fig, ax = plt.subplots(n_rows, n_cols, figsize=(55, 5*n_rows))
ax=ax.ravel()
for kk, bc in enumerate(bcs):
    axs=ax[kk]
    df=transitions3.loc[bc].to_frame().T
    adataaux=adata[adata.obs['cloneid']==bc]

    expr = scrna.andata2df(adataaux).sort_values(stratify)
    for c in expr[stratify].unique():
        if c not in cell_colors:
            continue
        expraux = expr.loc[expr[stratify] == c, [xs, ys]]
        axs.scatter(expraux[xs], expraux[ys], c=cell_colors[c], edgecolor='lightgray', s=80, marker='.', label=c,
                    linewidths=.3)
    axs.set_xlabel(xs)
    axs.set_ylabel(ys)

    # Median position per timepoint, marker size proportional to cell count.
    XX=[]
    YY=[]
    for t, color in zip(timepoints, colors):
        x=[i[0] for i in df[t]]
        y=[i[1] for i in df[t]]
        s=[i*2 for i in df[t+'_count']]
        XX.append(x)
        YY.append(y)
        axs.scatter(x,y,s=s,color=color)
    axs.plot(XX,YY,'-',color='k',alpha=0.6)
    # Cluster label, placed at fixed data coordinates.
    axs.text(5,-7.5,str(df['clustering'].unique()[0]),fontsize=20)
    axs.set_xticks([])

    scv.pl.scatter(adata, color='white', size=80,ax=axs,add_outline=True,alpha=0,show=False)
    axs.set_title(bc+','+str(kk))

plt.show()
#fig.savefig("figures/HCC4006trajectories.png", dpi=300, bbox_inches='tight')

In [None]:
# Number of clones plotted above.
len(bcs)

In [None]:
# Clones present only at 0hr and 24hr (absent at 14day and 16day).
# A timepoint counts as absent when its median coordinates contain a NaN.
absent = {t: transitions[t].apply(lambda v: bool(np.isnan(v).any()))
          for t in ['0hr','24hr','14day','16day']}
early_only = ~absent['0hr'] & ~absent['24hr'] & absent['14day'] & absent['16day']
idx = list(transitions.index[early_only])

transitions22 = transitions.loc[idx].copy()
feats = ['0hr_0','0hr_1','24hr_0','24hr_1']
transitions33, load = do_pca(transitions22, feats, Npca=2, number_genes=[], zscore=True)
transitions33 = add_kmeans(transitions33, n_clusters=1)
# Prefix the labels so they cannot collide with the other clone groups.
transitions33['clustering'] = 'earlier_' + transitions33['clustering']

In [None]:
# Clones present only at 14day and 16day (absent at 0hr and 24hr).
# A timepoint counts as absent when its median coordinates contain a NaN.
absent = {t: transitions[t].apply(lambda v: bool(np.isnan(v).any()))
          for t in ['0hr','24hr','14day','16day']}
late_only = absent['0hr'] & absent['24hr'] & ~absent['14day'] & ~absent['16day']
idx = list(transitions.index[late_only])

transitions222 = transitions.loc[idx].copy()
feats = ['14day_0','14day_1','16day_0','16day_1']
transitions333, load = do_pca(transitions222, feats, Npca=2, number_genes=[], zscore=True)
transitions333 = add_kmeans(transitions333, n_clusters=3)
# Prefix the labels so they cannot collide with the other clone groups.
transitions333['clustering'] = 'later_' + transitions333['clustering']

In [None]:
# Stack the three clone groups (all timepoints, early-only, late-only).
# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result.
transitionsall=pd.concat([transitions3,transitions33,transitions333])

In [None]:
# PC1 vs PC2 for each clone group, coloured by k-means cluster:
# all timepoints | early-only | late-only.
hue='clustering'
fig, ax = plt.subplots(1, 3, figsize=(18, 5))

for axs, df in zip(ax, (transitions3, transitions33, transitions333)):
    df=df.sort_values(hue)
    for h in df[hue].unique():
        df2=df.loc[df[hue]==h]
        axs.scatter(df2['PC1'],df2['PC2'],marker='o',s=100,label=h)
        axs.legend(fontsize=16)
        axs.xaxis.set_ticks([])
        axs.yaxis.set_ticks([])
        axs.set_xlabel('PC1',fontsize=16)
        axs.set_ylabel('PC2',fontsize=16)

plt.show()
fig.savefig('figures/figure3/trajPC_MGH707.pdf', bbox_inches='tight')

In [None]:
# Annotate every cell with the trajectory class of its clone; cells whose
# clone was not classified get '-'.
diction = transitionsall['clustering'].to_dict()
adata.obs['trajectory_class'] = adata.obs['cloneid'].map(diction)
adata.obs['trajectory_class'] = adata.obs['trajectory_class'].replace(np.nan, '-')

In [None]:
# One panel per clone (all groups, ordered by trajectory class): the clone's
# cells coloured by timepoint over the outline of the full dataset.
bcs=transitionsall.sort_values('clustering').index.unique()

stratify='timepoint'
feat='UMAP'
xs = feat + '1'
ys = feat + '2'

# Fixed timepoint -> colour mapping taken once from the full dataset. Zipping
# the palette against the timepoints present in each clone would recolour
# timepoints in clones that are missing some of them.
expr_all = scrna.andata2df(adata).sort_values(stratify)
cell_colors = dict(zip(expr_all[stratify].unique(), ['purple','orangered','greenyellow','turquoise']))

# Keep the original 8x11 layout, but grow it when there are more clones.
n_cols = 11
n_rows = max(8, -(-len(bcs)//n_cols))
fig, ax = plt.subplots(n_rows, n_cols, figsize=(100, 7.5*n_rows))
ax=ax.ravel()
for kk, bc in enumerate(bcs):
    axs=ax[kk]
    adataaux=adata[adata.obs['cloneid']==bc]

    expr = scrna.andata2df(adataaux).sort_values(stratify)
    for c in expr[stratify].unique():
        if c not in cell_colors:
            continue
        expraux = expr.loc[expr[stratify] == c, [xs, ys]]
        axs.scatter(expraux[xs], expraux[ys], c=cell_colors[c], edgecolor='lightgray', s=80, marker='.', label=c,
                    linewidths=.3)
    axs.set_xlabel(xs)
    axs.set_ylabel(ys)
    axs.set_xticks([])

    scv.pl.scatter(adata, color='white', size=80,ax=axs,add_outline=True,alpha=0,show=False)
    axs.set_title(bc+','+str(kk))

plt.show()
fig.savefig("figures/MGH707umaps.png", dpi=300, bbox_inches='tight')

In [None]:
# One panel per clone (all groups, ordered by trajectory class): the clone's
# cells coloured by timepoint, its median position per timepoint joined by a
# line, and the outline of the full dataset in the background.
bcs=transitionsall.sort_values('clustering').index.unique()

stratify='timepoint'
feat='UMAP'
xs = feat + '1'
ys = feat + '2'
timepoints=['0hr','24hr','14day','16day']

# Fixed timepoint -> colour mapping taken once from the full dataset. Zipping
# the palette against the timepoints present in each clone would recolour
# timepoints in clones that are missing some of them.
expr_all = scrna.andata2df(adata).sort_values(stratify)
cell_colors = dict(zip(expr_all[stratify].unique(), ['purple','orangered','greenyellow','turquoise']))
colors=['violet','b','darkred','green']  # colours of the median markers

# Keep the original 8x11 layout, but grow it when there are more clones.
n_cols = 11
n_rows = max(8, -(-len(bcs)//n_cols))
fig, ax = plt.subplots(n_rows, n_cols, figsize=(100, 7.5*n_rows))
ax=ax.ravel()
for kk, bc in enumerate(bcs):
    axs=ax[kk]
    df=transitionsall.loc[bc].to_frame().T
    adataaux=adata[adata.obs['cloneid']==bc]

    expr = scrna.andata2df(adataaux).sort_values(stratify)
    for c in expr[stratify].unique():
        if c not in cell_colors:
            continue
        expraux = expr.loc[expr[stratify] == c, [xs, ys]]
        axs.scatter(expraux[xs], expraux[ys], c=cell_colors[c], edgecolor='lightgray', s=80, marker='.', label=c,
                    linewidths=.3)
    axs.set_xlabel(xs)
    axs.set_ylabel(ys)

    # Median position per timepoint, marker size proportional to cell count.
    XX=[]
    YY=[]
    for t, color in zip(timepoints, colors):
        x=[i[0] for i in df[t]]
        y=[i[1] for i in df[t]]
        s=[i*2 for i in df[t+'_count']]
        XX.append(x)
        YY.append(y)
        axs.scatter(x,y,s=s,color=color)
    axs.plot(XX,YY,'-',color='k',alpha=0.6)
    # Trajectory class label, placed at fixed data coordinates.
    axs.text(5,-7.5,str(df['clustering'].unique()[0]),fontsize=20)
    axs.set_xticks([])

    scv.pl.scatter(adata, color='white', size=80,ax=axs,add_outline=True,alpha=0,show=False)
    axs.set_title(bc+','+str(kk))

plt.show()
#fig.savefig("figures/HCC4006trajectories.png", dpi=300, bbox_inches='tight')

In [None]:
# Number of clones across all trajectory groups.
len(bcs)

In [None]:
# Saves whatever figure `fig` currently refers to.
# NOTE(review): this depends on the per-clone trajectories cell having been
# the last one to assign `fig` — confirm execution order before relying on it.
fig.savefig("figures/MGH707trajectories.png", dpi=300, bbox_inches='tight')

In [None]:
#traj classes
# One panel per trajectory class: the class's cells coloured by timepoint, the
# class-average median position per timepoint (marker size ~ mean cell count)
# joined by a line, over the outline of the full dataset.

stratify='timepoint'
feat='UMAP'
xs = feat + '1'
ys = feat + '2'
timepoints=['0hr','24hr','14day','16day']

# Fixed timepoint -> colour mapping taken once from the full dataset. Zipping
# the palette against the timepoints present in each class would recolour
# timepoints in the early-only and late-only classes.
expr_all = scrna.andata2df(adata).sort_values(stratify)
cell_colors = dict(zip(expr_all[stratify].unique(), ['purple','orangered','greenyellow','turquoise']))
colors=['violet','b','darkred','green']  # colours of the median markers

fig, ax = plt.subplots(2, 5, figsize=(20, 7))
ax=ax.ravel()

adata2=adata[adata.obs['trajectory_class']!='-']
tclasses=sorted(adata2.obs['trajectory_class'].unique().tolist())
for kk, bc in enumerate(tclasses):
    axs=ax[kk]
    # Average the per-clone '<t>_<dim>' and '<t>_count' columns over the class.
    df=transitionsall.loc[transitionsall['clustering']==bc]
    df=df[[col for col in df.columns if '_' in col]].mean()
    adataaux=adata2[adata2.obs['trajectory_class']==bc]

    expr = scrna.andata2df(adataaux).sort_values(stratify)
    for c in expr[stratify].unique():
        if c not in cell_colors:
            continue
        expraux = expr.loc[expr[stratify] == c, [xs, ys]]
        axs.scatter(expraux[xs], expraux[ys], c=cell_colors[c], edgecolor='lightgray', s=80, marker='.', label=c,
                    linewidths=.3)
    axs.set_xlabel(xs)
    axs.set_ylabel(ys)

    XX=[]
    YY=[]
    for t, color in zip(timepoints, colors):
        x=df[t+'_0']
        y=df[t+'_1']
        s=df[t+'_count']
        XX.append(x)
        YY.append(y)
        # Translucent fill first, then a crisp outline on top.
        axs.scatter(x,y,s=s*10,color=color,alpha=0.5,edgecolors='k')
        axs.scatter(x,y,s=s*10,color=color,facecolors='none',edgecolors='k',linewidth=1)
    axs.plot(XX,YY,'-',color='k',alpha=0.6)
    axs.set_xticks([])

    scv.pl.scatter(adata, color='white', size=80,ax=axs,add_outline=True,alpha=0,show=False)
    axs.set_title(bc,fontsize=16)
plt.tight_layout()
plt.show()

fig.savefig("figures/figure3/MGH707_trajsClassUMAPS.png", dpi=300, bbox_inches='tight')

In [None]:
# Restrict to cells whose clone received a trajectory class and plot them by timepoint.
adataaux=adata[adata.obs['trajectory_class']!='-']
scrna.plot_label(adataaux, feat='UMAP', stratify='timepoint', legend_inside=False, figsize=(6, 5),savepdf=False)
plt.show()

In [None]:
# List the trajectory classes present among the classified cells.
adataaux.obs['trajectory_class'].unique()

In [None]:
# Grid of UMAP panels, one per trajectory class, coloured by timepoint.
stratify='timepoint'
feat='UMAP'
colors = ['purple','orangered','greenyellow','turquoise']
xs = feat + '1'
ys = feat + '2'

# The table does not depend on the class, so build and sort it once instead
# of once per panel.
expr = scrna.andata2df(adataaux).sort_values(stratify)

fig, axs = plt.subplots(2, 5, figsize=(30, 10))
axs=axs.ravel()
for k, c in enumerate(adataaux.obs['trajectory_class'].sort_values().unique()):
    axw = axs[k]
    # NOTE(review): `expr` is not filtered by class `c`, so every panel shows
    # the same cells and only the title differs — confirm whether the intent
    # was to restrict each panel to its own class.
    for t, color in zip(expr[stratify].unique(), colors):
        expraux = expr.loc[expr[stratify] == t, [xs, ys]]
        axw.scatter(expraux[xs], expraux[ys], c=color, edgecolor='lightgray', s=80, marker='.', label=t,
                    linewidths=.3)
    axw.set_xlabel(xs)
    axw.set_ylabel(ys)
    axw.legend()
    axw.set_title(c)
plt.show()

In [None]:
# Make variable names unique, then write the annotated object (now including
# 'trajectory_class') to `results_file`. That is a bare file name, so the file
# is written relative to the current working directory, not the input directory.
adata.var_names_make_unique()
adata.write(results_file)

In [None]:
#what's special about the different categories at timepoint 0? what makes them behave one way or another?
print('at 0hr')

# Classified cells at 0hr. Take an explicit copy: rank_genes_groups and
# dendrogram store results on the object, which should not be a view of adata.
adataaux=adata[(adata.obs['timepoint']=='0hr') & (adata.obs['trajectory_class']!='-')].copy()

scrna.plot_label(adataaux, feat='UMAP', stratify='trajectory_class', legend_inside=False, figsize=(6, 5),savepdf=False)
plt.show()

# Marker genes of each trajectory class against the rest.
sc.tl.rank_genes_groups(adataaux, 'trajectory_class',use_raw=False)
ranked_genes = pd.DataFrame(adataaux.uns['rank_genes_groups']['names'])
sc.tl.dendrogram(adataaux,groupby='trajectory_class')
axs = sc.pl.rank_genes_groups_dotplot(adataaux, n_genes=15, vmin=-1,vmax=1,use_raw=False, color_map='bwr')

In [None]:
#what's special about the different categories at 24hr? what makes them behave one way or another?
print('at 24hr')

# Classified cells at 24hr. Take an explicit copy: rank_genes_groups and
# dendrogram store results on the object, which should not be a view of adata.
adataaux=adata[(adata.obs['timepoint']=='24hr') & (adata.obs['trajectory_class']!='-')].copy()

scrna.plot_label(adataaux, feat='UMAP', stratify='trajectory_class', legend_inside=False, figsize=(6, 5),savepdf=False)
plt.show()

# Marker genes of each trajectory class against the rest.
sc.tl.rank_genes_groups(adataaux, 'trajectory_class',use_raw=False)
ranked_genes = pd.DataFrame(adataaux.uns['rank_genes_groups']['names'])
sc.tl.dendrogram(adataaux,groupby='trajectory_class')
axs = sc.pl.rank_genes_groups_dotplot(adataaux, n_genes=15, vmin=-1,vmax=1,use_raw=False, color_map='bwr')

# Compute cell cycle scores from Macosko et al 2012

In [None]:
# Each column of the CSV is one gene list; the score of a cell is its mean
# expression over the genes of that list that exist in the dataset.
ccs = pd.read_csv('cell_cycle_genesMacosko2012.csv')

expr = scrna.andata2df(adata)
for c in ccs.columns:
    stripped = (name.replace(' ', '') for name in ccs[c].dropna())
    genes = [name for name in stripped if name in adata.var_names]

    score = expr.loc[:, genes].mean(axis=1)

    # keep both the gene list and the per-cell score on the AnnData object
    adata.uns['cell_cycle_' + c] = genes
    adata.obs[c] = score

In [None]:
# One UMAP per cell-cycle score column added above.
cell_cycle_scores=[c for c in ccs.columns]
sc.pl.umap(adata, color=cell_cycle_scores, s=50)
plt.show()

In [None]:
# Cell-cycle score by trajectory class: one row of panels per timepoint, one
# panel per score, filling the 4x5 grid in order.
expr = scrna.andata2df(adata)
expr = expr.loc[expr['trajectory_class'] != '-']

fig, axs = plt.subplots(4, 5, figsize=(25, 20))
axs = axs.ravel()
per_timepoint = {t: expr.loc[expr.timepoint == t] for t in expr.timepoint.unique()}
panels = ((t, s) for t in per_timepoint for s in cell_cycle_scores)
for k, (t, s) in enumerate(panels):
    expraux = per_timepoint[t]
    sns.boxplot(data=expraux, y=s, x='trajectory_class', ax=axs[k], color='lightgray')
    sns.swarmplot(data=expraux, y=s, x='trajectory_class', ax=axs[k])
plt.show()

In [None]:

# Four-panel summary of trajectory classes over time:
#   [0] stacked proportion of each class per timepoint
#   [1] the same proportions as lines
#   [2] change of each proportion relative to the first timepoint
#   [3] per-clone difference in cell number, 14day vs 0hr, by class
category='trajectory_class'
groupby='trajectory_class'
conditions='timepoint'
conds=['0hr','24hr','14day','16day']

#look only at cells that belong to an actual class
adataaux=adata[adata.obs['trajectory_class']!='-']

stratify=groupby

expr = scrna.andata2df(adataaux)
if groupby=='trajectory_class':
    expr=expr.loc[expr[category]!='-']
expr = expr.sort_values(stratify)

df = expr[[stratify, conditions, 'Barcode']]
# number of distinct cells per (class, timepoint), and their share of the
# timepoint's total
counts = df.groupby([stratify, conditions]).nunique()['Barcode']
cell_numbers = counts.to_frame().reset_index()
prop_cells = (counts / df.groupby([conditions]).nunique()['Barcode']).to_frame().reset_index()

#plot
from matplotlib.pyplot import cm

fig, axs = plt.subplots(2, 2, figsize=(22, 15))
axs=axs.ravel()

df=prop_cells.sort_values(groupby)

exp_bcs=[i for i in df[groupby].unique()]
color=cm.Set2(np.linspace(0,1,len(exp_bcs)))

# One x position per plotted timepoint. Tied to `conds` (which defines the
# bars) rather than to however many timepoints the data contains, so the bar
# positions and heights always have the same length.
r = list(range(len(conds)))

vals=[0.0]*len(conds)  # running top of the stacked bars
for k, bc in enumerate(exp_bcs):
    dfaux=df.loc[df[groupby]==bc]

    bars=[]
    for t in conds:
        hit=dfaux.loc[dfaux[conditions]==t,'Barcode']
        bars.append(hit.values[0] if len(hit) else 0)

    axs[0].bar(r, bars,bottom=vals, width=1,edgecolor=None,color=color[k])
    vals=[b+v for b, v in zip(bars, vals)]

    tendency=[b-bars[0] for b in bars]
    axs[1].plot(r,bars,color=color[k],linewidth=5,label=bc)
    axs[2].plot(r,tendency,color=color[k],linewidth=5,label=bc)
axs[1].legend(fontsize=20)

# Fix the tick positions before labelling them; labelling automatic ticks can
# attach the labels to the wrong values.
yticks=[0,0.2,0.4,0.6,0.8,1]

axs[0].set_ylabel(groupby+' proportion',fontsize=20)
axs[0].set_xticks(r)
axs[0].set_xticklabels(conds,fontsize=20)
axs[0].set_xlim([-0.5,len(conds)-0.5])
axs[0].set_ylim([0,1])
axs[0].set_yticks(yticks)
axs[0].set_yticklabels(yticks,fontsize=20)

axs[1].set_ylabel(groupby+' proportion trend',fontsize=20)
axs[1].set_xticks(r)
axs[1].set_xticklabels(conds,fontsize=20)
axs[1].set_ylim([0,1])
axs[1].set_yticks(yticks)
axs[1].set_yticklabels(yticks,fontsize=20)

axs[2].set_ylabel(groupby+' proportion change',fontsize=20)
axs[2].set_xticks(r)
axs[2].set_xticklabels(conds,fontsize=20)
axs[2].set_ylim([-0.5,0.5])


#last plot, axs[3]: clones seen at both 0hr and 14day
df=adata.obs[['timepoint','cloneid','trajectory_class']]
bcs=[]
change=[]
trajclass=[]
for bc in df['cloneid'].unique():
    df2=df.loc[df['cloneid']==bc]

    scr=len(df2.loc[df2['timepoint']=='0hr'])
    ontx=len(df2.loc[df2['timepoint']=='14day'])
    if scr>0 and ontx>0:
        bcs.append(bc)
        change.append(ontx-scr)
        trajclass.append(df2['trajectory_class'].values[0])

res=pd.DataFrame({'cloneid':bcs,
                  'difference 14day VS 0hr':change,
                  'trajectory_class':trajclass})
res=res.loc[res['trajectory_class']!='-']
sns.boxplot(data=res,x='trajectory_class',y='difference 14day VS 0hr',ax=axs[3],color='white')
sns.swarmplot(data=res,x='trajectory_class',y='difference 14day VS 0hr',ax=axs[3],size=10,color='black')
ax=axs[3]
for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
              ax.get_xticklabels() + ax.get_yticklabels()):
    item.set_fontsize(20)

plt.show()