In [None]:
import scanpy as sc
import scvelo as sv
import loompy as lp
import pandas as pd
%pylab inline

# Plots of the presentation

In [None]:
# Read the combined loom file. '6_files.loom' is the output name used by the
# lp.combine cell at the end of this notebook, so that cell has to have been
# run at least once before this one can succeed.
adata = sv.read_loom('6_files.loom')
# Make the variable (gene) names unique, in place.
adata.var_names_make_unique()

In [None]:
# Shorten every cell identifier: drop its first five characters, then keep
# only what precedes the first '.'.
# NOTE: not idempotent -- running this cell twice trims the names twice.
adata.obs_names = [raw_id[5:].partition('.')[0] for raw_id in adata.obs_names]

In [None]:
# run this cell to ignore results from the first experiment
# (cells whose name starts with 'A172' or 'A257' are removed)
first_experiment = adata.obs_names.str.startswith(('A172', 'A257'))
adata = adata[~first_experiment, :]
adata

In [None]:
# Loads the description files, for days
# dct_day maps a sample name (first field of each line) to its raw day label.
dct_day = {}

# Two '|'-separated files share one layout: the day is the second
# '_'-separated token of the second field.
pipe_delimited_files = (
    '../../data/sample_description/A172.sampleDescription.txt',
    '../../data/sample_description/A257.sampleDescription.txt',
)
for description_path in pipe_delimited_files:
    with open(description_path) as f_in:
        for row in f_in:
            fields = row.split('|')
            dct_day[fields[0]] = fields[1].split('_')[1]

# Tab-separated file: the day is the last field. The line is not stripped, so
# the stored label keeps whatever line terminator followed that field.
with open('../../data/sample_description/sample_annotation_comb.txt') as f_in:
    for row in f_in:
        fields = row.split('\t')
        dct_day[fields[0]] = 'day' + fields[-1]

In [None]:
# Makes day nomenclature homogeneous

# Register the single-digit samples under the alternative spelling of their
# name (with / without a zero before the digit). A missing source key raises
# KeyError, exactly as a plain dict lookup does.
for i in range(1, 10):
    dct_day['A172T0%i' % i] = dct_day['A172T%i' % i]
    dct_day['A267T%i' % i] = dct_day['A267T0%i' % i]

# Raw label (surrounding whitespace removed) -> canonical zero-padded label.
# Labels that are not listed here are already in canonical form.
day_aliases = {
    'day0': 'day00',
    'DAY7': 'day07',
    'day7': 'day07',
    'DAY7+2': 'day09',
    'day9': 'day09',
    'DAY7+3': 'day10',
    'DAY7+4': 'day11',
}


def _canonical_day(raw_label):
    """Return the canonical form of one raw day label.

    Surrounding whitespace (including a trailing newline left over from file
    parsing) is removed first, then the label is looked up in ``day_aliases``;
    labels without an alias are returned stripped but otherwise unchanged.
    """
    label = raw_label.strip()
    return day_aliases.get(label, label)


# Raw labels, one per cell; cells without a description get 'na'.
adata.obs['day'] = [dct_day.get(name, 'na') for name in adata.obs_names]
days = adata.obs['day'].copy()  # raw labels, kept for inspection

# Assign the whole column in one statement. The previous chained form
# (adata.obs['day'][mask] = value) writes through a temporary object and is
# not guaranteed to reach adata.obs.
adata.obs['day'] = [_canonical_day(raw) for raw in days]

# Drop the cells for which no day is known.
adata = adata[adata.obs['day'] != 'na', :]

In [None]:
# Loads the ICs file
ics = pd.read_csv('../../data/scores/ASP14_TS_ul_centered.txt_filtered.txt.moduleAverages', sep='\t')
# Overwrite the sample names of rows 0..8 with 'A267T1'..'A267T9'
# (presumably to drop a zero-padded spelling -- confirm against the file).
# A single .loc call writes into `ics` itself; the chained form
# ics['SAMPLE'][i-1] = ... may end up writing to a temporary copy.
for i in range(1, 10):
    ics.loc[i - 1, 'SAMPLE'] = 'A267T%i' % i

In [None]:
# Annotates adata with the ICs
# Careful, this discards the two first experiments!
thr = 0
ics = ics.sort_values(by='SAMPLE')
# Select the cells named in the ICs table, in the table's (sorted) order, so
# the score arrays below can be assigned to adata.obs positionally.
adata = adata[ics['SAMPLE'], :]
for key in ('IC1+', 'IC2+', 'IC10+', 'IC30+'):
    shifted = ics[key].values - thr
    # Scores below the threshold are clamped to zero.
    adata.obs[key] = np.where(shifted < 0, 0, shifted)
ic1, ic2 = adata.obs['IC1+'], adata.obs['IC2+']
adata.obs['IC1-IC2'] = ic1 - ic2
adata.obs['IC1+IC2'] = ic1 + ic2

In [None]:
# Preprocess adata for umap & velocity
# None of the return values below are captured: every step is relied on to
# update `adata` in place, so the order of the calls matters.
sv.pp.filter_and_normalize(adata)
sv.pp.moments(adata)
sv.tl.velocity(adata, mode='stochastic')
sv.tl.velocity_graph(adata)
# The neighbour graph and the UMAP come from scanpy; the velocities are then
# projected onto that 'umap' basis.
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sv.tl.velocity_embedding(adata, basis='umap')

In [None]:
# just chooses a nice palette:
# eight evenly spaced samples (0, 1/8, ..., 7/8) of the 'cool' colormap,
# stored as hex strings under the 'day_colors' key.
cool_rgba = matplotlib.cm.cool(np.linspace(0, 1, 8, endpoint=False))
adata.uns['day_colors'] = list(map(matplotlib.colors.to_hex, cool_rgba))

In [None]:
# UMAP of all cells coloured by day; the `save` argument is given so the
# figure is also written out under the name 'global_umap.png'.
sc.settings.set_figure_params(dpi=200)
sc.pl.umap(adata, color='day', size=32, save='global_umap.png')

In [None]:
# Displays day by day
# One figure per day, cumulative: the k-th figure shows the cells of the first
# k days (in sorted order) and is saved under the name of the k-th day.
sv.settings.set_figure_params(dpi=300)
ordered_days = sorted(set(adata.obs['day']))
for n_shown, day in enumerate(ordered_days, start=1):
    to_disp = ordered_days[:n_shown]
    print(to_disp)
    shown = adata.obs['day'].isin(to_disp)
    sv.pl.velocity_embedding(
        adata[shown, :],
        basis='umap',
        color='day',
        legend_loc='lower left',
        alpha=.7,
        frameon=True,
        title='RNA velocity of cells at different times',
        legend_fontsize=7,
        arrow_length=2,
        save='%s.png' % day,
    )

In [None]:
# Velocity arrows on the UMAP for all cells, once per colouring.
sv.settings.set_figure_params(dpi=300)
all_shared_kwargs = dict(basis='umap', alpha=.7, frameon=True, legend_fontsize=7, arrow_length=2)
# (obs column used for colour, legend location, figure title, output name)
all_panels = (
    ('day', 'upper right',
     'RNA velocity of cells at different times', 'all_days.png'),
    ('IC1+IC2', 'on data',
     'RNA velocity of cells at different times (color: IC1+IC2)', 'all_plus.png'),
    ('IC1-IC2', 'on data',
     'RNA velocity of cells at different times (color: IC1-IC2)', 'all_minus.png'),
)
for color_key, legend_pos, panel_title, out_name in all_panels:
    sv.pl.velocity_embedding(adata, color=color_key, legend_loc=legend_pos,
                             title=panel_title, save=out_name, **all_shared_kwargs)

In [None]:
# Here, plots are done without day 7.
# Slicing an AnnData yields a view of `adata`; the steps below all modify
# their argument in place, so work on an explicit, independent copy instead of
# relying on the view being converted implicitly on first write.
adata_no7 = adata[adata.obs['day'] != 'day07', :].copy()
# Same pipeline as for the full data set, recomputed on the subset.
sv.pp.filter_and_normalize(adata_no7)
sv.pp.moments(adata_no7)
sv.tl.velocity(adata_no7, mode='stochastic')
sv.tl.velocity_graph(adata_no7)
sc.pp.neighbors(adata_no7)
sc.tl.umap(adata_no7)
# Louvain clustering is only computed for this subset; its labels are stored
# in adata_no7.obs['louvain'] and used by the cells below.
sc.tl.louvain(adata_no7)
sv.tl.velocity_embedding(adata_no7, basis='umap')

In [None]:
# Velocity arrows on the UMAP of the day-7-free subset, once per colouring.
sv.settings.set_figure_params(dpi=300)
no7_shared_kwargs = dict(basis='umap', alpha=.7, frameon=True, legend_fontsize=7, arrow_length=2)
# (obs column used for colour, legend location, figure title, output name)
no7_panels = (
    ('day', 'upper right',
     'RNA velocity of cells at different times', 'no7_days.png'),
    ('IC1+IC2', 'on data',
     'RNA velocity of cells at different times (color: IC1+IC2)', 'no7_plus.png'),
    ('IC1-IC2', 'on data',
     'RNA velocity of cells at different times (color: IC1-IC2)', 'no7_minus.png'),
    ('IC10+', 'on data',
     'RNA velocity of cells at different times (color: IC10)', 'no7_ic10.png'),
    # The louvain panel used to pass palette='?', a placeholder that is neither
    # a colormap name nor a colour; it is dropped so the default categorical
    # palette is used.
    ('louvain', 'upper right',
     'RNA velocity of cells at different times', 'no7_louvain.png'),
)
for color_key, legend_pos, panel_title, out_name in no7_panels:
    sv.pl.velocity_embedding(adata_no7, color=color_key, legend_loc=legend_pos,
                             title=panel_title, save=out_name, **no7_shared_kwargs)

# Experimental below

In [None]:
# Restrict the day-7-free data to the cells of louvain clusters '0' and '3'.
sel = adata_no7.obs['louvain'].isin(['0', '3'])
adata_mys = adata_no7[sel, :]
adata_mys

In [None]:
# NOTE(review): `sums` is first assigned in the cell that follows this one, so
# on a fresh kernel this cell only works after that cell has been run.
len(sums)

In [None]:
# Gene indices ordered by total absolute velocity (summed over cells),
# smallest first: `sums` for the day-7-free data, `sums_f` for the full data.
sums = np.argsort(np.sum(abs(adata_no7.layers['velocity']), axis=0))
sums_f = np.argsort(np.sum(abs(adata.layers['velocity']), axis=0))

N = 200
# Top-N genes of the day-7-free data. adata_mys is a cell subset of adata_no7,
# so both share the same var_names.
# NOTE(review): the ranking uses the velocities of all adata_no7 cells, not
# only those of adata_mys -- confirm this is intended.
mys_genes = [adata_mys.var_names[idx] for idx in sums[::-1][:N]]
# Top N/4 genes of the full data. Each ranking is reversed on its own array;
# the previous code used len(sums) to index sums_f, which picks the wrong
# genes (or raises IndexError) whenever the two objects differ in gene count.
all_genes = [adata.var_names[idx] for idx in sums_f[::-1][:N // 4]]


def complementary(l, m):
    """Return the elements of ``l`` that are not in ``m``, in ``l``'s order."""
    return [g for g in l if g not in m]


# Genes that rank high without day 7 but not in the full data set.
exc_genes = complementary(mys_genes, all_genes)
for g in mys_genes:
    print(g)

In [None]:
# Number of genes in mys_genes that do not appear in all_genes.
len(exc_genes)

In [None]:
# Show the gene names selected from the full data set.
all_genes

In [None]:
# Shape of the 'velocity' layer of the full data set.
adata.layers['velocity'].shape

In [None]:
# Stream-style rendering of the velocities on the UMAP, coloured by day.
# No `save` argument is passed here, unlike the plotting cells above.
sv.pl.velocity_embedding_stream(adata, basis='umap', color='day', legend_loc='on data', 
                         title='RNA velocity of cells at different times', legend_fontsize=7)

In [None]:
# Arrow plot of the full data set with default styling, saved as 'all.png'.
sv.pl.velocity_embedding(adata, basis='umap', color='day', save='all.png')

In [None]:
# Re-run the whole velocity / UMAP pipeline on each gene-restricted data set
# and draw its stream plot.
# NOTE(review): `adatas` is only built in a cell further down this notebook,
# so on a fresh kernel that cell (and the one filling `ic_genes`) has to be
# run before this one.
sv.settings.set_figure_params(dpi=400)
for _adata in adatas:
    # Same sequence of in-place steps as for the full data set.
    sv.pp.filter_and_normalize(_adata)
    sv.pp.moments(_adata)
    sv.tl.velocity(_adata, mode='stochastic')
    sv.tl.velocity_graph(_adata)
    sc.pp.neighbors(_adata)
    sc.tl.umap(_adata)
    sv.tl.velocity_embedding(_adata, basis='umap')
    sv.pl.velocity_embedding_stream(_adata, basis='umap', color='day')

In [None]:
# Export the annotated object to 'day0_11_ann.loom'. write_obsm_varm is passed
# a truthy 1 (presumably so the multi-dimensional annotations are exported as
# well -- confirm against the anndata documentation).
adata.write_loom('day0_11_ann.loom', write_obsm_varm=1)

In [None]:
# Gene sets to extract from the .gmt file, keyed by the first field of a line.
to_keep = ['IC1+', 'IC2+', 'IC10+']
# ic_genes maps each kept set name to those of its genes present in adata.
ic_genes = {}
with open('../../data/ics_and_signatures.gmt') as f_in:
    for line in f_in:
        # Remove only the line terminator. Slicing with [:-1] would cut a real
        # character off the last gene when the file's final line has no
        # trailing newline.
        line = line.rstrip('\n').split('\t')
        if line[0] in to_keep:
            # Fields from the third one onwards are taken as gene names; the
            # second field is skipped.
            ic_genes[line[0]] = [g for g in line[2:] if g in adata.var_names]

In [None]:
# One gene-restricted slice of adata per kept IC, in to_keep order, followed
# by one slice on the concatenated IC1+ and IC2+ gene lists.
adatas = [adata[:, ic_genes[ic]] for ic in to_keep]
adatas.append(adata[:, ic_genes['IC1+'] + ic_genes['IC2+']])

In [None]:
# Merge the per-experiment loom files into '6_files.loom', the file read by
# the loading cell at the top of this notebook.
path = '../../data/loom/%s.loom'
names = ['full', 'A267', 'A281', 'A295', 'A296']
# lp.combine writes its result to the output file and returns None, so the
# return value is deliberately not assigned: binding it to `adata` would
# replace the loaded data set with None.
lp.combine([(path % name) for name in names], '6_files.loom', key='Accession')