In [None]:
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scipy as sp
import scipy.stats as stats

Linux

In [None]:
# Linux workstation paths. Guarded by platform so that "Restart & Run All"
# does not fail on this cell when the notebook is executed elsewhere.
if sys.platform.startswith("linux"):
    data = sc.read_h5ad("/home/alirassolie/Documents/120521_adata_celltype_prevpost_and_markers")
    # Keep cells whose annotation matches "sat" or "myo" (case-insensitive).
    # .copy() turns the boolean-indexed view into a standalone AnnData, because
    # a later cell runs sc.tl.pca on it, which writes results into the object.
    data_filtered = data[data.obs.annotations.str.contains("(?i)sat|myo")].copy()
    muscvar = sc.read_h5ad('/home/alirassolie/Documents/misc/adata/160720_muscvar.h5ad')
    # Drop every cell whose annotation contains the substring "19".
    muscvar = muscvar[~muscvar.obs.annotations.str.contains("19")]

Windows

In [None]:
# Windows workstation paths. Guarded by platform so that "Restart & Run All"
# does not fail on this cell when the notebook is executed elsewhere.
if sys.platform == "win32":
    data = sc.read_h5ad(r'C:\Users\Ali\Downloads\OneDrive_1_9-29-2021\120521_adata_celltype_prevpost_and_markers')
    # Keep cells whose annotation matches "sat" or "myo" (case-insensitive).
    # .copy() turns the boolean-indexed view into a standalone AnnData, because
    # a later cell runs sc.tl.pca on it, which writes results into the object.
    data_filtered = data[data.obs.annotations.str.contains("(?i)sat|myo")].copy()
    muscvar = sc.read_h5ad(r'C:\Users\Ali\Downloads\OneDrive_1_9-29-2021\160720_muscvar.h5ad')
    # Drop every cell whose annotation contains the substring "19".
    muscvar = muscvar[~muscvar.obs.annotations.str.contains("19")]

In [None]:
# Use font type 42 for both the PDF and the PostScript backends.
# NOTE(review): 42 is matplotlib's TrueType setting, presumably chosen so that
# text in exported figures stays editable — confirm.
plt.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

Functions

In [None]:
def ECDF(one_d: np.ndarray, sort: bool = False, perc: bool = False, ax=None, **kwargs):
    """Scatter-plot the running total of a shifted 1-D sample.

    The input is shifted by ``abs(min(one_d))`` (which makes every value
    non-negative) and the inclusive running sum of the shifted values is
    plotted against the shifted values themselves. Despite the name, the
    curve is a cumulative *sum of values*, not the fraction of observations
    at or below x.

    Arguments:

        one_d: 1-D array-like holding the data.

        sort: boolean, sort the values in ascending order before
        accumulating.

        perc: boolean, divide the running sum by its total so the curve
        ends at exactly 1. If the total is 0 the curve is left at 0
        instead of producing NaNs.

        ax: optional matplotlib Axes to draw on. When omitted, the
        pyplot state machine is used and the figure is shown immediately.

        **kwargs: forwarded unchanged to ``scatter``.

    Returns:
        (one_d_shifted, cum): the shifted values and their running sum,
        both with the same length as the input.

    Raises:
        ValueError: if the input is empty or not one-dimensional.
    """
    one_d = np.asarray(one_d)
    if one_d.ndim != 1 or one_d.size == 0:
        raise ValueError("ECDF expects a non-empty one-dimensional array")

    if sort:
        one_d = np.sort(one_d)

    one_d_shifted = one_d + np.abs(one_d.min())

    # Inclusive running sum: position i contains element i, so the last
    # entry equals the total (and 1.0 after normalisation). A single
    # vectorised pass replaces the former per-index re-summation.
    cum = np.cumsum(one_d_shifted, dtype=float)

    if perc:
        total = cum[-1]
        # All shifted values are >= 0, so a zero total means every entry of
        # `cum` is already 0; skip the division to avoid 0/0.
        if total != 0:
            cum = cum / total

    # Compare against None explicitly rather than relying on the truthiness
    # of an arbitrary Axes-like object.
    if ax is None:
        plt.scatter(one_d_shifted, cum, **kwargs)
        plt.show()
    else:
        ax.scatter(one_d_shifted, cum, **kwargs)
    return (one_d_shifted, cum)

In [None]:
# Run PCA on the "sat|myo" subset and draw scanpy's PCA scatter plot for it.
# NOTE(review): the cells visible further down read `muscvar.obsm["X_pca"]`,
# not the PCA computed here — confirm this cell is still needed.
sc.tl.pca(data_filtered)
sc.pl.pca(data_filtered)

In [None]:
fig, ax = plt.subplots()

# Overlay one KDE per batch of the sign-flipped first PCA component for the
# cells whose annotation contains "12".
# NOTE(review): str.contains is a substring match, so "12" would also hit
# labels such as "112" or "120" — confirm against the annotation values.
in_cluster = muscvar.obs.annotations.str.contains("12")
for column, batch_label in (('x', "0"), ('y', "1")):
    batch_cells = muscvar[in_cluster & (muscvar.obs.batch == batch_label)]
    df = pd.DataFrame({column: batch_cells.obsm["X_pca"].T[0]*-1})
    df.plot.kde(ax=ax)

# Keep the Axes as the cell's displayed value, as before.
ax
    

In [None]:
fig, ax = plt.subplots()

# Overlay one KDE per batch of the sign-flipped first PCA component for the
# cells whose annotation contains "3".
# NOTE(review): str.contains is a substring match, so "3" would also hit
# labels such as "13" or "30" — confirm against the annotation values.
in_cluster = muscvar.obs.annotations.str.contains("3")
for column, batch_label in (('x', "0"), ('y', "1")):
    batch_cells = muscvar[in_cluster & (muscvar.obs.batch == batch_label)]
    df = pd.DataFrame({column: batch_cells.obsm["X_pca"].T[0]*-1})
    df.plot.kde(ax=ax)

# Keep the Axes as the cell's displayed value, as before.
ax
    

In [None]:
# Annotation-based subsets of `muscvar` (substring matches on the label).
annotation_labels = muscvar.obs.annotations
cl12 = muscvar[annotation_labels.str.contains("12")]
cl3 = muscvar[annotation_labels.str.contains("3")]
sat = muscvar[annotation_labels.str.contains("Sat")]

# Split each subset by batch; list index 0 holds batch "0", index 1 batch "1".
batch_keys = [f"{i}" for i in range(2)]
batch_12 = [cl12[cl12.obs.batch == key] for key in batch_keys]
batch_3 = [cl3[cl3.obs.batch == key] for key in batch_keys]
sat_batch = [sat[sat.obs.batch == key] for key in batch_keys]

In [None]:
# One frame per subset: the first two PCA columns (labels 0 and 1; .loc
# slicing is end-inclusive) plus each cell's batch label.
cl12_pca = (
    pd.DataFrame(cl12.obsm["X_pca"])
    .loc[:, :1]
    .assign(batch=cl12.obs.batch.values)
)

cl3_pca = (
    pd.DataFrame(cl3.obsm["X_pca"])
    .loc[:, :1]
    .assign(batch=cl3.obs.batch.values)
)

sat_pca = (
    pd.DataFrame(sat.obsm["X_pca"])
    .loc[:, :1]
    .assign(batch=sat.obs.batch.values)
)

In [None]:
def convert(bat, offset):
    """Return the sign-flipped, shifted first PCA component of ``bat``.

    Arguments:
        bat: object exposing ``obsm["X_pca"]``; its first column is used.
        offset: object exposing ``obsm["X_pca"]``; the absolute value of the
        minimum of its sign-flipped first column is added as a shift.

    Returns:
        np.ndarray with one value per row of ``bat.obsm["X_pca"]``.
    """
    flipped = bat.obsm["X_pca"].T[0] * -1
    # abs(min) spelled directly instead of sqrt(min ** 2).
    shift = np.abs(np.min(offset.obsm["X_pca"].T[0] * -1))
    return np.array(flipped + shift)


def plot_ECDF(df, offset, axs, ind, bins=20):
    """Plot a binned cumulative distribution of the converted PC values.

    Arguments:
        df: subset to plot, passed to ``convert`` as ``bat``.
        offset: reference subset, passed to ``convert`` as ``offset``.
        axs: indexable collection of matplotlib Axes.
        ind: index into ``axs`` selecting the panel to draw on.
        bins: number of histogram bins (default 20).

    Returns:
        np.ndarray of cumulative bin counts (length ``bins``); the last
        entry is the number of values.
    """
    pc_values = convert(df, offset)
    values, base = np.histogram(pc_values, bins=bins)
    cumulative = np.cumsum(values)
    # cumulative[k] counts everything up to the RIGHT edge of bin k, so the
    # curve is drawn at base[1:]; using the left edges would shift it one bin
    # width to the left and it would never reach the sample maximum.
    axs[ind].plot(base[1:], np.divide(cumulative, cumulative[-1]), linewidth=1)
    return cumulative

In [None]:
# One UMAP per batch of the cluster-"3" subset, coloured by annotation.
for batch_subset in batch_3:
    sc.pl.umap(batch_subset, color="annotations")

In [None]:


fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))

# Each panel overlays the batch "0" curve and then the batch "1" curve of one
# subset; the cumulative bin counts are kept for the tests further down.
sat_cum_0, sat_cum_1 = [plot_ECDF(subset, sat, axs, 0) for subset in sat_batch]
fast_cum_0, fast_cum_1 = [plot_ECDF(subset, cl12, axs, 1) for subset in batch_12]
slow_cum_0, slow_cum_1 = [plot_ECDF(subset, cl3, axs, 2) for subset in batch_3]

for panel, panel_label in zip(axs, ("Satellites", "Fast-twitch", "Slow-twitch")):
    panel.set_xlabel(panel_label)

# plt.savefig("/home/alirassolie/Documents/misc/cumsum_fast_slow_sat.pdf")

plt.tight_layout()

In [None]:
# Wilcoxon signed-rank test on the satellite cumulative bin counts, paired by bin index.
# NOTE(review): the two histograms are binned over each batch's own range and
# the counts scale with batch size — confirm this pairing is intended.
stats.wilcoxon(sat_cum_0, sat_cum_1)

In [None]:
# Wilcoxon signed-rank test on the cluster-"12" cumulative bin counts, paired by bin index.
# NOTE(review): the two histograms are binned over each batch's own range and
# the counts scale with batch size — confirm this pairing is intended.
stats.wilcoxon(fast_cum_0, fast_cum_1)

In [None]:
# Wilcoxon signed-rank test on the cluster-"3" cumulative bin counts, paired by bin index.
# NOTE(review): the two histograms are binned over each batch's own range and
# the counts scale with batch size — confirm this pairing is intended.
stats.wilcoxon(slow_cum_0, slow_cum_1)

In [None]:
# Shapes of two of the cumulative-count vectors, one per line.
print(sat_cum_1.shape, slow_cum_1.shape, sep="\n")

In [None]:
# Per-cell PC1 coordinates (sign-flipped and offset by `convert`), as opposed to the binned cumulative counts used above

In [None]:
# Converted PC1 values per subset; the batch lists are ordered ["0", "1"],
# which this notebook names "pre" and "post" respectively.
sat_pre, sat_post = (convert(subset, sat) for subset in sat_batch)
fast_pre, fast_post = (convert(subset, cl12) for subset in batch_12)
slow_pre, slow_post = (convert(subset, cl3) for subset in batch_3)

In [None]:
# Rank-sum test (post vs. pre) and ratio of means for the satellite subset.
# NOTE(review): the values carry the offset added by `convert`, so the ratio
# of means depends on that offset — confirm this is the intended statistic.
sat_ranksum = stats.ranksums(sat_post, sat_pre)
print(sat_ranksum)
print(f"The mean quotient: {sat_post.mean() / sat_pre.mean()}")

In [None]:
# Rank-sum test (post vs. pre) and ratio of means for the cluster-"12" subset.
# NOTE(review): the values carry the offset added by `convert`, so the ratio
# of means depends on that offset — confirm this is the intended statistic.
fast_ranksum = stats.ranksums(fast_post, fast_pre)
print(fast_ranksum)
print(f"The mean quotient: {fast_post.mean() / fast_pre.mean()}")

In [None]:
# Rank-sum test (post vs. pre) and ratio of means for the cluster-"3" subset.
# NOTE(review): the values carry the offset added by `convert`, so the ratio
# of means depends on that offset — confirm this is the intended statistic.
slow_ranksum = stats.ranksums(slow_post, slow_pre)
print(slow_ranksum)
print(f"The mean quotient: {slow_post.mean() / slow_pre.mean()}")

### Median

In [None]:
# Median and mean of the converted PC1 values, pre then post, for the
# fast-twitch group followed (after a blank line) by the slow-twitch group.
fast_rows = (
    ('fast-twitch pre median and mean: ', fast_pre),
    ('fast-twitch post median and mean: ', fast_post),
)
slow_rows = (
    ('slow-twitch pre median and mean: ', slow_pre),
    ('slow-twitch post median and mean: ', slow_post),
)

for caption, pc_values in fast_rows:
    print(caption, np.median(pc_values), np.mean(pc_values))

print()

for caption, pc_values in slow_rows:
    print(caption, np.median(pc_values), np.mean(pc_values))