In [5]:
# make sure that rpy2 is installed: https://rpy2.bitbucket.io/
%load_ext rpy2.ipython

import torch
import numpy as np
import pandas as pd
import scanpy as sc
import warnings
from rpy2.rinterface import RRuntimeWarning

# ignore R warning messages
warnings.filterwarnings("ignore", category=RRuntimeWarning)

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [6]:
%%R
library(splatter)

In [7]:
%%R

# Simulate a grouped single-cell count dataset with splatter and return it as
# plain data frames.
#
# Arguments:
#   nGroups     Number of cell groups; each group gets probability 1 / nGroups.
#   nGenes      Forwarded to splatSimulate as nGenes.
#   batchCells  Forwarded to splatSimulate as batchCells.
#   mid         Forwarded to splatSimulate as dropout.mid.
#   sigma       Forwarded to splatSimulate as de.facScale.
#   seed        Forwarded to splatSimulate as seed. Defaults to 0, the value
#               that was previously hard-coded, so existing calls are unchanged.
#
# Returns a named list:
#   truecounts  transposed 'TrueCounts' assay (cells in rows) as a data frame
#   counts      transposed counts(sim) (cells in rows) as a data frame
#   cellinfo    colData(sim) as a data frame
#   geneinfo    rowData(sim) as a data frame
Simulation <- function(nGroups, nGenes, batchCells, mid, sigma, seed=0){
  method <- 'groups'

  # Equal-sized groups: every group has the same sampling probability.
  group.prob <- rep(1, nGroups) / nGroups

  sim <- splatSimulate(group.prob=group.prob, nGenes=nGenes, batchCells=batchCells,
                       dropout.type="experiment", method=method, seed=seed,
                       dropout.shape=-1, dropout.mid=mid, de.facScale=sigma)

  # splatter may convert assays to sparse matrices (see the "Sparsifying
  # assays" messages), so densify BOTH assays before as.data.frame(). The
  # original only did this for counts; as.matrix() is a no-op on a dense matrix.
  counts     <- as.data.frame(as.matrix(t(counts(sim))))
  truecounts <- as.data.frame(as.matrix(t(assays(sim)$TrueCounts)))

  cellinfo   <- as.data.frame(colData(sim))
  geneinfo   <- as.data.frame(rowData(sim))

  return(list('truecounts'=truecounts, 'counts'=counts, 'cellinfo'=cellinfo, 'geneinfo'=geneinfo))
}

In [8]:
def _dropout_rate_percent(truecounts, counts):
    """Return the dropout rate in percent, rounded to two decimals.

    The rate is ``(zeros in counts - zeros in truecounts) / non-zeros in
    truecounts``. This equals the share of truly expressed entries that were
    zeroed, assuming an entry that is zero in ``truecounts`` is also zero in
    ``counts``. Returns ``nan`` when ``truecounts`` has no non-zero entry, so
    there is no division by zero.
    """
    true_zero_counts = (truecounts == 0).sum().sum()
    raw_zero_counts = (counts == 0).sum().sum()
    true_non_zero_counts = (truecounts != 0).sum().sum()

    if true_non_zero_counts == 0:
        return float('nan')

    dropout_rate = (raw_zero_counts - true_zero_counts) / true_non_zero_counts
    return round(dropout_rate * 100, 2)


def save_simulated_data(name, truecounts, counts, cellinfo, geneinfo):
    """Pack one simulated dataset into an AnnData object and write it to disk.

    Parameters
    ----------
    name : str
        File name; the object is written to ``./data/{name}``.
    truecounts : pandas.DataFrame
        Counts before dropout; only used to compute the dropout rate.
    counts : pandas.DataFrame
        Observed counts; ``counts.values`` becomes ``adata.X``.
    cellinfo : pandas.DataFrame
        Per-cell metadata; must contain the columns ``Cell`` and ``Group``.
    geneinfo : pandas.DataFrame
        Per-gene metadata; must contain the column ``Gene``.

    Notes
    -----
    * The dropout rate (percent) is stored in ``adata.uns['dropout_rate']``.
    * ``Group`` labels of the form ``'Group<k>'`` are replaced by the integer
      ``k``; any other label is left unchanged.
    * Genes and cells with a total count below 1 are removed before writing.
    """
    # Function-scope import keeps this cell self-contained.
    from pathlib import Path

    dropout_rate = _dropout_rate_percent(truecounts, counts)

    # Copy the metadata so the caller's frames are never modified.
    adata = sc.AnnData(counts.values, obs=cellinfo.copy(), var=geneinfo.copy())
    adata.obs_names = cellinfo.Cell
    adata.var_names = geneinfo.Gene
    # The former `adata = adata[:, adata.var_names]` selected every gene in the
    # same order, i.e. it was a no-op that only turned `adata` into a view and
    # triggered "Trying to set attribute `._uns` of view, copying.".
    adata.uns['dropout_rate'] = dropout_rate

    # Build the 'Group<k>' -> k mapping from the labels actually present, so
    # the function is not limited to ten groups.
    mapping_dict = {}
    for label in adata.obs['Group'].unique():
        if isinstance(label, str) and label.startswith('Group') and label[5:].isdigit():
            mapping_dict[label] = int(label[5:])
    if mapping_dict:
        # Assign the result back instead of a chained `inplace=True`, which
        # operates on a temporary under pandas copy-on-write.
        adata.obs['Group'] = adata.obs['Group'].replace(mapping_dict)

    adata.obs.index.name = 'cell_id'
    adata.var.index.name = 'gene_id'

    sc.pp.filter_genes(adata, min_counts=1)
    sc.pp.filter_cells(adata, min_counts=1)

    path = f'./data/{name}'
    # Create the output directory on a fresh checkout instead of failing.
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    adata.write(path)

# 1) High Dropout rate

## 1-1) SimT3

### 1-1-1) mid = 0

In [5]:
name='SimT3_1'
mid=0.0
sigma=0.3
ngroup=3

In [9]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [10]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 1-1-2) mid = 0.5

In [12]:
name='SimT3_2'
mid=0.5
sigma=0.3
ngroup=3

In [13]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [14]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 1-1-3) mid = 1.0

In [15]:
name='SimT3_3'
mid=1.0
sigma=0.3
ngroup=3

In [16]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [17]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 1-1-4) mid = 1.5

In [18]:
name='SimT3_4'
mid=1.5
sigma=0.3
ngroup=3

In [19]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [20]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 1-1-5) mid = 2.0

In [21]:
name='SimT3_5'
mid=2.0
sigma=0.3
ngroup=3

In [22]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [23]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


## 1-2) SimT6

### 1-2-1) mid = 0

In [25]:
name='SimT6_1'
mid=0.0
sigma=0.3
ngroup=6

In [26]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [27]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 1-2-2) mid = 0.5

In [28]:
name='SimT6_2'
mid=0.5
sigma=0.3
ngroup=6

In [29]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [30]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 1-2-3) mid = 1.0

In [31]:
name='SimT6_3'
mid=1.0
sigma=0.3
ngroup=6

In [32]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [33]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 1-2-4) mid = 1.5

In [34]:
name='SimT6_4'
mid=1.5
sigma=0.3
ngroup=6

In [35]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [36]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 1-2-5) mid = 2.0

In [37]:
name='SimT6_5'
mid=2.0
sigma=0.3
ngroup=6

In [38]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [39]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


# 2) Low Signal

### 2-1) Sigma = 0.1

In [9]:
name='Sig1'
mid=0.0
sigma=0.1
ngroup=3

In [10]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [11]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 2-2) Sigma = 1.5

In [12]:
name='Sig2'
mid=0.0
sigma=1.5
ngroup=3

In [13]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 1.94 * dense matrix

R[write to consol

In [14]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 2-3) Sigma = 2.0

In [15]:
name='Sig3'
mid=0.0
sigma=2.0
ngroup=3

In [16]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 1.82 * dense matrix

R[write to consol

In [17]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 2-4) Sigma = 2.5

In [18]:
name='Sig4'
mid=0.0
sigma=2.5
ngroup=3

In [19]:
%%R -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Simulation(nGroups=ngroup, nGenes=5000, batchCells=3000, mid=mid, sigma=sigma)
truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 1.59 * dense matrix

R[write to consol

In [20]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


# 3) Imbalance

In [22]:
%%R

# Simulate a grouped single-cell count dataset with user-supplied (possibly
# unequal) group proportions and return it as plain data frames.
#
# Arguments:
#   group_prob  Numeric vector of group probabilities, forwarded to
#               splatSimulate as group.prob (flattened with c()).
#   nGroups     Not used by the function body: the number of groups is implied
#               by the length of group_prob. Kept so existing calls still work.
#   nGenes      Forwarded to splatSimulate as nGenes.
#   batchCells  Forwarded to splatSimulate as batchCells.
#   mid         Forwarded to splatSimulate as dropout.mid.
#   sigma       Forwarded to splatSimulate as de.facScale.
#   seed        Forwarded to splatSimulate as seed. Defaults to 0, the value
#               that was previously hard-coded. Accepting it also makes calls
#               that pass seed=0 explicitly valid instead of failing with
#               "unused argument".
#
# Returns a named list:
#   truecounts  transposed 'TrueCounts' assay (cells in rows) as a data frame
#   counts      transposed counts(sim) (cells in rows) as a data frame
#   cellinfo    colData(sim) as a data frame
#   geneinfo    rowData(sim) as a data frame
Imb_Simulation <- function(group_prob, nGroups, nGenes, batchCells, mid, sigma, seed=0){
  method <- 'groups'

  # c() drops any array attributes so splatter receives a plain vector.
  group.prob <- c(group_prob)

  sim <- splatSimulate(group.prob=group.prob, nGenes=nGenes, batchCells=batchCells,
                       dropout.type="experiment", method=method, seed=seed,
                       dropout.shape=-1, dropout.mid=mid, de.facScale=sigma)

  # splatter may convert assays to sparse matrices (see the "Sparsifying
  # assays" messages), so densify BOTH assays before as.data.frame(). The
  # original only did this for counts; as.matrix() is a no-op on a dense matrix.
  counts     <- as.data.frame(as.matrix(t(counts(sim))))
  truecounts <- as.data.frame(as.matrix(t(assays(sim)$TrueCounts)))

  cellinfo   <- as.data.frame(colData(sim))
  geneinfo   <- as.data.frame(rowData(sim))

  return(list('truecounts'=truecounts, 'counts'=counts, 'cellinfo'=cellinfo, 'geneinfo'=geneinfo))
}

In [23]:
def calculate_group_prob(retention, ngroup):
    """Return linearly increasing group probabilities that sum to 1.

    Group ``i`` (0-based) receives the unnormalised weight
    ``retention + i * (1 - retention) / (ngroup - 1)``: the first group has
    weight ``retention``, the last has weight 1, and the groups in between are
    equally spaced. The weights are then divided by their sum.

    Parameters
    ----------
    retention : float
        Weight of the smallest group relative to the largest one.
    ngroup : int
        Number of groups; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Array of length ``ngroup`` with the normalised probabilities.

    Raises
    ------
    ValueError
        If ``ngroup`` is smaller than 1.
    """
    if ngroup < 1:
        raise ValueError(f"ngroup must be at least 1, got {ngroup}")
    if ngroup == 1:
        # A single group takes all cells; the step formula below would
        # otherwise divide by zero.
        return np.ones(1)

    step = (1 - retention) / (ngroup - 1)
    proportion = retention + step * np.arange(ngroup)
    group_prob = proportion / np.sum(proportion)

    return group_prob

### 3-1) retention = 0.1

In [26]:
name='Imb1'
mid=0.0
sigma=0.4
ngroup=6
retention=0.1
n_cells = 1500
group_prob = calculate_group_prob(retention=retention, ngroup=ngroup)
print(group_prob)

[0.03030303 0.08484848 0.13939394 0.19393939 0.24848485 0.3030303 ]


In [25]:
%%R -i group_prob -i n_cells -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

sce <- Imb_Simulation(group_prob=group_prob, nGroups=ngroup, nGenes=5000, batchCells=n_cells, mid=mid, sigma=sigma)

truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.01 * dense matrix

R[write to consol

In [27]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 3-2) retention = 0.3

In [31]:
name='Imb2'
mid=0.0
sigma=0.4
ngroup=6
retention=0.3
n_cells = 1500
group_prob = calculate_group_prob(retention=retention, ngroup=ngroup)
print(group_prob)

[0.07692308 0.11282051 0.14871795 0.18461538 0.22051282 0.25641026]


In [32]:
%%R -i group_prob -i n_cells -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

# Take the cell count from the Python-side n_cells set in the parameter cell
# (as the Imb1 cell does) instead of repeating the literal 1500 here.
sce <- Imb_Simulation(group_prob=group_prob, nGroups=ngroup, nGenes=5000, batchCells=n_cells, mid=mid, sigma=sigma)

truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [33]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 3-3) retention = 0.5

In [34]:
name='Imb3'
mid=0.0
sigma=0.4
ngroup=6
retention=0.5
n_cells = 1500
group_prob = calculate_group_prob(retention=retention, ngroup=ngroup)
print(group_prob)

[0.11111111 0.13333333 0.15555556 0.17777778 0.2        0.22222222]


In [35]:
%%R -i group_prob -i n_cells -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

# Take the cell count from the Python-side n_cells set in the parameter cell
# (as the Imb1 cell does) instead of repeating the literal 1500 here.
sce <- Imb_Simulation(group_prob=group_prob, nGroups=ngroup, nGenes=5000, batchCells=n_cells, mid=mid, sigma=sigma)

truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [36]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 3-4) retention = 0.7

In [37]:
name='Imb4'
mid=0.0
sigma=0.4
ngroup=6
retention=0.7
n_cells = 1500
group_prob = calculate_group_prob(retention=retention, ngroup=ngroup)
print(group_prob)

[0.1372549  0.14901961 0.16078431 0.17254902 0.18431373 0.19607843]


In [38]:
%%R -i group_prob -i n_cells -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

# Take the cell count from the Python-side n_cells set in the parameter cell
# (as the Imb1 cell does) instead of repeating the literal 1500 here.
sce <- Imb_Simulation(group_prob=group_prob, nGroups=ngroup, nGenes=5000, batchCells=n_cells, mid=mid, sigma=sigma)

truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [39]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical


### 3-5) retention = 0.9

In [40]:
name='Imb5'
mid=0.0
sigma=0.4
ngroup=6
retention=0.9
n_cells = 1500
group_prob = calculate_group_prob(retention=retention, ngroup=ngroup)
print(group_prob)

[0.15789474 0.16140351 0.16491228 0.16842105 0.17192982 0.1754386 ]


In [41]:
%%R -i group_prob -i n_cells -i ngroup -i mid -i sigma -o counts -o truecounts -o geneinfo -o cellinfo

# Imb_Simulation as defined in this notebook has no `seed` parameter, so the
# former `seed=0` argument is dropped; the function already simulates with
# seed 0. The cell count comes from the Python-side n_cells, as in the Imb1 cell.
sce <- Imb_Simulation(group_prob=group_prob, nGroups=ngroup, nGenes=5000, batchCells=n_cells, mid=mid, sigma=sigma)

truecounts <- sce$truecounts
counts <- sce$counts
cellinfo <- sce$cellinfo
geneinfo <- sce$geneinfo

R[write to console]: Getting parameters...

R[write to console]: Creating simulation object...

R[write to console]: Simulating library sizes...

R[write to console]: Simulating gene means...

R[write to console]: Simulating group DE...

R[write to console]: Simulating cell means...

R[write to console]: Simulating BCV...

R[write to console]: Simulating counts...

R[write to console]: Simulating dropout (if needed)...

R[write to console]: Sparsifying assays...

R[write to console]: Automatically converting to sparse matrices, threshold = 0.95

R[write to console]: Skipping 'BatchCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BaseCellMeans': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'BCV': estimated sparse size 1.5 * dense matrix

R[write to console]: Skipping 'CellMeans': estimated sparse size 1.49 * dense matrix

R[write to console]: Skipping 'TrueCounts': estimated sparse size 2.03 * dense matrix

R[write to consol

In [42]:
save_simulated_data(name, truecounts, counts, cellinfo, geneinfo)

Trying to set attribute `._uns` of view, copying.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Batch' as categorical
