Merge pull request #176 from KrishnaswamyLab/dev

MAGIC v2.0.3
KrishnaswamyLab · Nov 18, 2019 · fae72d9 · fae72d9
2 parents 8b923b6 + b8ccb6f
commit fae72d9
Show file tree

Hide file tree

Showing 11 changed files with 18,759 additions and 14,216 deletions.
diff --git a/Rmagic/DESCRIPTION b/Rmagic/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: Rmagic
 Type: Package
 Title: MAGIC - Markov Affinity-Based Graph Imputation of Cells
-Version: 2.0.2
+Version: 2.0.3
 Authors@R: c(person(given = "David", family = "van Dijk", email = "davidvandijk@gmail.com", role = c("aut")),
 		         person(given = 'Scott', family = 'Gigante', email = 'scott.gigante@yale.edu', role = 'cre', 
 		                comment = c(ORCID = '0000-0002-4544-2764')))

diff --git a/Rmagic/R/magic.R b/Rmagic/R/magic.R
@@ -9,14 +9,14 @@
 #' @param genes character or integer vector, default: NULL
 #' vector of column names or column indices for which to return smoothed data
 #' If 'all_genes' or NULL, the entire smoothed matrix is returned
-#' @param knn int, optional, default: 10
+#' @param knn int, optional, default: 5
 #' number of nearest neighbors on which to compute bandwidth
 #' @param knn.max int, optional, default: NULL
 #' maximum number of neighbors for each point. If NULL, defaults to 3*knn
-#' @param decay int, optional, default: 2
+#' @param decay int, optional, default: 1
 #' sets decay rate of kernel tails.
 #' If NULL, alpha decaying kernel is not used
-#' @param t int, optional, default: 'auto'
+#' @param t int, optional, default: 3
 #' power to which the diffusion operator is powered
 #' sets the level of diffusion. If 'auto', t is selected according to the
 #' Procrustes disparity of the diffused data.
@@ -108,10 +108,10 @@ magic <- function(data, ...) {
 magic.default <- function(
   data,
   genes = NULL,
-  knn = 10,
+  knn = 5,
   knn.max = NULL,
-  decay = 2,
-  t = 'auto',
+  decay = 1,
+  t = 3,
   npca = 100,
   init = NULL,
   t.max = 20,
@@ -249,10 +249,10 @@ magic.default <- function(
 magic.seurat <- function(
   data,
   genes = NULL,
-  knn = 10,
+  knn = 5,
   knn.max = NULL,
-  decay = 2,
-  t = 'auto',
+  decay = 1,
+  t = 3,
   npca = 100,
   init = NULL,
   t.max = 20,
@@ -311,10 +311,10 @@ magic.Seurat <- function(
   data,
   assay = NULL,
   genes = NULL,
-  knn = 10,
+  knn = 5,
   knn.max = NULL,
-  decay = 2,
-  t = 'auto',
+  decay = 1,
+  t = 3,
   npca = 100,
   init = NULL,
   t.max = 20,

diff --git a/Rmagic/inst/examples/bonemarrow_tutorial.Rmd b/Rmagic/inst/examples/bonemarrow_tutorial.Rmd
@@ -123,7 +123,7 @@ As you can see, the gene-gene relationships are much clearer after MAGIC. These
 
 ### Rerunning MAGIC with new parameters
 
-The data is a little too smooth - we can decrease `t` from the automatic value to reduce the amount of diffusion. We pass the original result to the argument `init` to avoid recomputing intermediate steps.
+The data is still a little noisy - we can increase `t` from the default value of 3 to increase the amount of diffusion. We pass the original result to the argument `init` to avoid recomputing intermediate steps.
 
 ```{r decrease_t}
 bmmsc_MAGIC <- magic(bmmsc, genes=c("Mpo", "Klf1", "Ifitm1"), 
@@ -141,7 +141,7 @@ We can visualize the results of MAGIC on PCA with `genes="pca_only"`.
 ```{r run_pca}
 bmmsc_MAGIC_PCA <- magic(bmmsc, genes="pca_only", 
                          t=4, init=bmmsc_MAGIC)
-ggplot(bmmsc_MAGIC_PCA) +
+# ggplot(bmmsc_MAGIC_PCA) +
   geom_point(aes(x=PC1, y=PC2, color=bmmsc_MAGIC$result$Klf1)) +
   scale_color_viridis(option="B") +
   labs(color="Klf1")
@@ -154,7 +154,7 @@ ggsave('BMMSC_data_R_pca_colored_by_magic.png', width=5, height=5)
 We can visualize the results of MAGIC on PHATE as follows.
 
 ```{r run_phate}
-bmmsc_PHATE <- phate(bmmsc, knn=4, decay=100, t=20)
+bmmsc_PHATE <- phate(bmmsc)
 ggplot(bmmsc_PHATE) +
   geom_point(aes(x=PHATE1, y=PHATE2, color=bmmsc_MAGIC$result$Klf1)) +
   scale_color_viridis(option="B") +

diff --git a/Rmagic/inst/examples/emt_tutorial.Rmd b/Rmagic/inst/examples/emt_tutorial.Rmd
@@ -103,11 +103,11 @@ if (FALSE) {
 
 ### Running MAGIC
 
-Running MAGIC is as simple as running the `magic` function. Because this dataset is rather large, we can increase `knn` from the default of 10 up to 15.
+Running MAGIC is as simple as running the `magic` function. Because this dataset is rather small, we can decrease `knn` from the default of 5 down to 3.
 
 ```{r run_magic}
 # run MAGIC
-data_MAGIC <- magic(data, knn=15, genes=c("VIM", "CDH1", "ZEB1"))
+data_MAGIC <- magic(data, knn=3, genes=c("VIM", "CDH1", "ZEB1"))
 ```
 
 We can plot the data before and after MAGIC to visualize the results.

diff --git a/Rmagic/man/magic.Rd b/Rmagic/man/magic.Rd
diff --git a/magic.gif b/magic.gif
diff --git a/python/magic/magic.py b/python/magic/magic.py
@@ -43,18 +43,18 @@ class MAGIC(BaseEstimator):
     Parameters
     ----------
 
-    knn : int, optional, default: 10
+    knn : int, optional, default: 5
         number of nearest neighbors from which to compute kernel bandwidth
 
     knn_max : int, optional, default: None
         maximum number of nearest neighbors with nonzero connection.
         If `None`, will be set to 3 * `knn`
 
-    decay : int, optional, default: 2
+    decay : int, optional, default: 1
         sets decay rate of kernel tails.
         If None, alpha decaying kernel is not used
 
-    t : int, optional, default: 'auto'
+    t : int, optional, default: 3
         power to which the diffusion operator is powered.
         This sets the level of diffusion. If 'auto', t is selected
         according to the Procrustes disparity of the diffused data
@@ -144,10 +144,10 @@ class MAGIC(BaseEstimator):
 
     def __init__(
         self,
-        knn=10,
+        knn=5,
         knn_max=None,
-        decay=2,
-        t="auto",
+        decay=1,
+        t=3,
         n_pca=100,
         solver="exact",
         knn_dist="euclidean",
@@ -158,8 +158,18 @@ def __init__(
         a=None,
     ):
         if k is not None:
+            warnings.warn(
+                "Parameter `k` is deprecated and will be removed"
+                " in a future version. Use `knn` instead",
+                FutureWarning,
+            )
             knn = k
         if a is not None:
+            warnings.warn(
+                "Parameter `a` is deprecated and will be removed"
+                " in a future version. Use `decay` instead",
+                FutureWarning,
+            )
             decay = a
         self.knn = knn
         self.knn_max = knn_max
@@ -175,7 +185,7 @@ def __init__(
         self.X_magic = None
         self._check_params()
         self.verbose = verbose
-        tasklogger.set_level(verbose)
+        _logger.set_level(verbose)
 
     @property
     def knn_max(self):
@@ -272,14 +282,14 @@ def set_params(self, **params):
         Parameters
         ----------
 
-        knn : int, optional, default: 10
+        knn : int, optional, default: 5
             number of nearest neighbors on which to build kernel
 
-        decay : int, optional, default: 15
+        decay : int, optional, default: 1
             sets decay rate of kernel tails.
             If None, alpha decaying kernel is not used
 
-        t : int, optional, default: 'auto'
+        t : int, optional, default: 3
             power to which the diffusion operator is powered.
             This sets the level of diffusion. If 'auto', t is selected
             according to the Procrustes disparity of the diffused data
@@ -328,10 +338,20 @@ def set_params(self, **params):
 
         # kernel parameters
         if "k" in params and params["k"] != self.knn:
+            warnings.warn(
+                "Parameter `k` is deprecated and will be removed"
+                " in a future version. Use `knn` instead",
+                FutureWarning,
+            )
             self.knn = params["k"]
             reset_kernel = True
             del params["k"]
         if "a" in params and params["a"] != self.decay:
+            warnings.warn(
+                "Parameter `a` is deprecated and will be removed"
+                " in a future version. Use `decay` instead",
+                FutureWarning,
+            )
             self.decay = params["a"]
             reset_kernel = True
             del params["a"]
@@ -584,14 +604,16 @@ def transform(self, X=None, genes=None, t_max=20, plot_optimal_t=False, ax=None)
         if isinstance(genes, str) and genes == "pca_only":
             # have to use PCA to return it
             solver = "approximate"
+        elif (
+            genes is not None
+            and self.X_magic is None
+            and len(genes) < self.graph.data_nu.shape[1]
+        ):
+            # faster to skip PCA
+            solver = "exact"
+            store_result = False
         else:
-            if genes is not None and self.X_magic is None:
-                if len(genes) < self.graph.data_nu.shape[1]:
-                    # faster to skip PCA
-                    solver = "exact"
-                    store_result = False
-            else:
-                solver = self.solver
+            solver = self.solver
 
         if store_result and self.X_magic is not None:
             X_magic = self.X_magic
@@ -608,6 +630,13 @@ def transform(self, X=None, genes=None, t_max=20, plot_optimal_t=False, ax=None)
                     isinstance(genes, str) and genes != "pca_only"
                 ):
                     X_input = scprep.select.select_cols(X_input, idx=genes)
+            if solver == "exact" and X_input.shape[1] > 6000:
+                _logger.warning(
+                    "Running MAGIC with `solver='exact'` on "
+                    "{}-dimensional data may take a long time. "
+                    "Consider denoising specific genes with `genes=<list-like>` or using "
+                    "`solver='approximate'`.".format(X_input.shape[1])
+                )
             X_magic = self._impute(X_input, t_max=t_max, plot=plot_optimal_t, ax=ax)
             if store_result:
                 self.X_magic = X_magic

diff --git a/python/magic/plot.py b/python/magic/plot.py
@@ -33,6 +33,7 @@ def animate_magic(
     gene_y,
     gene_color=None,
     t_max=20,
+    delay=2,
     operator=None,
     filename=None,
     ax=None,
@@ -59,6 +60,8 @@ def animate_magic(
         Gene to color by. If None, no color vector is used
     t_max : int, optional (default: 20)
         maximum value of t to include in the animation
+    delay : int, optional (default: 2)
+        number of frames to dwell on the first frame before applying MAGIC
     operator : magic.MAGIC, optional (default: None)
         precomputed MAGIC operator. If None, one is created.
     filename : str, optional (default: None)
@@ -149,11 +152,16 @@ def init():
         return ax
 
     def animate(i):
+        i = max(i - delay, 0)
         data_t = data_magic[i]
         data_t = data_t if isinstance(data, pd.DataFrame) else data_t.T
         sc.set_offsets(np.array([data_t[gene_x], data_t[gene_y]]).T)
-        ax.set_xlim([np.min(data_t[gene_x]), np.max(data_t[gene_x])])
-        ax.set_ylim([np.min(data_t[gene_y]), np.max(data_t[gene_y])])
+        xlim = np.min(data_t[gene_x]), np.max(data_t[gene_x])
+        xrange = xlim[1] - xlim[0]
+        ax.set_xlim(xlim[0] - xrange / 10, xlim[1] + xrange / 10)
+        ylim = np.min(data_t[gene_y]), np.max(data_t[gene_y])
+        yrange = ylim[1] - ylim[0]
+        ax.set_ylim(ylim[0] - yrange / 10, ylim[1] + yrange / 10)
         ax.set_title("t = {}".format(i))
         if gene_color is not None:
             color_t = data_t[gene_color]
@@ -166,7 +174,7 @@ def animate(i):
         fig,
         animate,
         init_func=init,
-        frames=range(t_max + 1),
+        frames=range(t_max + delay + 1),
         interval=interval,
         blit=False,
     )

diff --git a/python/magic/version.py b/python/magic/version.py
@@ -1 +1 @@
-__version__ = "2.0.2"
+__version__ = "2.0.3"