Novartis · CaibinSh · May 19, 2022 · May 19, 2022 · May 19, 2022
diff --git a/scar/main/__main__.py b/scar/main/__main__.py
@@ -31,6 +31,7 @@ def main():
     adjust = args.adjust
     cutoff = args.cutoff
     moi = args.moi
+    round_to_int = args.round2int  # argparse names the attribute after the long flag "--round2int"
     count_matrix = pd.read_pickle(count_matrix_path)
 
     print("===========================================")
@@ -64,7 +65,7 @@ def main():
         save_model=save_model,
     )
 
-    scar_model.inference(adjust=adjust)
+    scar_model.inference(adjust=adjust, round_to_int=round_to_int)
 
     if feature_type.lower() in ["sgrna", "sgrnas", "tag", "tags"]:
         scar_model.assignment(cutoff=cutoff, moi=moi)
@@ -213,14 +214,20 @@ def scar_parser():
         'global' -- adjust the estimated native counts globally.
         False -- no adjustment, use the model-returned native counts.""",
     )
-
     parser.add_argument(
         "-cutoff",
         "--cutoff",
         type=float,
         default=3,
         help="cutoff for Bayesfactors. See https://doi.org/10.1007/s42113-019-00070-x.",
     )
+    parser.add_argument(
+        "-round",
+        "--round2int",
+        type=str,
+        default="stochastic_rounding",
+        help="how to round estimated counts: 'stochastic_rounding' rounds to integers; any other value keeps floats.",
+    )
     parser.add_argument(
         "-moi",
         "--moi",

diff --git a/scar/main/_scar.py b/scar/main/_scar.py
@@ -530,13 +530,13 @@ def train(
     # Inference
     @torch.no_grad()
     def inference(
-        self, batch_size=None, count_model_inf="poisson", adjust="micro", cutoff=3, moi=None
+        self, batch_size=None, count_model_inf="poisson", adjust="micro", cutoff=3, round_to_int="stochastic_rounding", moi=None
     ):
         """inference infering the expected native signals, noise ratios, Bayesfactors and expected native frequencies
 
         Parameters
         ----------
-        batch_size : _type_, optional
+        batch_size : int, optional
             batch size, set a small value upon GPU memory issue, by default None
         count_model_inf : str, optional
             inference model for evaluation of ambient presence, by default "poisson"
@@ -551,7 +551,9 @@ def inference(
                         Defaults to "micro", by default "micro"
         cutoff : int, optional
             cutoff for Bayesfactors, by default 3
-        moi : _type_, optional (under development) \
+        round_to_int : str, optional
+            rounding mode forwarded to the per-batch inference call; "stochastic_rounding" rounds counts to integers, by default "stochastic_rounding"
+        moi : int, optional (under development) \
             multiplicity of infection. If assigned, it will allow optimized thresholding, \
                 which tests a series of cutoffs to find the best one \
                     based on distributions of infections under given moi.\
@@ -594,6 +596,7 @@ def inference(
                 ambient_freq_tot[0, :],
                 count_model_inf=count_model_inf,
                 adjust=adjust,
+                round_to_int=round_to_int
             )
             self.native_counts[
                 i * batch_size : i * batch_size + minibatch_size, :

diff --git a/scar/main/_vae.py b/scar/main/_vae.py
@@ -95,7 +95,12 @@ def forward(self, input_matrix):
 
     @torch.no_grad()
     def inference(
-        self, input_matrix, amb_prob, count_model_inf="poisson", adjust="micro"
+        self,
+        input_matrix,
+        amb_prob,
+        count_model_inf="poisson",
+        adjust="micro",
+        round_to_int="stochastic_rounding"
     ):
         """
         Inference of presence of native signals
@@ -117,6 +122,27 @@ def inference(
         expected_amb_counts = total_count_per_cell * noise_ratio * amb_prob
         tot_amb = expected_amb_counts.sum(axis=1).reshape(-1, 1)
 
+        # Non-str values (e.g. None) mean "keep floats"; never call .lower() on them.
+        rounding_mode = round_to_int.lower() if isinstance(round_to_int, str) else None
+        if rounding_mode == "stochastic_rounding":
+            expected_native_counts = (
+                np.floor(expected_native_counts)  # integer part
+                + np.random.binomial(  # +1 with probability equal to the fractional part
+                    1,
+                    expected_native_counts - np.floor(expected_native_counts),
+                    expected_native_counts.shape,
+                )
+            ).astype(int)
+
+            expected_amb_counts = (
+                np.floor(expected_amb_counts)  # integer part
+                + np.random.binomial(  # +1 with probability equal to the fractional part
+                    1,
+                    expected_amb_counts - np.floor(expected_amb_counts),
+                    expected_amb_counts.shape,
+                )
+            ).astype(int)
+
         if not adjust:
             adjust = 0
         elif adjust == "global":