diff --git a/src/crowdnalysis/analysis.py b/src/crowdnalysis/analysis.py
index e9f0259..a8403c4 100644
--- a/src/crowdnalysis/analysis.py
+++ b/src/crowdnalysis/analysis.py
@@ -85,25 +85,6 @@ def prospective_analysis(question, expert_data_src, expert_parameters, parameter
                          expert_parameters[question], crowds_parameters, models, measures, dgps,
                          repeats=repeats))
 
-def gen_confusion_matrix_prob(consensus_ref: np.ndarray, consensus_compare: np.ndarray,
-                              d_ref: data.Data, question: str) -> pd.DataFrame:
-    """Compute confusion matrix for a consensus based on a ref consensus as the sum of probabilities for classes.
-
-    Cells values are the number of tasks. The values are merely the sum of probabilities for the corresponding
-    (class, label) pair. That's why we observe decimals.
-    """
-    best_ref = np.argmax(consensus_ref, axis=1)
-    categories = d_ref.get_categories()[question].categories.tolist()
-    label_names = dict(zip(range(len(categories)), categories))
-    # print("label_names:", label_names)
-    df_out = pd.DataFrame(consensus_compare).rename(columns=label_names)
-    df_out["Ground Truth"] = best_ref
-    df_out = df_out.groupby("Ground Truth").sum()
-    df_out = df_out.rename(index=label_names)
-    df_out = df_out.reindex(categories)  # In case there are labels which were not in `best_ref`
-    return df_out
-
-
 def gen_confusion_matrix(consensus_ref: np.ndarray, consensus_compare: np.ndarray,
                          d_ref: data.Data, question: str) -> pd.DataFrame:
     """Compute confusion matrix for a consensus based on a ref consensus.
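
For context on the removed helper: gen_confusion_matrix_prob built a "soft" confusion matrix by hardening the reference consensus with argmax and then summing, per reference class, the class probabilities of the compared consensus, which is why the cells contain decimals rather than integer task counts. Below is a minimal self-contained sketch of that computation; the category names and probability arrays are invented for illustration and do not come from the repository.

    import numpy as np
    import pandas as pd

    # Illustrative inputs (made up for this sketch): 3 tasks, 2 categories.
    categories = ["cat", "dog"]
    consensus_ref = np.array([[0.9, 0.1],      # reference consensus probabilities
                              [0.2, 0.8],
                              [0.6, 0.4]])
    consensus_compare = np.array([[0.7, 0.3],  # consensus being compared
                                  [0.1, 0.9],
                                  [0.4, 0.6]])

    best_ref = np.argmax(consensus_ref, axis=1)  # hard reference labels: [0, 1, 0]
    label_names = dict(enumerate(categories))

    df = pd.DataFrame(consensus_compare).rename(columns=label_names)
    df["Ground Truth"] = best_ref
    cm = (df.groupby("Ground Truth").sum()
            .rename(index=label_names)
            .reindex(categories))  # keep rows for categories absent from best_ref

    print(cm)
    # Cells are sums of probabilities, not integer counts:
    #               cat  dog
    # Ground Truth
    # cat           1.1  0.9
    # dog           0.1  0.9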