diff --git a/backend/requirements.txt b/backend/requirements.txt
index b882d5947c..dd11aa18e0 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -5,6 +5,7 @@ numpy==1.22.4
 psycopg2-binary==2.9.5
 pydantic==1.9.1
 python-dotenv==0.21.0
+scipy==1.8.1
 SQLAlchemy==1.4.41
 sqlmodel==0.0.8
 starlette==0.22.0
diff --git a/scripts/postprocessing/infogain_selector.py b/scripts/postprocessing/infogain_selector.py
new file mode 100644
index 0000000000..51f60fa7d1
--- /dev/null
+++ b/scripts/postprocessing/infogain_selector.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+from scipy.integrate import nquad
+from scipy.special import gammaln, psi
+from scipy.stats import dirichlet
+
+
+def make_range(*x):
+    """
+    Constructs the integration range for the next simplex coordinate,
+    given the already-fixed entries x: (0, 1 - (x_1 + ... + x_k)).
+    """
+    return (0, max(0, 1 - sum(x)))
+
+
+def relative_entropy(p, q):
+    """
+    Relative entropy (KL divergence, in bits) of the two given Dirichlet
+    densities.
+    """
+
+    def tmp(*x):
+        """
+        First appends the last, fully determined simplex entry
+        (x_N = 1 - (x_1 + ... + x_(N-1))) to the input, then computes the
+        relative entropy of posterior and prior at that point.
+        """
+        x_new = np.append(x, 1 - sum(x))
+        return p(x_new) * np.log2(p(x_new) / q(x_new))
+
+    return tmp
+
+
+def naive_monte_carlo_integral(fun, dim, samples=10_000_000):
+    # consecutive differences of sorted uniform variates (with 0 and 1
+    # appended) are uniformly distributed points on the simplex
+    s = np.random.rand(dim - 1, samples)
+    s = np.sort(np.concatenate((np.zeros((1, samples)), s, np.ones((1, samples)))), 0)
+    pos = np.diff(s, axis=0)
+    res = fun(pos)
+    return np.mean(res)
+
+
+def analytic_solution(a_post, a_prior):
+    """
+    Analytic solution (in nats) to the KL divergence between two Dirichlet
+    distributions. Proof is in the Notion design doc. Divide by ln(2) to
+    compare against the log2-based numerical integration above.
+    """
+    post_sum = np.sum(a_post)
+    prior_sum = np.sum(a_prior)
+    # note the (a_post - a_prior) term enters with a positive sign
+    info = (
+        gammaln(post_sum)
+        - gammaln(prior_sum)
+        - np.sum(gammaln(a_post))
+        + np.sum(gammaln(a_prior))
+        + np.sum((a_post - a_prior) * (psi(a_post) - psi(post_sum)))
+    )
+
+    return info
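+
+
+# A minimal sanity check of the closed form above (illustrative only, not
+# part of the selector API): the KL divergence of a Dirichlet with itself
+# must be exactly zero, and it becomes strictly positive once the posterior
+# moves away from the prior.
+def _check_analytic_solution():
+    alpha = np.array([1.0, 2.0, 3.0])
+    assert np.isclose(analytic_solution(alpha, alpha), 0.0)
+    assert analytic_solution(alpha + np.array([5.0, 0.0, 0.0]), alpha) > 0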
+
+
+def infogain(a_post, a_prior):
+    # TODO: maybe remove this function entirely
+    raise NotImplementedError(
+        """For the love of god don't use this:
+        it's insanely poorly conditioned, the worst numerical code I have ever written,
+        and it's slow as molasses. Use the analytic solution instead."""
+    )
+    # unreachable; kept only to document the numerical approach
+    args = len(a_prior)
+    p = dirichlet(a_post).pdf
+    q = dirichlet(a_prior).pdf
+    (info, _) = nquad(relative_entropy(p, q), [make_range for _ in range(args - 1)], opts={"epsabs": 1e-8})
+    # info = naive_monte_carlo_integral(relative_entropy(p, q), len(a_post))
+    return info
+
+
+def uniform_expected_infogain(a_prior):
+    """
+    Expected information gain of one additional vote: each outcome i is
+    weighted by its predictive probability (the prior's mean) and scored by
+    the KL divergence of the resulting posterior against the prior.
+    """
+    mean_weight = dirichlet.mean(a_prior)
+    results = []
+    for i, w in enumerate(mean_weight):
+        a_post = a_prior.copy()
+        a_post[i] = a_post[i] + 1
+        results.append(w * analytic_solution(a_post, a_prior))
+    return np.sum(results)
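+
+
+# Sketch of how the selector could drive question archiving (a hypothetical
+# helper, not wired into the backend; the name `should_archive` and the
+# default threshold are assumptions for illustration): archive once one more
+# vote is no longer expected to yield at least `min_bits` bits of information.
+def should_archive(a_prior, min_bits=0.01):
+    # analytic_solution (and hence the expected infogain) is in nats,
+    # so convert to bits before comparing against the threshold
+    return uniform_expected_infogain(a_prior) / np.log(2) < min_bits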
+
+
+if __name__ == "__main__":
+    a_prior = np.array([1, 1, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
+    a_post = np.array([1, 1, 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
+
+    print("algebraic", analytic_solution(a_post, a_prior))
+    # print("raw", infogain(a_post, a_prior))
+    print("large infogain", uniform_expected_infogain(a_prior))
+    print("post infogain", uniform_expected_infogain(a_post))
+    # a_prior = np.array([1, 1, 1000])
+    # print("small infogain", uniform_expected_infogain(a_prior))
diff --git a/scripts/postprocessing/scoring.py b/scripts/postprocessing/scoring.py
new file mode 100644
index 0000000000..3c145b285b
--- /dev/null
+++ b/scripts/postprocessing/scoring.py
@@ -0,0 +1,183 @@
+# -*- coding: utf-8 -*-
+from dataclasses import dataclass, replace
+from typing import Any
+
+import numpy as np
+import numpy.typing as npt
+from scipy.stats import kendalltau
+
+
+@dataclass
+class Voter:
+    """
+    Represents a single voter.
+    Tabulates the number of good votes, total votes, and points.
+    We only put well-behaved people on the scoreboard and filter out the
+    badly behaved ones.
+    """
+
+    uid: Any
+    num_votes: int
+    num_good_votes: int
+    num_prompts: int
+    num_good_prompts: int
+    num_rankings: int
+    num_good_rankings: int
+
+    #####################
+    voting_points: int
+    prompt_points: int
+    ranking_points: float
+
+    def voter_quality(self):
+        # voters with no activity yet count as quality 0
+        return self.num_good_votes / self.num_votes if self.num_votes else 0.0
+
+    def rank_quality(self):
+        return self.num_good_rankings / self.num_rankings if self.num_rankings else 0.0
+
+    def prompt_quality(self):
+        return self.num_good_prompts / self.num_prompts if self.num_prompts else 0.0
+
+    def is_well_behaved(self, threshold_vote, threshold_prompt, threshold_rank):
+        return (
+            self.voter_quality() > threshold_vote
+            and self.prompt_quality() > threshold_prompt
+            and self.rank_quality() > threshold_rank
+        )
+
+    def total_points(self, voting_weight, prompt_weight, ranking_weight):
+        return (
+            voting_weight * self.voting_points
+            + prompt_weight * self.prompt_points
+            + ranking_weight * self.ranking_points
+        )
+
+
+def score_update_votes(new_vote: int, consensus: npt.ArrayLike, voter_data: Voter) -> Voter:
+    """
+    Returns the new "quality score" and points for a voter after that voter
+    cast a vote on a question.
+
+    This function is only to be run when archiving a question, i.e. when the
+    question has had sufficiently many votes or we can't gain more than "K"
+    bits of information.
+
+    The consensus is the array of all votes cast by all voters for that
+    question; we update the voter data using this new information.
+
+    Parameters:
+        new_vote (int): the index of the vote cast by the voter
+        consensus (ArrayLike): all votes cast for this question
+        voter_data (Voter): the Voter that cast the new_vote
+
+    Returns:
+        updated_voter (Voter): the new "quality score" and points for the voter
+    """
+    # the double argsort produces the rank of each vote count, e.g. for
+    # [100, 300, 200] it returns [0, 2, 1], since 100 is the lowest,
+    # 300 the highest and 200 the middle value
+    consensus_ranking = np.argsort(np.argsort(consensus))
+    new_points = consensus_ranking[new_vote] + voter_data.voting_points
+
+    # ranks are 0-indexed, so a vote counts as good when its rank lands in
+    # the upper half of the consensus, i.e. strictly above (len - 1) / 2
+    new_good_votes = int(consensus_ranking[new_vote] > (len(consensus) - 1) / 2) + voter_data.num_good_votes
+    new_num_votes = voter_data.num_votes + 1
+    return replace(voter_data, num_votes=new_num_votes, num_good_votes=new_good_votes, voting_points=new_points)
+
+
+def score_update_prompts(consensus: npt.ArrayLike, voter_data: Voter) -> Voter:
+    """
+    Returns the gain of points for a given prompt's votes.
+
+    This function is only to be run when archiving a question, i.e. when the
+    question has had sufficiently many votes or we can't gain more than "K"
+    bits of information.
+
+    Parameters:
+        consensus (ArrayLike): all votes cast for this question
+        voter_data (Voter): the Voter that wrote the prompt
+
+    Returns:
+        updated_voter (Voter): the new "quality score" and points for the voter
+    """
+    # assigns each vote option a positional weight that increases from the
+    # low end of the scale to the high end, e.g. [-1, 0, 1, 2] for four
+    # options; the prompt then scores the weighted sum of its vote counts,
+    # so a positive delta means the votes lean towards the "good" end
+    consensus_ranking = np.arange(len(consensus)) - len(consensus) // 2 + 1
+    delta_votes = np.sum(consensus_ranking * consensus)
+    new_points = delta_votes + voter_data.prompt_points
+
+    new_good_prompts = int(delta_votes > 0) + voter_data.num_good_prompts
+    new_num_prompts = voter_data.num_prompts + 1
+    return replace(
+        voter_data,
+        num_prompts=new_num_prompts,
+        num_good_prompts=new_good_prompts,
+        prompt_points=new_points,
+    )
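+
+
+# Worked example of the two update rules above (illustrative only): checks
+# the double-argsort rank trick and the positional prompt weights on a
+# three-option consensus.
+def _check_score_updates():
+    consensus = np.array([100, 300, 200])
+    # double argsort: [100, 300, 200] -> ranks [0, 2, 1]
+    assert list(np.argsort(np.argsort(consensus))) == [0, 2, 1]
+    # positional weights for three options: arange(3) - 3 // 2 + 1 = [0, 1, 2]
+    weights = np.arange(3) - 3 // 2 + 1
+    assert list(weights) == [0, 1, 2]
+    # weighted vote count: 0 * 100 + 1 * 300 + 2 * 200 = 700
+    assert np.sum(weights * consensus) == 700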
+
+
+def score_update_ranking(user_ranking: npt.ArrayLike, consensus_ranking: npt.ArrayLike, voter_data: Voter) -> Voter:
+    """
+    Returns the gain of points for a given ranking's votes.
+
+    This function is only to be run when archiving a question, i.e. when the
+    question has had sufficiently many votes or we can't gain more than "K"
+    bits of information.
+
+    We compare the two rankings with Kendall's tau, an affine transform of
+    the bubble-sort (Kendall tau) distance. We use it over the Spearman
+    correlation since:
+    "[Kendall's tau] approaches a normal distribution more rapidly than rho,
+    as N, the sample size, increases; and tau is also more tractable
+    mathematically, particularly when ties are present"
+    Gilpin, A. R. (1993). Table for conversion of Kendall's tau to
+    Spearman's rho within the context of measures of magnitude of effect
+    for meta-analysis.
+
+    Further, in "Research Design and Statistical Analysis" (second edition,
+    2003) the authors note that, at least from a significance-test point of
+    view, the two will yield the same p-values.
+
+    Parameters:
+        user_ranking (ArrayLike): ranking produced by the user
+        consensus_ranking (ArrayLike): consensus ranking produced by the voting algorithm
+        voter_data (Voter): the Voter that produced the user_ranking
+
+    Returns:
+        updated_voter (Voter): the new "quality score" and points for the voter
+    """
+    # kendalltau returns the correlation tau in [-1, 1], not a distance
+    tau, _p_value = kendalltau(user_ranking, consensus_ranking)
+    # normalize tau into the [0, 1] range: 1 means the user ranking matches
+    # the consensus exactly, 0 means it is fully reversed
+    similarity = (1 + tau) / 2
+    new_points = similarity + voter_data.ranking_points
+    new_good_rankings = int(similarity > 0.5) + voter_data.num_good_rankings
+    new_num_rankings = voter_data.num_rankings + 1
+    return replace(
+        voter_data,
+        num_rankings=new_num_rankings,
+        num_good_rankings=new_good_rankings,
+        ranking_points=new_points,
+    )
+
+
+if __name__ == "__main__":
+    demo_voter = Voter(
+        "abc",
+        num_votes=10,
+        num_good_votes=2,
+        num_prompts=10,
+        num_good_prompts=2,
+        num_rankings=10,
+        num_good_rankings=2,
+        voting_points=6,
+        prompt_points=0,
+        ranking_points=0,
+    )
+    new_vote = 3
+    consensus = np.array([200, 300, 100, 500])
+    print(demo_voter)
+    print("best vote ", score_update_votes(new_vote, consensus, demo_voter))
+    new_vote = 2
+    print("worst vote ", score_update_votes(new_vote, consensus, demo_voter))
+    new_vote = 1
+    print("medium vote ", score_update_votes(new_vote, consensus, demo_voter))
+    print("prompt writer", score_update_prompts(consensus, demo_voter))
+    print("best rank ", score_update_ranking(np.array([0, 2, 1]), np.array([0, 2, 1]), demo_voter))
+    print("medium rank ", score_update_ranking(np.array([2, 0, 1]), np.array([0, 2, 1]), demo_voter))
+    print("worst rank ", score_update_ranking(np.array([1, 0, 2]), np.array([0, 2, 1]), demo_voter))
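+
+    # Illustrative extra checks (not part of the original demo): identical
+    # rankings normalize to the maximum similarity of 1.0, fully reversed
+    # rankings to 0.0.
+    tau, _ = kendalltau(np.array([0, 1, 2]), np.array([0, 1, 2]))
+    print("identical rank similarity", (1 + tau) / 2)
+    tau, _ = kendalltau(np.array([0, 1, 2]), np.array([2, 1, 0]))
+    print("reversed rank similarity ", (1 + tau) / 2)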