user scoring and question selection #57

Merged 7 commits on Dec 26, 2022
1 change: 1 addition & 0 deletions backend/requirements.txt
@@ -5,6 +5,7 @@ numpy==1.22.4
psycopg2-binary==2.9.5
pydantic==1.9.1
python-dotenv==0.21.0
scipy==1.8.1
SQLAlchemy==1.4.41
sqlmodel==0.0.8
starlette==0.22.0
98 changes: 98 additions & 0 deletions scripts/postprocessing/infogain_selector.py
@@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
import numpy as np
from scipy.integrate import nquad
from scipy.special import gammaln, psi
from scipy.stats import dirichlet


def make_range(*x):
    """
    Constructs the integration range for the next simplex coordinate given the
    already fixed entries: (0, 1 - (x_1 + ... + x_k)), clipped at zero.
    """
    return (0, max(0, 1 - sum(x)))
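# For example (illustrative): with the first two coordinates fixed at 0.2 and
# 0.3, the next coordinate can range over (0, 0.5), since the simplex
# coordinates must sum to one:
# make_range(0.2, 0.3)  # -> (0, 0.5)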


def relative_entropy(p, q):
    """
    Returns the integrand for the relative entropy (KL divergence) of the two
    given Dirichlet densities.
    """

    def tmp(*x):
        """
        First appends the final, fully determined simplex entry to the input
        (x_last = 1 - (x_1 + ... + x_(N-1))), then evaluates the KL integrand
        of posterior versus prior at that point.
        """
        x_new = np.append(x, 1 - sum(x))
        return p(x_new) * np.log2(p(x_new) / q(x_new))

    return tmp


def naive_monte_carlo_integral(fun, dim, samples=10_000_000):
    # the spacings between sorted uniform samples (padded with 0 and 1) are
    # uniformly distributed on the simplex
    s = np.random.rand(dim - 1, samples)
    s = np.sort(np.concatenate((np.zeros((1, samples)), s, np.ones((1, samples)))), 0)
    pos = np.diff(s, axis=0)
    res = fun(pos)
    return np.mean(res)


def analytic_solution(a_post, a_prior):
    """
    Analytic solution to the KL divergence between two Dirichlet distributions.
    Proof is in the Notion design doc.
    Note: gammaln and psi use the natural log, so the result is in nats, while
    the numeric integrand in relative_entropy uses log2 (bits).
    """
post_sum = np.sum(a_post)
prior_sum = np.sum(a_prior)
info = (
gammaln(post_sum)
- gammaln(prior_sum)
- np.sum(gammaln(a_post))
+ np.sum(gammaln(a_prior))
- np.sum((a_post - a_prior) * (psi(a_post) - psi(post_sum)))
)

return info
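# Sanity checks (illustrative, not part of the module): the KL divergence of a
# distribution with itself is exactly zero (every term cancels), and it is
# strictly positive for distinct distributions:
# assert np.isclose(analytic_solution(np.array([2.0, 3.0]), np.array([2.0, 3.0])), 0.0)
# assert analytic_solution(np.array([5.0, 3.0]), np.array([2.0, 3.0])) > 0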


def infogain(a_post, a_prior):
    raise NotImplementedError(
        """For the love of god, don't use this:
        it's insanely poorly conditioned, the worst numerical code I have ever written,
        and it's slow as molasses. Use the analytic solution instead.

        Maybe remove
        """
    )
args = len(a_prior)
p = dirichlet(a_post).pdf
q = dirichlet(a_prior).pdf
(info, _) = nquad(relative_entropy(p, q), [make_range for _ in range(args - 1)], opts={"epsabs": 1e-8})
# info = naive_monte_carlo_integral(relative_entropy(p,q), len(a_post))
return info


def uniform_expected_infogain(a_prior):
    """
    Expected information gain from one more vote: each option i is drawn with
    probability equal to the Dirichlet mean weight and bumps alpha_i by one.
    """
    mean_weight = dirichlet.mean(a_prior)
results = []
for i, w in enumerate(mean_weight):
a_post = a_prior.copy()
a_post[i] = a_post[i] + 1
results.append(w * analytic_solution(a_post, a_prior))
return np.sum(results)
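# A minimal selection sketch (illustrative, not part of this PR): assuming each
# open question tracks the Dirichlet alpha vector of its votes so far, pick the
# question whose next vote is expected to be most informative.
# `select_next_question` and `candidate_alphas` are hypothetical names.
def select_next_question(candidate_alphas):
    gains = [uniform_expected_infogain(a) for a in candidate_alphas]
    return int(np.argmax(gains))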


if __name__ == "__main__":
a_prior = np.array([1, 1, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
a_post = np.array([1, 1, 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

print("algebraic", analytic_solution(a_post, a_prior))
# print("raw",infogain(a_post, a_prior))
print("large infogain", uniform_expected_infogain(a_prior))
print("post infogain", uniform_expected_infogain(a_post))
# a_prior = np.array([1,1,1000])
# print("small infogain",uniform_expected_infogain(a_prior))
183 changes: 183 additions & 0 deletions scripts/postprocessing/scoring.py
@@ -0,0 +1,183 @@
# -*- coding: utf-8 -*-
from dataclasses import dataclass, replace
from typing import Any

import numpy as np
import numpy.typing as npt
from scipy.stats import kendalltau


@dataclass
class Voter:
"""
Represents a single voter.
This tabulates the number of good votes, total votes,
and points.
We only put well-behaved people on the scoreboard and filter out the badly behaved ones
"""

uid: Any
num_votes: int
num_good_votes: int
num_prompts: int
num_good_prompts: int
num_rankings: int
num_good_rankings: int

#####################
voting_points: int
prompt_points: int
ranking_points: int

def voter_quality(self):
return self.num_good_votes / self.num_votes

def rank_quality(self):
return self.num_good_rankings / self.num_rankings

def prompt_quality(self):
return self.num_good_prompts / self.num_prompts

    def is_well_behaved(self, threshold_vote, threshold_prompt, threshold_rank):
        return (
            self.voter_quality() > threshold_vote
            and self.prompt_quality() > threshold_prompt
            and self.rank_quality() > threshold_rank
        )

def total_points(self, voting_weight, prompt_weight, ranking_weight):
return (
voting_weight * self.voting_points
+ prompt_weight * self.prompt_points
+ ranking_weight * self.ranking_points
)
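# A minimal scoreboard sketch (illustrative, not part of this PR): the
# thresholds (0.5 each) and weights (1.0 each) are made-up example values,
# not tuned defaults.
def scoreboard(voters):
    eligible = [v for v in voters if v.is_well_behaved(0.5, 0.5, 0.5)]
    return sorted(eligible, key=lambda v: v.total_points(1.0, 1.0, 1.0), reverse=True)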


def score_update_votes(new_vote: int, consensus: npt.ArrayLike, voter_data: Voter) -> Voter:
"""
This function returns the new "quality score" and points for a voter,
after that voter cast a vote on a question.

    This function is only to be run when archiving a question,
    i.e. when the question has had sufficiently many votes, or we can't get more than "K" bits of information.

The consensus is the array of all votes cast by all voters for that question
We then update the voter data using the new information

Parameters:
new_vote (int): the index of the vote cast by the voter
consensus (ArrayLike): all votes cast for this question
voter_data (Voter): a "Voter" object that represents the person casting the "new_vote"

Returns:
updated_voter (Voter): the new "quality score" and points for the voter
"""
# produces the ranking of votes, e.g. for [100,300,200] it returns [0, 2, 1],
# since 100 is the lowest, 300 the highest and 200 the middle value
consensus_ranking = np.argsort(np.argsort(consensus))
new_points = consensus_ranking[new_vote] + voter_data.voting_points

    # a vote counts as good if its consensus rank lands in the upper half;
    # (len(consensus) - 1) / 2 is the midpoint of the zero-indexed ranks
    new_good_votes = int(consensus_ranking[new_vote] > (len(consensus) - 1) / 2) + voter_data.num_good_votes
new_num_votes = voter_data.num_votes + 1
return replace(voter_data, num_votes=new_num_votes, num_good_votes=new_good_votes, voting_points=new_points)
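# Worked example (illustrative): for consensus = [200, 300, 100, 500],
# np.argsort(np.argsort(consensus)) gives ranks [1, 2, 0, 3], so a vote for
# option 3 (the consensus favourite) earns 3 points and counts as good
# (3 > (4 - 1) / 2), while a vote for option 2 earns 0 points.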


def score_update_prompts(consensus: npt.ArrayLike, voter_data: Voter) -> Voter:
"""
    This function returns the updated points and quality score for the author of a prompt,
    based on the votes the prompt received.

    This function is only to be run when archiving a question,
    i.e. when the question has had sufficiently many votes, or we can't get more than "K" bits of information.

Parameters:
consensus (ArrayLike): all votes cast for this question
voter_data (Voter): a "Voter" object that represents the person that wrote the prompt

Returns:
updated_voter (Voter): the new "quality score" and points for the voter
"""
    # assign a signed weight to each vote option, centered on the middle:
    # e.g. for 4 options this yields [-1, 0, 1, 2], so votes on the low end
    # subtract points and votes on the high end add points
    vote_weights = np.arange(len(consensus)) - len(consensus) // 2 + 1
    delta_votes = np.sum(vote_weights * consensus)
new_points = delta_votes + voter_data.prompt_points

    # a prompt counts as good if the weighted vote total is positive
    new_good_prompts = int(delta_votes > 0) + voter_data.num_good_prompts
new_num_prompts = voter_data.num_prompts + 1
return replace(
voter_data,
num_prompts=new_num_prompts,
num_good_prompts=new_good_prompts,
prompt_points=new_points,
)
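# Worked example (illustrative): for consensus = [200, 300, 100, 500] the
# weights are [-1, 0, 1, 2], so delta_votes = -200 + 0 + 100 + 1000 = 900:
# the prompt gains 900 points and counts as a good prompt.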


def score_update_ranking(user_ranking: npt.ArrayLike, consensus_ranking: npt.ArrayLike, voter_data: Voter) -> Voter:
"""
    This function returns the updated points and quality score for a voter,
    based on how closely their ranking matches the consensus ranking.

    This function is only to be run when archiving a question,
    i.e. when the question has had sufficiently many votes, or we can't get more than "K" bits of information.

    We use the bubble-sort distance (the "Kendall tau" distance) to compare the two rankings.
    We use this over Spearman correlation since:
    "[Kendall's τ] approaches a normal distribution more rapidly than ρ, as N, the sample size, increases;
    and τ is also more tractable mathematically, particularly when ties are present"
    Gilpin, A. R. (1993). Table for conversion of Kendall's Tau to Spearman's
    Rho within the context of measures of magnitude of effect for meta-analysis.

    Further, in
    "Research Design and Statistical Analysis, second edition, 2003",
    the authors note that, at least from a significance-test point of view, the two yield the same p-values.

Parameters:
        user_ranking (ArrayLike): ranking produced by the user
        consensus_ranking (ArrayLike): consensus ranking produced by running the voting algorithm to merge all rankings
        voter_data (Voter): a "Voter" object that represents the person that produced the ranking

Returns:
updated_voter (Voter): the new "quality score" and points for the voter
"""
    tau, _p_value = kendalltau(user_ranking, consensus_ranking)
    # kendalltau returns the Kendall τ correlation in [-1, 1]; normalize into
    # [0, 1], where 1.0 means identical rankings and 0.0 means fully reversed
    normalized_tau = (1 + tau) / 2
    new_points = normalized_tau + voter_data.ranking_points
    new_good_rankings = int(normalized_tau > 0.5) + voter_data.num_good_rankings
new_num_rankings = voter_data.num_rankings + 1
return replace(
voter_data,
num_rankings=new_num_rankings,
num_good_rankings=new_good_rankings,
ranking_points=new_points,
)
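# Worked example (illustrative): a ranking identical to the consensus gives
# τ = 1 and a normalized score of 1.0; a fully reversed ranking gives τ = -1
# and a score of 0.0, so each archived ranking adds between 0 and 1 points.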


if __name__ == "__main__":
demo_voter = Voter(
"abc",
num_votes=10,
num_good_votes=2,
num_prompts=10,
num_good_prompts=2,
num_rankings=10,
num_good_rankings=2,
voting_points=6,
prompt_points=0,
ranking_points=0,
)
new_vote = 3
consensus = np.array([200, 300, 100, 500])
print(demo_voter)
print("best vote ", score_update_votes(new_vote, consensus, demo_voter))
new_vote = 2
print("worst vote ", score_update_votes(new_vote, consensus, demo_voter))
new_vote = 1
print("medium vote ", score_update_votes(new_vote, consensus, demo_voter))
print("prompt writer", score_update_prompts(consensus, demo_voter))
print("best rank ", score_update_ranking(np.array([0, 2, 1]), np.array([0, 2, 1]), demo_voter))
print("medium rank ", score_update_ranking(np.array([2, 0, 1]), np.array([0, 2, 1]), demo_voter))
print("worst rank ", score_update_ranking(np.array([1, 0, 2]), np.array([0, 2, 1]), demo_voter))