In [102]:
import yaml
import json
import os
import ipywidgets as widgets
from IPython.display import display, Markdown, HTML, clear_output


In [103]:
!rm scratch.jsonl

In [104]:
# Dump the scratch judgement file; cat reports an error if it does not exist.
!cat scratch.jsonl

cat: scratch.jsonl: No such file or directory


In [113]:

# Inline CSS for the verdict banner, keyed by verdict label. Every entry is
# the same 30%-opaque background in a different colour.
styles = {
    label: f'background-color: rgba({rgb}, 0.3);'
    for label, rgb in (
        ('Accepted', '0, 255, 0'),      # translucent green
        ('Rejected', '255, 0, 0'),      # translucent red
        ('No judgement', '0, 0, 0'),    # translucent black
    )
}

def display_item(data, judgement_file, index=0):
    """Show one item of ``data`` together with judging and navigation buttons.

    The item's ``'text'`` is rendered as Markdown, followed by
    Accept / Reject / Previous / Next buttons, a coloured banner with the
    stored verdict, and position / progress counters. Accept and Reject
    store ``True`` / ``False`` under the item's ``'id'`` in the YAML file
    ``judgement_file`` (rewritten in full on every click) and then move on
    to the next item. Every navigation step calls this function again, which
    clears the cell output and re-reads the file.

    Args:
        data: Sequence of dicts, each with a ``'text'`` and an ``'id'`` entry.
        judgement_file: Path of the YAML file mapping item id -> bool.
            A missing or empty file is treated as "nothing judged yet".
        index: Position in ``data`` of the item to show.
    """
    judgement_map = {}
    if os.path.isfile(judgement_file):
        with open(judgement_file) as fle:
            # safe_load returns None for an empty document; fall back to an
            # empty mapping so the membership test and counters below work.
            judgement_map = yaml.safe_load(fle) or {}

    clear_output(wait=True)

    if len(data) == 0:
        # Nothing to index into; say so instead of raising IndexError.
        display(Markdown("No items to display."))
        return

    item = data[index]
    display(Markdown(item['text']))

    accept_button = widgets.Button(description="Accept")
    reject_button = widgets.Button(description="Reject")
    next_button = widgets.Button(description="Next")
    prev_button = widgets.Button(description="Previous")

    def navigate(step):
        # Clamp so "Previous" on the first item and "Next" on the last item
        # simply re-render the current one.
        target = min(max(0, index + step), len(data) - 1)
        display_item(data, judgement_file, target)

    def record_judgement(accepted):
        # Store the verdict, rewrite the whole file, then advance.
        judgement_map[item["id"]] = accepted
        with open(judgement_file, 'w') as fle:
            yaml.dump(judgement_map, fle)
        navigate(1)

    accept_button.on_click(lambda b: record_judgement(True))
    reject_button.on_click(lambda b: record_judgement(False))
    next_button.on_click(lambda b: navigate(1))
    prev_button.on_click(lambda b: navigate(-1))

    display(widgets.HBox([accept_button, reject_button, prev_button, next_button]))

    # Banner with the verdict currently stored for this item.
    if item["id"] in judgement_map:
        verdict = 'Accepted' if judgement_map[item["id"]] else 'Rejected'
    else:
        verdict = 'No judgement'
    display(HTML(f"<div style='{styles[verdict]}'>{verdict}</div>"))

    # Position within the data set and overall progress.
    display(Markdown(f"Index: {index}/{len(data)}"))
    display(Markdown(f"Post ID: {item['id']}"))
    num_judged = len(judgement_map)
    num_accepted = sum(judgement_map.values())
    display(Markdown(f"Accepted: {num_accepted}, Judged: {num_judged}"))

# Example data and display call. Raw strings keep the LaTeX backslashes
# literal: '\i' in a normal string literal is an invalid escape sequence.
data = [
    {'id': 47, 'text': r'Some MathJax content $E=mc^2$'},
    {'id': 132, 'text': r'More content $\int_0^\infty e^{-x^2} dx$'},
    {'id': 12, 'text': 'More filler content'},
]
display_item(data, "scratch.jsonl")


SyntaxError: f-string: unmatched '[' (3444203761.py, line 53)

In [106]:
def load_stackexchange(filepath):
    """Load a JSON-lines file and add the fields ``display_item`` reads.

    Each non-blank line is parsed as one JSON object. Two keys are added to
    every row: ``'text'`` (the row's ``'input'`` and ``'output'`` joined
    under "INPUT:" / "OUTPUT:" headings) and ``'id'`` (copied from
    ``row['meta']['post_id']``).

    Args:
        filepath: Path to the ``.jsonl`` file, read as UTF-8.

    Returns:
        List of row dicts in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a row lacks ``'input'``, ``'output'`` or
            ``'meta'['post_id']``.
    """
    data = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(filepath, encoding="utf-8") as fle:
        for line in fle:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            row = json.loads(line)
            row["text"] = "INPUT:\n" + row["input"] + "\n\nOUTPUT:\n" + row["output"]
            row["id"] = row["meta"]["post_id"]
            data.append(row)
    return data

In [112]:
# Inspect the judgements recorded so far in the cstheory judgement file.
!cat human-judgements/cstheory_judgements.yaml

41: false


In [None]:
# Launch the judging UI on the filtered cstheory data.
# (A stray print(data) was dropped here: it printed the example list from an
# earlier cell rather than se_data, and display_item clears the cell output
# before rendering anyway.)
data_file = "filtered-stack-exchange/cstheory_filtered.jsonl"
judgement_file = "human-judgements/cstheory_judgements.yaml"
se_data = load_stackexchange(data_file)

display_item(se_data, judgement_file=judgement_file)

INPUT:
If $f$ is a convex function then Jensen's inequality states that $f(\textbf{E}[x]) \le \textbf{E}[f(x)]$, and mutatis mutandis when $f$ is concave.  Clearly in the worst case you cannot upper bound $\textbf{E}[f(x)]$ in terms of $f(\textbf{E}[x])$ for a convex $f$, but is there a bound that goes in this direction if $f$ is convex but "not too convex"?  Is there some standard bound that  gives conditions on a convex function $f$ (and possibly the distribution as well, if necessary) that would allow you to conclude that $\textbf{E}[f(x)] \le \varphi(f)f(\textbf{E}[x])$, where $\varphi(f)$ is some function of the curvature/degree of convexity of $f$?  Something akin to a Lipschitz condition, perhaps?

OUTPUT:
EDIT: original version missed an absolute value.  sorry!!
Hi Ian.  I will briefly outline two sample inequalities, one using a Lipschitz bound, the other using a bound on the second derivative, and then discuss some difficulties in this problem.  Although I'm being redundant, since an approach using one derivative explains what happens with more derivatives (via Taylor), it turns out that the second derivative version is quite nice.
First, with a Lipschitz bound: simply re-work the standard Jensen inequality.  The same trick applies: compute the Taylor expansion at the expected value.
Specifically, Let $X$ have corresponding measure $\mu$, and set $m := \textrm E(x)$.
If $f$ has Lipschitz constant $L$, then by Taylor's theorem
$$
f(x) = f(m) + f'(z)(x-m) \leq f(m) + L|x-m|,
$$
where $z \in [m, x]$ (note that $x\leq m$ and $x> m$ are possible). Using this and re-working the Jensen proof (I am paranoid and checked that the standard one is indeed on wikipedia),
\begin{align}
\operatorname{E}(f(X)) & = 
\int f(x) \, d\mu(x) 
\leq f(m) \int d\mu(x) + L\int |x-m| \, d\mu(x) \\[6pt]
& = f(\operatorname{E}(X)) + L \operatorname{E} (|X-\operatorname{E}(X)|).
\end{align}
Now, suppose $|f''(x)| \leq \lambda$.  In this case,
\begin{align}
f(x) & = f(m) + f'(m)(x-m) + f''(z) \frac{(x-m)^2} 2 \\[6pt]
& \leq f(m) + f'(m)(x-m) + \lambda \frac{(x-m)^2} 2,
\end{align}
and so
\begin{align}
\operatorname{E}(f(X))
& \leq f(m) + f'(m)(\operatorname{E}(X) - m) + \frac {\lambda \operatorname{E}((X-m)^2)}{2} \\[6pt]
& = f(\operatorname{E}(X)) + \frac {\lambda \operatorname{Var}(X)}2.
\end{align}
I'd like to briefly mention a few things.  Sorry if they are obvious.
One is that, you can't merely say "wlog $\operatorname{E}(X) = 0$" by shifting the distribution, because you are changing the relationship between $f$ and $\mu$.
Next is that the bound must depend on the distribution in some way.  To see this, imagine that $X \sim \textrm{Gaussian}(0, \sigma^2)$ and $f(x) = x^2$.  Whatever the value of $\sigma$, you still get $f(\operatorname{E}(X)) = f(0) = 0$.  On the other hand, $\operatorname{E}(f(X)) = \operatorname{E}(X^2) = \sigma^2$.  Thus, by changing $\sigma$, you can make the gap between the two quantities arbitrary!  Intuitively, more mass is pushed away from the mean, and thus, for any strictly convex function, $\operatorname{E} (f(X))$ will increase.
Lastly, I don't see how to get a multiplicative bound like you suggest.  Everything I have used in this post is standard: Taylor's theorem and derivative bounds are bread&butter in statistics bounds, and they automatically give additive, not multiplicative errors.
I will think about it though, and post something.  Vague intuition is it will need very strenous conditions on both the function and the distribution, and that the additive bound is actually at the heart of it.

HBox(children=(Button(description='Accept', style=ButtonStyle()), Button(description='Reject', style=ButtonSty…

Index: 10/213

Accepted: 5, Judged: 10