
# Labeled Reddit Post Reviewer

Browse the labeled Reddit posts one by one. Use the **Previous** and **Next** buttons below to move through entries without rerunning cells.


In [9]:
import html
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

In [10]:
# Load the labeled sample CSV; its row count drives the entry counter and
# the enabled/disabled state of the navigation buttons.
df = pd.read_csv("../data/labeled_sample.csv")
total_entries = df.shape[0]

# Quick sanity check: list the distinct solution_domain labels in the file.
distinct_domains = set(df.solution_domain)
print(distinct_domains)

{'not_applicable', 'software_plus_hardware', '-', 'software_plus_external', 'software_only'}


In [2]:
# Zero-based position of the entry currently shown. The Previous/Next click
# handlers defined later in this cell rebind it via `global`.
current_index = 0

# With no rows there is nothing to page through, so show a notice instead of
# building the reviewer UI.
if total_entries == 0:
    display(widgets.HTML(value="<b>No labeled entries found in the dataset.</b>"))
else:
    # CSS for the review card, carried in an HTML widget that is placed in the
    # same container as the card so the rules travel with the widget output.
    # NOTE(review): `.reddit-review-container` is declared below, but no widget
    # in the visible code is given that class -- confirm whether the outer
    # container was meant to use it.
    style_block = widgets.HTML(value="""
    <style>
    .reddit-review-container {
        max-width: 900px;
        margin: 0 auto;
        font-family: 'Segoe UI', Tahoma, sans-serif;
        line-height: 1.5;
    }
    .reddit-card {
        background: #ffffff;
        border-radius: 12px;
        box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08);
        padding: 24px 28px;
        margin-top: 16px;
        border: 1px solid #e0e0e0;
    }
    .reddit-title {
        font-size: 24px;
        font-weight: 600;
        margin-bottom: 16px;
        color: #000000;
        max-height: 180px;
        overflow-y: auto;
        padding-right: 8px;
    }
    .reddit-body {
        font-size: 16px;
        background: #f9fafc;
        color: #000000;
        border-radius: 10px;
        padding: 18px;
        border: 1px solid #dfe3eb;
        overflow-y: visible;
        white-space: pre-wrap;
    }
    .classification-section {
        margin-top: 28px;
        padding: 18px 18px 12px 18px;
        border-top: 2px solid #e3e6ef;
        background: #f5f7fb;
        border-radius: 0 0 12px 12px;
    }
    .classification-title {
        font-size: 18px;
        font-weight: 700;
        margin-bottom: 12px;
        color: #000000;
    }
    .classification-row {
        display: flex;
        gap: 12px;
        margin-bottom: 10px;
        font-size: 16px;
    }
    .classification-label {
        min-width: 150px;
        font-weight: 600;
        color: #000000;
    }
    .entry-counter {
        font-size: 14px;
        color: #546e7a;
        padding-top: 6px;
    }
    </style>
    """)

    # Navigation row: a button on each side of the "Entry i of N" counter.
    prev_button = widgets.Button(description='Previous', icon='arrow-left', button_style='')
    next_button = widgets.Button(description='Next', icon='arrow-right', button_style='')
    entry_counter = widgets.HTML(layout=widgets.Layout(margin='0 12px'))
    button_box = widgets.HBox(
        children=[prev_button, entry_counter, next_button],
        layout=widgets.Layout(align_items='center', justify_content='space-between'),
    )

    # Static heading, plus the placeholder whose value update_display() sets
    # to the card for the current entry.
    header_html = widgets.HTML(value=f"<h2>Total labeled entries: {total_entries}</h2>")
    card_html = widgets.HTML()

    # Top-level widget: styles first, then heading, navigation and the card.
    container = widgets.VBox(
        children=[style_block, header_html, button_box, card_html],
        layout=widgets.Layout(width='100%'),
    )

    def sanitize(text: str) -> str:
        """Return ``text`` HTML-escaped, or a placeholder when it is absent.

        A value that ``pd.isna`` reports as missing, or that equals the empty
        string, yields the italic "Not provided." marker. Anything else is
        converted with ``str`` and passed through ``html.escape``.
        """
        is_missing = pd.isna(text) or text == ''
        return "<em>Not provided.</em>" if is_missing else html.escape(str(text))

    def format_entry(index: int) -> str:
        """Build the HTML card for the row of ``df`` at position ``index``.

        The title and body go through ``sanitize``. The solution domain is
        HTML-escaped, with an em dash shown when it is missing or empty. The
        ``is_problem`` flag is shown as an integer next to a check mark (when
        it equals 1) or a cross (any other integer).

        A flag that cannot be converted to an integer -- a missing value (NaN
        or None) or a non-numeric placeholder -- is shown as an em dash with a
        question-mark icon instead of raising. Raising here would abort the
        render and leave the previous card on screen.

        Args:
            index: Zero-based row position, looked up with ``df.iloc``.

        Returns:
            The card markup as a single HTML string.
        """
        row = df.iloc[index]
        title = sanitize(row.get('title', ''))
        body = sanitize(row.get('body', ''))

        # int() raises ValueError for NaN and non-numeric strings and
        # TypeError for None, so an unlabeled row must be handled explicitly.
        raw_flag = row.get('is_problem', 0)
        try:
            is_problem = int(raw_flag)
        except (TypeError, ValueError):
            is_problem = None

        if is_problem is None:
            status_icon = '❔'
            flag_text = '—'
        else:
            status_icon = '✅' if is_problem == 1 else '❌'
            flag_text = str(is_problem)

        solution_domain = row.get('solution_domain', '')
        solution_domain = '—' if pd.isna(solution_domain) or solution_domain == '' else html.escape(str(solution_domain))

        classification_html = f"""
            <div class='classification-section'>
                <div class='classification-title'>Classification</div>
                <div class='classification-row'>
                    <div class='classification-label'>Is Problem</div>
                    <div style='color: #000000; font-weight: 600;'>{status_icon} {flag_text}</div>
                </div>
                <div class='classification-row'>
                    <div class='classification-label'>Solution Domain</div>
                    <div style='color: #000000;'>{solution_domain}</div>
                </div>
            </div>
        """

        card = (
            "<div class='reddit-card'>"
            f"<div class='reddit-title'>{title}</div>"
            f"<div class='reddit-body'>{body}</div>"
            f"{classification_html}"
            "</div>"
        )
        return card

    def update_display(index: int):
        """Show entry ``index`` and bring the counter and buttons in line with it.

        The card is rendered first; the counter text and the disabled state of
        the Previous/Next buttons are then set from the same index.
        """
        at_first_entry = index == 0
        at_last_entry = index == total_entries - 1

        card_html.value = format_entry(index)
        entry_counter.value = f"<div class='entry-counter'>Entry {index + 1} of {total_entries}</div>"

        # Each button is disabled at the end of the list it would step past.
        prev_button.disabled = at_first_entry
        next_button.disabled = at_last_entry

    def on_prev_click(_):
        """Click handler: step back one entry unless the first one is shown."""
        global current_index
        if current_index <= 0:
            return
        current_index = current_index - 1
        update_display(current_index)

    def on_next_click(_):
        """Click handler: advance one entry unless the last one is shown."""
        global current_index
        if current_index >= total_entries - 1:
            return
        current_index = current_index + 1
        update_display(current_index)

    # Attach each navigation button to its click handler.
    for nav_button, click_handler in (
        (prev_button, on_prev_click),
        (next_button, on_next_click),
    ):
        nav_button.on_click(click_handler)

    # Render the starting entry before the widget tree is shown.
    update_display(current_index)
    display(container)


VBox(children=(HTML(value="\n    <style>\n    .reddit-review-container {\n        max-width: 900px;\n        m…