In [1]:
# ─────────────────────────────────────────────────────────────
# ✅ Manual Label Review – Troubles Dataset
# Loads all rows (all label categories) and previews the first BATCH_SIZE rows,
# showing the original text next to its translation
# ─────────────────────────────────────────────────────────────

import pandas as pd

# === CONFIG ===
LABEL_PATH = "../data/ubuntu/yerik/yerik_labeled_sample.csv"
BATCH_SIZE = 150  # Adjust as needed

# === Load the labelled sample and order it by video ===
df = (
    pd.read_csv(LABEL_PATH)
    .sort_values(by='video_id')
    .reset_index(drop=True)
)

# === Keep only the review columns, then take the first batch ===
batch_df = (
    df.loc[:, ['video_id', 'original_text', 'translated_text', 'emoji_only', 'manual_label']]
    .head(BATCH_SIZE)
    .copy()
)

# === Display table ===
# The Styler is the cell's last expression so the notebook renders it;
# 'pre-wrap' keeps line breaks inside the two text columns visible.
print("⬇️ Previewing first", BATCH_SIZE, "rows (with GPT moral labels)")
batch_df.style.set_properties(
    **{'white-space': 'pre-wrap'},
    subset=['original_text', 'translated_text'],
)

⬇️ Previewing first 150 rows (with GPT moral labels)


Unnamed: 0,video_id,original_text,translated_text,emoji_only,manual_label
0,https://www.tiktok.com/@yerik.jomei/video/7371510272684494085,zayaan reposted,zayaan reposted,,Unclear
1,https://www.tiktok.com/@yerik.jomei/video/7371510272684494085,@Tebogo Mogashoa 😭,@Tebogo Mogashoa,😭,Unclear
2,https://www.tiktok.com/@yerik.jomei/video/7371510272684494085,amen,amen,,Unclear
3,https://www.tiktok.com/@yerik.jomei/video/7371510272684494085,@moo,@moo,,Unclear
4,https://www.tiktok.com/@yerik.jomei/video/7371510272684494085,🤣🤣🤣 This is this is so wholesome and cute best advice 🤣🤣🤣🤣,This is this is so wholesome and cute best advice,🤣🤣🤣🤣🤣🤣🤣,Ubuntu
5,https://www.tiktok.com/@yerik.jomei/video/7371510272684494085,🤣🤣🤣,,🤣🤣🤣,Unclear
6,https://www.tiktok.com/@yerik.jomei/video/7371510272684494085,Best advice😂,Best advice,😂,Ubuntu
7,https://www.tiktok.com/@yerik.jomei/video/7371510272684494085,😂😂😂😂,,😂😂😂😂,Unclear
8,https://www.tiktok.com/@yerik.jomei/video/7371510272684494085,yes yesss,yes yesss,,Unclear
9,https://www.tiktok.com/@yerik.jomei/video/7371510272684494085,😂😂😂,,😂😂😂,Unclear


In [None]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML

# === Shared UI state ===
# Labels offered by every dropdown, plus the widgets currently on screen:
# comment_dropdowns maps a DataFrame row index to that row's Dropdown, and
# video_dropdown holds the video-level Dropdown once a screen has been drawn.
label_options = ["Ubuntu", "Chaos", "Middle", "Unclear"]
comment_dropdowns = dict()
video_dropdown = None

# === Load data ===
LABEL_PATH = "../data/ubuntu/yerik/yerik_labeled_sample.csv"
df = pd.read_csv(LABEL_PATH)

# === Ensure label columns exist ===
# Only create the working columns when they are absent, so values already
# present in the file are kept as they are.
if 'manual_label_new' not in df:
    df['manual_label_new'] = df['manual_label']
if 'video_moral_label' not in df:
    df['video_moral_label'] = "Unclear"

# === Sort and prepare ===
df = (
    df.sort_values(by='video_id')
    .reset_index(drop=True)
)
video_ids = df['video_id'].unique().tolist()
video_pointer = {'index': 0}  # position in video_ids of the video being shown

# === Save changes made via dropdowns
def save_current_labels():
    """Copy the selections of the on-screen dropdowns back into ``df``.

    For the video at ``video_pointer['index']`` this writes each comment
    dropdown's value into ``manual_label_new`` for the matching row, and the
    video-level dropdown's value into ``video_moral_label`` for every row of
    that video. Only the in-memory DataFrame is changed; nothing is written
    to disk here.

    The call is a no-op when no screen has been rendered yet
    (``video_dropdown`` is still ``None``), and rows without a registered
    dropdown keep their current label instead of raising ``KeyError``.
    """
    # No widgets exist yet, so there is nothing to persist.
    if video_dropdown is None:
        return

    current_video_id = video_ids[video_pointer['index']]
    is_current_video = df['video_id'] == current_video_id

    # Iterate over the index only: the row contents are not needed, so there
    # is no reason to build a Series per row with iterrows().
    for i in df.index[is_current_video]:
        dropdown = comment_dropdowns.get(i)
        if dropdown is not None:
            df.at[i, 'manual_label_new'] = dropdown.value

    df.loc[is_current_video, 'video_moral_label'] = video_dropdown.value

# === Display video screen
def display_video(video_idx):
    """Render the labelling screen for the video at position ``video_idx``.

    The screen consists of: a row of navigation buttons (one per video), a
    link to the video, a video-level label dropdown, one dropdown per comment
    of that video, and Previous/Next buttons. Navigating away from a screen
    first calls ``save_current_labels()`` so the current selections are
    copied into ``df``.

    Side effects: clears the cell output, rebinds the module-level
    ``video_dropdown`` and repopulates ``comment_dropdowns`` with the widgets
    of the displayed video.

    Args:
        video_idx: Position in ``video_ids`` of the video to show.
    """
    # Local import keeps this cell self-contained; the stdlib module name
    # ``html`` does not clash with IPython's ``HTML`` class.
    from html import escape

    global video_dropdown

    def _safe_text(value):
        """Return HTML-escaped text for a cell, or '' for a missing value."""
        # Comment text is scraped user content, so it must not be rendered as
        # raw markup; missing cells would otherwise show up as "nan".
        return "" if pd.isna(value) else escape(str(value))

    def _dropdown_state(value):
        """Return (options, value) that a Dropdown will accept for `value`."""
        # A Dropdown rejects a value that is not among its options. Missing
        # labels fall back to "Unclear"; any other unexpected label is added
        # as an extra option so it is shown and preserved, not overwritten.
        selected = "Unclear" if pd.isna(value) else value
        if selected in label_options:
            return label_options, selected
        return [*label_options, selected], selected

    clear_output()
    video_id = video_ids[video_idx]
    video_comments = df[df['video_id'] == video_id]
    print(f"📹 Video {video_idx+1} of {len(video_ids)}")

    # Top nav
    def make_nav_button(i, current_idx):
        # A factory function so each button's callback captures its own `i`.
        style = {'button_color': '#1a73e8'} if i == current_idx else {}
        button = widgets.Button(description=f"Video {i+1}", layout=widgets.Layout(width='80px'), style=style)
        def on_click(b):
            # Persist the selections of the screen being left before jumping.
            save_current_labels()
            video_pointer['index'] = i
            display_video(i)
        button.on_click(on_click)
        return button

    nav_buttons = [make_nav_button(i, video_idx) for i in range(len(video_ids))]
    display(widgets.HBox(nav_buttons))

    # Video link (escaped so quotes or markup in the value cannot break the tag)
    safe_url = escape(str(video_id))
    display(HTML(f'<b>🔗 Video Link:</b> <a href="{safe_url}" target="_blank">{safe_url}</a>'))

    # Video-level dropdown
    video_options, video_value = _dropdown_state(video_comments['video_moral_label'].iloc[0])
    video_dropdown = widgets.Dropdown(
        options=video_options,
        value=video_value,
        description="Video Label:",
        layout=widgets.Layout(width='250px')
    )
    display(video_dropdown)

    # Comments
    # Drop the widgets of the previously shown video: its selections were
    # already saved by the caller, and keeping them would accumulate stale
    # widgets for every screen visited.
    comment_dropdowns.clear()
    box_list = []
    for i, row in video_comments.iterrows():
        comment_text = f"""
        <b>Row {i}</b><br>
        <b>Original:</b> {_safe_text(row['original_text'])}<br>
        <b>Translated:</b> {_safe_text(row['translated_text'])}<br>
        <b>Emojis:</b> {_safe_text(row['emoji_only'])}
        """
        comment_html = widgets.HTML(value=comment_text)
        row_options, row_value = _dropdown_state(row['manual_label_new'])
        dd = widgets.Dropdown(options=row_options, value=row_value, layout=widgets.Layout(width='150px'))
        comment_dropdowns[i] = dd
        box_list.append(widgets.HBox([comment_html, dd]))
    display(widgets.VBox(box_list))

    # Prev/Next
    prev_button = widgets.Button(description="⬅ Previous")
    next_button = widgets.Button(description="Next ➡")

    def on_prev(b):
        # Ignored on the first video.
        if video_pointer['index'] > 0:
            save_current_labels()
            video_pointer['index'] -= 1
            display_video(video_pointer['index'])

    def on_next(b):
        # Ignored on the last video.
        if video_pointer['index'] < len(video_ids) - 1:
            save_current_labels()
            video_pointer['index'] += 1
            display_video(video_pointer['index'])

    prev_button.on_click(on_prev)
    next_button.on_click(on_next)

    display(widgets.HBox([prev_button, next_button]))

# 🔄 Launch UI: draw the screen for the video the pointer currently refers to
# (video_pointer starts at index 0 when this cell is run from the top).
display_video(video_pointer['index'])

HBox(children=(Button(description='Video 1', layout=Layout(width='80px'), style=ButtonStyle()), Button(descrip…

Dropdown(description='Video Label:', layout=Layout(width='250px'), options=('Ubuntu', 'Chaos', 'Middle', 'Uncl…

VBox(children=(HBox(children=(HTML(value='\n        <b>Row 129</b><br>\n        <b>Original:</b> 😂😂😂😂<br>\n   …

HBox(children=(Button(description='⬅ Previous', style=ButtonStyle()), Button(description='Next ➡', style=Butto…

In [3]:
# === Preview your work before saving ===
def preview_labels(max_rows=150):
    """Show the original and revised labels side by side.

    The selections of the screen currently on display are first copied into
    ``df`` via ``save_current_labels()``, so the preview reflects them.

    Args:
        max_rows: Number of leading rows to display. Defaults to 150, the
            previously hard-coded amount; pass ``None`` to display every row.
    """
    save_current_labels()
    preview_columns = ['video_id', 'original_text', 'manual_label', 'manual_label_new', 'video_moral_label']
    preview_df = df[preview_columns].copy()
    if max_rows is None:
        display(preview_df)
    else:
        display(preview_df.head(max_rows))

preview_labels()

Unnamed: 0,video_id,original_text,manual_label,manual_label_new,video_moral_label
0,https://www.tiktok.com/@yerik.jomei/video/7371...,zayaan reposted,Unclear,Middle,Ubuntu
1,https://www.tiktok.com/@yerik.jomei/video/7371...,@Tebogo Mogashoa 😭,Unclear,Middle,Ubuntu
2,https://www.tiktok.com/@yerik.jomei/video/7371...,amen,Unclear,Middle,Ubuntu
3,https://www.tiktok.com/@yerik.jomei/video/7371...,@moo,Unclear,Middle,Ubuntu
4,https://www.tiktok.com/@yerik.jomei/video/7371...,🤣🤣🤣 This is this is so wholesome and cute best...,Ubuntu,Ubuntu,Ubuntu
...,...,...,...,...,...
145,https://www.tiktok.com/@yerik.jomei/video/7511...,I love the way it all leads back to Jesus our ...,Ubuntu,Ubuntu,Ubuntu
146,https://www.tiktok.com/@yerik.jomei/video/7511...,😂😂😂👌👌🥰🥰,Unclear,Middle,Ubuntu
147,https://www.tiktok.com/@yerik.jomei/video/7511...,funny facts ❤️,Unclear,Middle,Ubuntu
148,https://www.tiktok.com/@yerik.jomei/video/7511...,"Thank you please, someone had to say it 😂😂😂😂🤷‍...",Ubuntu,Ubuntu,Ubuntu


In [4]:
# === Save final CSV manually when ready ===
def save_labels(save_path=None):
    """Write the labelled DataFrame to CSV.

    The selections of the screen currently on display are first copied into
    ``df`` via ``save_current_labels()``, then ``df`` is written without its
    index.

    Args:
        save_path: Destination CSV path. Defaults to ``LABEL_PATH``, i.e. the
            file the data was loaded from is overwritten, so the output
            location follows the configured input path instead of a
            separately hard-coded copy of it.
    """
    save_current_labels()
    if save_path is None:
        save_path = LABEL_PATH
    df.to_csv(save_path, index=False)
    print(f"✅ Saved to {save_path}")


save_labels()


✅ Saved to ../data/ubuntu/yerik/yerik_labeled_sample.csv
