In [1]:
# ─────────────────────────────────────────────────────────────
# ✅ Manual Label Review – Troubles Dataset
# Loads all rows (all label categories), shows original + translation
# ─────────────────────────────────────────────────────────────

import pandas as pd

# === CONFIG ===
BATCH_SIZE = 150  # Maximum number of rows to preview; adjust as needed
LABEL_PATH = "../data/middle/pieter/pieter_labeled_sample.csv"

# === Load and sort ===
# Sorting by video_id groups each video's comments together; the index is
# rebuilt so row numbers follow the sorted order.
df = pd.read_csv(LABEL_PATH)
df = df.sort_values(by='video_id').reset_index(drop=True)

# === Take the first batch ===
# .iloc slicing simply returns every row when the file holds fewer than
# BATCH_SIZE rows, so no bounds check is needed here.
batch_df = df.iloc[:BATCH_SIZE].copy()
batch_df = batch_df[['video_id', 'original_text', 'translated_text', 'emoji_only', 'manual_label']]

# === Display table ===
# Report the number of rows actually shown rather than the requested batch
# size: the two differ whenever the CSV is shorter than BATCH_SIZE.
print("⬇️ Previewing first", len(batch_df), "rows (with GPT moral labels)")
# 'pre-wrap' keeps line breaks inside comments and wraps long text in the cell.
batch_df.style.set_properties(
    subset=['original_text', 'translated_text'],
    **{'white-space': 'pre-wrap'}
)

⬇️ Previewing first 150 rows (with GPT moral labels)


Unnamed: 0,video_id,original_text,translated_text,emoji_only,manual_label
0,https://www.tiktok.com/@thepieterkriel/video/7479652795608567046,it's the same issue in the black community,it's the same issue in the black community,,Middle
1,https://www.tiktok.com/@thepieterkriel/video/7479652795608567046,Racism is taught,Racism is taught,,Middle
2,https://www.tiktok.com/@thepieterkriel/video/7479652795608567046,"GOD bless you,😊","GOD bless you,",😊,Ubuntu
3,https://www.tiktok.com/@thepieterkriel/video/7479652795608567046,💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙.,VIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DA.,💙💙💙💙💙💙💙💙💙💙💙💙💙💙💙💙💙💙💙💙💙💙,Unclear
4,https://www.tiktok.com/@thepieterkriel/video/7479652795608567046,you are brave and wise,you are brave and wise,,Ubuntu
5,https://www.tiktok.com/@thepieterkriel/video/7479652795608567046,We need more people like you in South Africa.,We need more people like you in South Africa.,,Ubuntu
6,https://www.tiktok.com/@thepieterkriel/video/7479652795608567046,Peter you are the next South African president. I wish you could go study Law and political studies. Engage with the people. Prepare yourself now. I am black and I will vote for you.,Peter you are the next South African president. I wish you could go study Law and political studies. Engage with the people. Prepare yourself now. I am black and I will vote for you.,,Ubuntu
7,https://www.tiktok.com/@thepieterkriel/video/7479652795608567046,😂,,😂,Unclear
8,https://www.tiktok.com/@thepieterkriel/video/7479652795608567046,I think you going far bro with your mindset keep it up 👏👏👏📿,I think you going far bro with your mindset keep it up,👏👏👏📿,Ubuntu
9,https://www.tiktok.com/@thepieterkriel/video/7479652795608567046,Only very few whites are welcoming but the majority of them are acting with an animal extinct towards other people of different races worldwide. 🙏🏽,Only very few whites are welcoming but the majority of them are acting with an animal extinct towards other people of different races worldwide.,🙏🏽,Middle


In [None]:
import html

import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML

# === Load data ===
LABEL_PATH = "../data/middle/pieter/pieter_labeled_sample.csv"
df = pd.read_csv(LABEL_PATH)

# === Ensure label columns exist ===
# Columns that already exist in the CSV are left untouched.
if 'manual_label_new' not in df:
    # Start the editable label as a copy of the existing label.
    df['manual_label_new'] = df['manual_label']
if 'video_moral_label' not in df:
    # Videos have no label yet, so every row starts as "Unclear".
    df['video_moral_label'] = "Unclear"

# === Sort and prepare ===
# Group each video's comments together and renumber rows in that order.
df = df.sort_values('video_id').reset_index(drop=True)
# One entry per video, in order of first appearance in the sorted frame.
video_ids = pd.unique(df['video_id']).tolist()
# Position (within video_ids) of the video being reviewed; kept in a dict so
# widget callbacks can update it in place.
video_pointer = dict(index=0)

# Choices offered by every label dropdown.
label_options = ["Ubuntu", "Chaos", "Middle", "Unclear"]
# Per-comment dropdown widgets, keyed by df row index.
comment_dropdowns = dict()
# Video-level dropdown widget; assigned by display_video().
video_dropdown = None

# === Save changes made via dropdowns
def save_current_labels():
    """Copy the on-screen dropdown selections for the current video into ``df``.

    Writes each comment dropdown's value to ``manual_label_new`` for the
    matching row, and the video-level dropdown's value to
    ``video_moral_label`` for every row of the current video (the video at
    ``video_pointer['index']`` in ``video_ids``).

    Does nothing when no video has been rendered yet (``video_dropdown`` is
    still ``None``): in that state there are no selections to copy, and
    callers such as the preview/save helpers can still proceed with ``df``
    as loaded.
    """
    if video_dropdown is None:
        return

    current_video_id = video_ids[video_pointer['index']]
    is_current = df['video_id'] == current_video_id

    # Only the row labels are needed, so walk the index instead of building
    # a Series per row with iterrows().
    for row_idx in df.index[is_current]:
        df.at[row_idx, 'manual_label_new'] = comment_dropdowns[row_idx].value

    df.loc[is_current, 'video_moral_label'] = video_dropdown.value

# === Display video screen
def display_video(video_idx):
    """Render the review screen for the video at position ``video_idx``.

    Clears the cell output, then shows a row of per-video navigation
    buttons, a link to the video, a video-level label dropdown, one label
    dropdown per comment, and Previous/Next buttons.

    Side effects: stores ``video_idx`` in ``video_pointer``, rebinds the
    module-level ``video_dropdown`` and refills ``comment_dropdowns`` (keyed
    by ``df`` row index), so ``save_current_labels()`` always reads the
    widgets that are on screen.

    Args:
        video_idx: Zero-based position of the video in ``video_ids``.
    """
    global video_dropdown

    def as_html_text(value):
        """Return ``value`` escaped for HTML; missing cells become ''."""
        # Comment text is scraped user content, so it must not be
        # interpreted as markup. Empty CSV cells load as NaN and would
        # otherwise be rendered as the literal text "nan".
        return "" if pd.isna(value) else html.escape(str(value))

    def as_option(value):
        """Return ``value`` if it is a valid label, otherwise "Unclear"."""
        # Dropdown rejects an initial value that is not among its options
        # (e.g. a missing label), which would abort rendering of the screen.
        return value if value in label_options else "Unclear"

    def go_to(target_idx):
        """Store the current selections, then show another video."""
        save_current_labels()
        display_video(target_idx)

    clear_output()
    # Keep the pointer in step with what is displayed, including when this
    # function is called directly rather than through a navigation button.
    video_pointer['index'] = video_idx
    video_id = video_ids[video_idx]
    video_comments = df[df['video_id'] == video_id]
    print(f"📹 Video {video_idx+1} of {len(video_ids)}")

    # Top nav
    def make_nav_button(i):
        # The button of the video being shown is highlighted.
        style = {'button_color': '#1a73e8'} if i == video_idx else {}
        button = widgets.Button(description=f"Video {i+1}", layout=widgets.Layout(width='80px'), style=style)
        # Bind i as a default argument so each button keeps its own target.
        button.on_click(lambda _button, target=i: go_to(target))
        return button

    display(widgets.HBox([make_nav_button(i) for i in range(len(video_ids))]))

    # Video link
    safe_url = as_html_text(video_id)
    display(HTML(f'<b>🔗 Video Link:</b> <a href="{safe_url}" target="_blank">{safe_url}</a>'))

    # Video-level dropdown
    video_dropdown = widgets.Dropdown(
        options=label_options,
        value=as_option(video_comments['video_moral_label'].iloc[0]),
        description="Video Label:",
        layout=widgets.Layout(width='250px')
    )
    display(video_dropdown)

    # Comments
    # Drop the widgets of the previously shown video; their values were
    # already written to df before navigating here.
    comment_dropdowns.clear()
    box_list = []
    for i, row in video_comments.iterrows():
        comment_text = f"""
        <b>Row {i}</b><br>
        <b>Original:</b> {as_html_text(row['original_text'])}<br>
        <b>Translated:</b> {as_html_text(row['translated_text'])}<br>
        <b>Emojis:</b> {as_html_text(row['emoji_only'])}
        """
        comment_html = widgets.HTML(value=comment_text)
        dd = widgets.Dropdown(
            options=label_options,
            value=as_option(row['manual_label_new']),
            layout=widgets.Layout(width='150px')
        )
        comment_dropdowns[i] = dd
        box_list.append(widgets.HBox([comment_html, dd]))
    display(widgets.VBox(box_list))

    # Prev/Next
    prev_button = widgets.Button(description="⬅ Previous")
    next_button = widgets.Button(description="Next ➡")

    def on_prev(b):
        # No-op on the first video.
        if video_pointer['index'] > 0:
            go_to(video_pointer['index'] - 1)

    def on_next(b):
        # No-op on the last video.
        if video_pointer['index'] < len(video_ids) - 1:
            go_to(video_pointer['index'] + 1)

    prev_button.on_click(on_prev)
    next_button.on_click(on_next)

    display(widgets.HBox([prev_button, next_button]))

# 🔄 Launch UI
# An empty dataset has no video to index, so report that instead of raising.
if video_ids:
    display_video(video_pointer['index'])
else:
    print("⚠️ No videos to review")




HBox(children=(Button(description='Video 1', layout=Layout(width='80px'), style=ButtonStyle()), Button(descrip…

Dropdown(description='Video Label:', index=3, layout=Layout(width='250px'), options=('Ubuntu', 'Chaos', 'Middl…

VBox(children=(HBox(children=(HTML(value='\n        <b>Row 128</b><br>\n        <b>Original:</b> I like this, …

HBox(children=(Button(description='⬅ Previous', style=ButtonStyle()), Button(description='Next ➡', style=Butto…

In [9]:
# Review full table before saving
# Pull the selections of the video currently on screen into df first;
# otherwise its rows still show the values from before the latest edits.
save_current_labels()
display(df[['video_id', 'original_text', 'translated_text', 'manual_label', 'manual_label_new', 'video_moral_label']])

# Save button
save_button = widgets.Button(description="💾 Save to CSV")

def on_save(b):
    """Write the reviewed labels to the manual-review CSV.

    Args:
        b: The clicked button (supplied by ipywidgets; unused).
    """
    # Capture edits made since the table above was rendered, so the file
    # matches what the reviewer sees in the dropdowns at click time.
    save_current_labels()
    df.to_csv("../data/middle/pieter/pieter_manual_review.csv", index=False)
    print("✅ Saved to pieter_manual_review.csv")

save_button.on_click(on_save)
display(save_button)


Unnamed: 0,video_id,original_text,translated_text,manual_label,manual_label_new,video_moral_label
0,https://www.tiktok.com/@thepieterkriel/video/7...,it's the same issue in the black community,it's the same issue in the black community,Middle,Middle,Middle
1,https://www.tiktok.com/@thepieterkriel/video/7...,Racism is taught,Racism is taught,Middle,Middle,Middle
2,https://www.tiktok.com/@thepieterkriel/video/7...,"GOD bless you,😊","GOD bless you,",Ubuntu,Ubuntu,Middle
3,https://www.tiktok.com/@thepieterkriel/video/7...,💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA ...,VIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA DAVIVA...,Unclear,Middle,Middle
4,https://www.tiktok.com/@thepieterkriel/video/7...,you are brave and wise,you are brave and wise,Ubuntu,Ubuntu,Middle
...,...,...,...,...,...,...
145,https://www.tiktok.com/@thepieterkriel/video/7...,"Corruption has done enormous damage Vusi, bett...","Corruption has done enormous damage Vusi, bett...",Middle,Middle,Unclear
146,https://www.tiktok.com/@thepieterkriel/video/7...,As South Africans we are not angry enough 🤞🏾,As South Africans we are not angry enough,Middle,Middle,Unclear
147,https://www.tiktok.com/@thepieterkriel/video/7...,🫡,,Unclear,Unclear,Unclear
148,https://www.tiktok.com/@thepieterkriel/video/7...,khuphuka lapho mfana bayoze bakuvume ngeqiniso...,Go up when the boy will let you know about the...,Unclear,Unclear,Unclear


Button(description='💾 Save to CSV', style=ButtonStyle())

In [10]:
# === Preview your work before saving ===
def preview_labels(max_rows=150):
    """Show the original and reviewed labels side by side.

    Stores the selections of the video currently on screen first, so the
    preview reflects the latest edits.

    Args:
        max_rows: Maximum number of leading rows to display. Defaults to
            150, the limit this helper previously hard-coded.
    """
    save_current_labels()
    preview_columns = ['video_id', 'original_text', 'manual_label', 'manual_label_new', 'video_moral_label']
    # Column selection already yields a new frame, so no extra copy is needed.
    # NOTE(review): pandas may still elide middle rows when rendering,
    # depending on its display.max_rows option.
    display(df[preview_columns].head(max_rows))

preview_labels()

Unnamed: 0,video_id,original_text,manual_label,manual_label_new,video_moral_label
0,https://www.tiktok.com/@thepieterkriel/video/7...,it's the same issue in the black community,Middle,Middle,Middle
1,https://www.tiktok.com/@thepieterkriel/video/7...,Racism is taught,Middle,Middle,Middle
2,https://www.tiktok.com/@thepieterkriel/video/7...,"GOD bless you,😊",Ubuntu,Ubuntu,Middle
3,https://www.tiktok.com/@thepieterkriel/video/7...,💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA DA💙VIVA ...,Unclear,Middle,Middle
4,https://www.tiktok.com/@thepieterkriel/video/7...,you are brave and wise,Ubuntu,Ubuntu,Middle
...,...,...,...,...,...
145,https://www.tiktok.com/@thepieterkriel/video/7...,"Corruption has done enormous damage Vusi, bett...",Middle,Middle,Middle
146,https://www.tiktok.com/@thepieterkriel/video/7...,As South Africans we are not angry enough 🤞🏾,Middle,Chaos,Middle
147,https://www.tiktok.com/@thepieterkriel/video/7...,🫡,Unclear,Middle,Middle
148,https://www.tiktok.com/@thepieterkriel/video/7...,khuphuka lapho mfana bayoze bakuvume ngeqiniso...,Unclear,Ubuntu,Middle


In [11]:
# === Save final CSV manually when ready ===
def save_labels():
    """Store the on-screen selections in ``df`` and write ``df`` to CSV.

    The row index is not written to the file. Prints the destination path
    once the file has been written.
    """
    destination = "../data/middle/pieter/pieter_manual_review.csv"
    # Pull the current video's dropdown values into df before exporting.
    save_current_labels()
    df.to_csv(destination, index=False)
    print(f"✅ Saved to {destination}")


save_labels()


✅ Saved to ../data/middle/pieter/pieter_manual_review.csv
