In [None]:
import pandas as pd
import os
import json
from IPython.display import display, clear_output
import ipywidgets as widgets
from google.colab import drive

# Mount Google Drive; the data paths used by this notebook live under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [7]:
# === CONFIG ===
LABEL_TARGET = 1000  # number of comments drawn into the manual-labeling sample
# NOTE(review): judging by the file names alone, these two variables may be
# swapped (the label CSV is called "filtered_comments", the comment source
# "manually_labeled") — confirm the paths before relying on them.
label_file_path = '/content/drive/MyDrive/filtered_comments.csv'  # CSV the labeled sample is saved to / resumed from
comments_path = '/content/drive/MyDrive/manually_labeled.jsonl'  # JSON Lines file all comments are read from

# === LOAD DATA ===
# One JSON object per line. Decode as UTF-8 explicitly instead of relying on
# the platform default, and skip blank lines, which json.loads would reject.
with open(comments_path, 'r', encoding='utf-8') as f:
    data = [json.loads(line) for line in f if line.strip()]
all_df = pd.DataFrame(data)

In [8]:
# Create or load labeled sample
# Labels accepted from a previously saved file; defined outside the branch so
# the name exists whether we resume or start fresh.
valid_labels = ['positive', 'negative', 'neutral']

if os.path.exists(label_file_path):
    # Resume: reload the saved sample and normalise its labels. Empty cells are
    # read back as NaN, which astype(str) turns into 'nan'; that (like any other
    # unrecognised value) is reset to '' so the row counts as unlabeled.
    labeled_sample = pd.read_csv(label_file_path)
    labeled_sample['label'] = labeled_sample['label'].astype(str).str.strip().str.lower()
    labeled_sample.loc[~labeled_sample['label'].isin(valid_labels), 'label'] = ''
else:
    # Fresh start: draw a reproducible random sample. Cap the size at the number
    # of available rows, because DataFrame.sample raises ValueError when asked
    # for more rows than exist (without replacement).
    sample_size = min(LABEL_TARGET, len(all_df))
    labeled_sample = all_df.sample(n=sample_size, random_state=42).copy()
    labeled_sample['label'] = ''
    labeled_sample.to_csv(label_file_path, index=False)

# === SETUP UNLABELED INDEX ===
# Rows that still need a label, and their index values in original order;
# `progress` is the queue the UI walks through via `current_pos`.
unlabeled = labeled_sample[labeled_sample['label'] == ''].copy()
progress = unlabeled.index.tolist()

current_pos = 0


In [9]:
# === UI FUNCTIONS ===

def save_progress():
    """Write the full ``labeled_sample`` frame to ``label_file_path`` as CSV.

    The DataFrame index is not written, so the file contains data columns only.
    """
    labeled_sample.to_csv(path_or_buf=label_file_path, index=False)

def render():
    """Redraw the labeling UI for the comment at ``current_pos``.

    Clears the current output, then either shows the comment at
    ``progress[current_pos]`` with its label / navigation buttons, or, once
    ``current_pos`` has moved past the end of ``progress``, prints an end-of-queue
    summary with a Back button.

    Reads the module-level ``progress`` and ``labeled_sample``. Button callbacks
    update ``current_pos``; the label buttons also write the chosen label into
    ``labeled_sample`` and call ``save_progress()`` before re-rendering.
    """
    global current_pos
    # wait=True defers the clear until replacement output arrives, which avoids
    # the flicker of an empty output area between two comments.
    clear_output(wait=True)

    if current_pos < 0:
        current_pos = 0

    def go_back():
        global current_pos
        current_pos = max(0, current_pos - 1)
        render()

    back_btn = widgets.Button(description='⬅️ Back', button_style='info')
    back_btn.on_click(lambda b: go_back())

    if current_pos >= len(progress):
        # Reaching the end of the queue does not mean everything is labeled:
        # skipped comments keep an empty label, so report them honestly.
        still_unlabeled = int((labeled_sample['label'] == '').sum())
        if still_unlabeled == 0:
            print("🎉 All comments labeled!")
        else:
            print(f"⏭️ End of queue: {still_unlabeled} comment(s) were skipped and are still unlabeled.")
        # Keep a way back so the last comments can still be revisited.
        if progress:
            display(back_btn)
        return

    row_idx = progress[current_pos]
    comment = labeled_sample.loc[row_idx, 'comment_text']
    print(f"📝 Comment {current_pos + 1} of {len(progress)}:\n\n{comment}\n\nChoose label:")

    # Buttons
    pos_btn = widgets.Button(description='👍 Positive', button_style='success')
    neu_btn = widgets.Button(description='😐 Neutral', button_style='warning')
    neg_btn = widgets.Button(description='👎 Negative', button_style='danger')
    skip_btn = widgets.Button(description='⏭️ Skip', button_style='')

    def label_and_next(label):
        # Store the label for the displayed row, persist immediately so a
        # disconnect loses nothing, then advance to the next comment.
        global current_pos
        labeled_sample.loc[row_idx, 'label'] = label
        save_progress()
        current_pos += 1
        render()

    def skip():
        # Advance without touching the label; the row stays unlabeled.
        global current_pos
        current_pos += 1
        render()

    pos_btn.on_click(lambda b: label_and_next('positive'))
    neu_btn.on_click(lambda b: label_and_next('neutral'))
    neg_btn.on_click(lambda b: label_and_next('negative'))
    skip_btn.on_click(lambda b: skip())

    display(widgets.HBox([pos_btn, neu_btn, neg_btn]))
    display(widgets.HBox([back_btn, skip_btn]))



In [10]:
# === START ===
# render() begins by clearing the cell output, so a status line printed before
# it is erased immediately. Draw the UI first, then print the status, and skip
# the status entirely when the queue is empty (it would read "1 of 0").
render()
if progress:
    print(f"🔄 Resuming labeling at comment {current_pos + 1} of {len(progress)}")


🎉 All comments labeled!
