In [3]:
import numpy as np
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

import warnings

# Suppress Warnings
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Read the labeling CSV (posts plus the label columns) into the shared frame.
data = pd.read_csv(filepath_or_buffer='../data/labeled_post.csv')

In [5]:
# One-time setup, kept for reference only: it adds the label columns to a CSV
# that does not have them yet. Running it again would overwrite every stored
# label with 0, so it stays commented out.
# data['has_been_labeled'] = 0
# data['market_viability'] = 0
# data['business_model'] = 0
# data['time_to_mvp'] = 0
# data.to_csv('../data/labeled_post.csv', index=False)

# Number of rows whose has_been_labeled flag is still 0.
data['has_been_labeled'].eq(0).sum()

np.int64(134)

In [6]:
# Initialize global variables
current_index = 0  # Current post index for pagination
show_all = False   # Flag to show all posts or only unlabeled ones

# Load the data only when no earlier cell has already put `data` into the
# kernel namespace; otherwise the frame that is already in memory is reused.
if 'data' not in globals():
    try:
        data = pd.read_csv('../data/labeled_post.csv')
    except Exception as e:
        print(f"Error loading data: {e}")
        data = pd.DataFrame()  # Create empty dataframe if loading fails

# Make sure the label columns exist regardless of how `data` was obtained
# (loaded by an earlier cell, loaded above, or the empty fallback frame).
# The functions below index these columns unconditionally.
for _column, _default in (
    ('has_been_labeled', 0),
    ('market_viability', None),
    ('business_model', None),
    ('time_to_mvp', None),
):
    if _column not in data.columns:
        data[_column] = _default

# Two HTML widgets: one renders the current post, the other the progress line.
post_output, status_output = widgets.HTML(), widgets.HTML()

# Label vocabularies: integer code -> display text.
# Each dict is built with enumerate(), so the codes are 0, 1, 2, ... in the
# order the names are listed below.

# Market viability
market_viability_mapping = dict(enumerate([
    "Not Viable",  # 0: not a viable software business opportunity
    "Viable",      # 1: represents a viable software business opportunity
]))

# Business model categories (representing software business models)
business_model_mapping = dict(enumerate([
    "Not Viable",       # 0: no clear software business model
    "SaaS",             # 1: Software as a Service opportunity
    "Content/Media",    # 2: content, media, or information products delivered via software
    "Marketplace",      # 3: software platform connecting two sides
    "Community",        # 4: community-driven software business
    "API/Integration",  # 5: technical service or integration
    "Info Product",     # 6: software-delivered courses, guides, educational content
]))

# Time to MVP levels for software products
time_to_mvp_mapping = dict(enumerate([
    "Undefined",        # 0: posts that aren't assessed or don't fit
    "Weekend Project",  # 1: can be built in 1-2 days
    "Week Project",     # 2: can be built in a week
    "Month Project",    # 3: requires about a month
    "Quarter Project",  # 4: takes 3+ months
]))

def create_post_html(post):
    """Build the HTML fragment that shows one post and its current labels.

    Args:
        post: Mapping-like row (a pandas Series or a dict) providing
            ``title``, ``body``, ``subreddit``, ``score`` and ``num_comments``.
            The label fields ``market_viability``, ``business_model`` and
            ``time_to_mvp`` are optional.

    Returns:
        str: HTML for the post widget. The heading uses the module-level
        ``current_index`` and ``filtered_data()`` for "Post i of n". The body
        is cut to 1500 characters, followed by "..." when it was longer.
    """
    import html  # local import: only this function needs it

    def as_text(value):
        """Return ``value`` as a string; missing values (None/NaN) become ''."""
        return '' if pd.isna(value) else str(value)

    def label_text(column, mapping):
        """Return the display text for the label stored in ``column``."""
        raw = post.get(column, 'Not labeled')
        if isinstance(raw, str):
            # A CSV round trip of a mixed column turns codes into strings
            # such as "1"; anything non-numeric is already display text.
            try:
                raw = int(float(raw))
            except (ValueError, OverflowError):
                return raw
        elif pd.isna(raw):
            return 'Not labeled'
        return str(mapping.get(raw, raw))

    # Missing bodies come back from pandas as NaN, which cannot be sliced.
    body = as_text(post['body'])
    body_preview = body[:1500] + ("..." if len(body) > 1500 else "")

    # Post content is free text from outside the notebook; escape it so markup
    # inside a post cannot alter (or inject into) the rendered widget.
    title_html = html.escape(as_text(post['title']))
    body_html = html.escape(body_preview)
    subreddit_html = html.escape(as_text(post['subreddit']))
    score_html = html.escape(as_text(post['score']))
    comments_html = html.escape(as_text(post['num_comments']))

    market_viability_text = html.escape(label_text('market_viability', market_viability_mapping))
    business_model_text = html.escape(label_text('business_model', business_model_mapping))
    time_to_mvp_text = html.escape(label_text('time_to_mvp', time_to_mvp_mapping))

    return f"""
    <div style='margin-bottom: 20px'>
        <h3>Post {current_index + 1} of {len(filtered_data())}</h3>
        <h4>Title:</h4> {title_html}
        <h4>Body:</h4> {body_html}
        <h4>Subreddit:</h4> {subreddit_html}
        <h4>Score:</h4> {score_html}
        <h4>Comments:</h4> {comments_html}
        <h4>Current labels:</h4>
        <ul>
            <li>Market Viability: {market_viability_text}</li>
            <li>Business Model: {business_model_text}</li>
            <li>Time to MVP: {time_to_mvp_text}</li>
        </ul>
    </div>
    """

def filtered_data():
    """Return the rows the UI is currently paging through.

    With ``show_all`` set this is the whole ``data`` frame itself; otherwise
    it is the subset of rows whose ``has_been_labeled`` is not 1.
    """
    if not show_all:
        still_unlabeled = data['has_been_labeled'] != 1
        return data[still_unlabeled]
    return data

def update_status():
    """Refresh ``status_output`` with labeling progress and the viable count.

    Reads the module-level ``data`` frame. A missing ``has_been_labeled`` or
    ``market_viability`` column counts as zero instead of raising, so the
    status line also works on the empty fallback frame.
    """
    total = len(data)

    if 'has_been_labeled' in data.columns:
        labeled = int((data['has_been_labeled'] == 1).sum())
    else:
        labeled = 0
    unlabeled = total - labeled

    if 'market_viability' in data.columns:
        stored = data['market_viability']
        # The dropdown stores the text 'Viable'. Rows that carry the mapping's
        # integer code (1), or the string "1" that a CSV round trip of a
        # mixed column produces, are counted as viable as well.
        is_viable = (stored == 'Viable') | (stored == 1) | (stored == '1')
        viable = int(is_viable.sum())
    else:
        viable = 0

    status_output.value = f"""
    <p>Progress: {labeled} labeled, {unlabeled} remaining
       <br>Viable Software Opportunities: {viable} identified</p>
    """

# def reset():
#     market_viability.value = 0
#     business_model.value  = 0
#     time_to_mvp.value = 0

def _stored_label_to_choice(stored, mapping):
    """Translate a stored label into the text a dropdown uses as its value.

    Accepts the display text itself, an integer/float code, or a numeric
    string (what a CSV round trip of a mixed column yields). Anything missing
    or unrecognised falls back to the mapping's entry for code 0.
    """
    default = mapping[0]
    if stored is None:
        return default
    if isinstance(stored, str):
        if stored in mapping.values():
            return stored
        try:
            stored = float(stored)
        except ValueError:
            return default
    try:
        if pd.isna(stored):
            return default
        return mapping.get(int(stored), default)
    except (TypeError, ValueError, OverflowError):
        return default


def show_post():
    """Render the post at ``current_index`` and sync the dropdowns to it.

    When the current filter has no rows a placeholder message is shown
    instead. ``current_index`` is clamped into the filtered view first, so a
    view that shrank since the last call cannot cause an out-of-range lookup.
    The status line is refreshed in every case.
    """
    global current_index

    visible = filtered_data()
    if len(visible) == 0:
        post_output.value = "<h3>No posts to show with current filter</h3>"
        update_status()
        return

    current_index = min(max(current_index, 0), len(visible) - 1)
    post = visible.iloc[current_index]
    post_output.value = create_post_html(post)

    # Preselect the stored labels. Assigning `.value` is what actually moves
    # a Dropdown. It also fires the dropdowns' value observers, which only
    # refresh the status line.
    market_viability.value = _stored_label_to_choice(
        post.get('market_viability'), market_viability_mapping)
    business_model.value = _stored_label_to_choice(
        post.get('business_model'), business_model_mapping)
    time_to_mvp.value = _stored_label_to_choice(
        post.get('time_to_mvp'), time_to_mvp_mapping)

    update_status()

def on_next_clicked(b):
    """Save the dropdown selections for the displayed post, then move on.

    Args:
        b: The clicked button (unused; required by the ``on_click`` callback
            signature).

    The three selections are written to the displayed row of ``data``, the
    row is flagged ``has_been_labeled = 1`` and the CSV is saved through
    ``auto_save()``. If the current view is empty there is nothing to label
    and only the display is refreshed.
    """
    global current_index

    visible = filtered_data()
    if not 0 <= current_index < len(visible):
        # Nothing is on screen (e.g. everything is already labeled).
        show_post()
        return

    # Get the actual index in the original dataframe
    current_post_idx = visible.index[current_index]

    selections = (
        ('market_viability', market_viability.value),
        ('business_model', business_model.value),
        ('time_to_mvp', time_to_mvp.value),
    )
    for column, choice in selections:
        # The dropdown values are strings. Writing a string into a numeric
        # column relies on silent upcasting, which recent pandas versions
        # warn about or reject, so make the column object-typed first.
        if column in data.columns and data[column].dtype != object:
            data[column] = data[column].astype(object)
        data.at[current_post_idx, column] = choice

    # Mark as labeled in the original dataframe
    data.at[current_post_idx, 'has_been_labeled'] = 1

    # Save changes to CSV
    auto_save()

    remaining = filtered_data()
    if len(remaining) == 0:
        # Keep this message on screen; calling show_post() would replace it.
        current_index = 0
        post_output.value = "<h3>No more unlabeled posts!</h3>"
        return

    # In the unlabeled-only view the row just saved has left the filter, so
    # the next post has already slid into the current position. Advancing
    # there would skip a post. Only step forward when the row is still
    # present (the "show all" view).
    if current_post_idx in remaining.index:
        current_index += 1
    current_index = min(current_index, len(remaining) - 1)

    show_post()

def on_prev_clicked(b):
    """Step back one post in the current view; do nothing on the first post.

    Args:
        b: The clicked button (unused).
    """
    global current_index
    if current_index <= 0:
        return
    current_index -= 1
    show_post()

def on_label_change(change):
    """Observer for the dropdowns' ``value`` trait.

    Nothing is written to the dataframe here; the selections are stored only
    when Next is clicked. The callback just refreshes the status line.

    Args:
        change: The change notification passed by the observer (unused).
    """
    update_status()

def auto_save():
    """Write ``data`` back to the labeling CSV and refresh the status line.

    If the frame still uses the older column names (``is_problem`` present,
    ``market_viability`` absent), the three old columns are renamed in place
    to the current names before saving.
    """
    print('Auto-saving progress...')

    legacy_to_current = {
        'is_problem': 'market_viability',
        'problem_type': 'business_model',
        'difficulty': 'time_to_mvp'
    }
    uses_legacy_names = 'is_problem' in data.columns
    uses_current_names = 'market_viability' in data.columns
    if uses_legacy_names and not uses_current_names:
        # In-place rename: `data` is the shared module-level frame.
        data.rename(columns=legacy_to_current, inplace=True)

    data.to_csv('../data/labeled_post.csv', index=False)
    update_status()

def toggle_view(b):
    """Switch between the "all posts" and "unlabeled only" views.

    Args:
        b: The button that was clicked. Its caption is updated to show the
            active view. When ``None`` is passed (a programmatic call), a
            module-level ``toggle_button`` is updated instead if one exists.

    Flips ``show_all``, resets ``current_index`` to the first post of the new
    view and re-renders.
    """
    global current_index, show_all
    show_all = not show_all
    current_index = 0  # Reset to the first post when switching views

    caption = 'Showing: ' + ('All' if show_all else 'Unlabeled')
    # Prefer the button handed to the callback, so the function does not
    # depend on a global name that may not have been created.
    button = b if b is not None else globals().get('toggle_button')
    if button is not None:
        button.description = caption

    show_post()

# Create widgets
prev_button = widgets.Button(
    description='Previous',
    button_style='info',
    layout=widgets.Layout(width='100px')
)
next_button = widgets.Button(
    description='Next',
    button_style='info',
    layout=widgets.Layout(width='100px')
)
# Switches between the unlabeled-only view and the full list (see toggle_view).
toggle_button = widgets.Button(
    description='Showing: ' + ('All' if show_all else 'Unlabeled'),
    layout=widgets.Layout(width='180px')
)

# Dropdown option pairs are (label, value). Passing only the category names
# makes each name both the visible label and the stored value, so the menus
# show "Viable", "SaaS", ... rather than the numeric codes, while `.value`
# stays the same text that is written to the CSV.
market_viability = widgets.Dropdown(
    options=list(market_viability_mapping.values()),
    description='Market Viability:',
    layout=widgets.Layout(width='300px')
)

business_model = widgets.Dropdown(
    options=list(business_model_mapping.values()),
    description='Business Model:',
    layout=widgets.Layout(width='300px')
)

time_to_mvp = widgets.Dropdown(
    options=list(time_to_mvp_mapping.values()),
    description='Time to MVP:',
    layout=widgets.Layout(width='300px')
)

# Register callbacks
prev_button.on_click(on_prev_clicked)
next_button.on_click(on_next_clicked)
toggle_button.on_click(toggle_view)
market_viability.observe(on_label_change, names='value')
business_model.observe(on_label_change, names='value')
time_to_mvp.observe(on_label_change, names='value')

# Create the layout
navigation = widgets.HBox([prev_button, next_button, toggle_button])
controls = widgets.VBox([market_viability, business_model, time_to_mvp])
layout = widgets.VBox([post_output, status_output, navigation, controls])

# Initial display
show_post()
display(layout)

VBox(children=(HTML(value="\n    <div style='margin-bottom: 20px'>\n        <h3>Post 1 of 134</h3>\n        <h…