*This notebook applies GPT-4o classification to the Child Objectification Dataset.*

Load Libraries and OpenAI Key

In [None]:
import pandas as pd
from openai import OpenAI
import math
import json
import time

In [None]:
import os

# Build the OpenAI client used by every classification cell below.
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# saved inside the notebook; the inline placeholder is kept as a fallback for
# users who paste their key directly.
client = OpenAI(
  api_key=os.environ.get("OPENAI_API_KEY", 'INSERT KEY'),
)

Load Data

In [None]:
# Read the spreadsheet and keep only the rows whose "text" cell is filled in.
df = pd.read_excel("/INSERT-DATA-PATH.xlsx")
comments = df.loc[df["text"].notna(), "text"].tolist()
num_comments = len(comments)

# Sanity check: show how many comments will be sent for classification.
print(f"Total comments to classify: {num_comments}")

Execution for individual prompts

NEUTRAL ZERO

In [None]:
import time
import pandas as pd
import re

# Announce the size of the run before the slow one-by-one loop starts.
num_comments = len(comments)
print(
    f"Running full classification on {num_comments} comments (One-by-One)..."
)

# Function to classify each comment individually
def classify_comments_individually(comments, api_client=None, delay=1):
    """Label each comment with the neutral zero-shot prompt, one request per comment.

    Args:
        comments: Sequence of comment texts; each is interpolated into the prompt.
        api_client: Object exposing ``chat.completions.create``. When omitted,
            the notebook-level ``client`` is used.
        delay: Seconds to sleep after every comment, whether or not the
            request succeeded.

    Returns:
        A list of ``(comment, classification)`` tuples in input order, where
        classification is ``"1"``, ``"0"`` or ``"ERROR"`` (the request failed
        or the reply could not be read as a single label).
    """
    if api_client is None:
        api_client = client  # OpenAI client created earlier in the notebook

    # The prompt presents the labels as **1** / **0**, so the model sometimes
    # echoes that decoration (or a "Classification:" prefix). Accept those
    # forms, but still reject anything that is not exactly one 0/1 label.
    label_pattern = re.compile(r"\W*(?:classification\W*)?([01])\W*", re.IGNORECASE)

    # Static prompt parts are built once; only the comment changes per request.
    # NOTE(review): the wording (including "Take not") is kept verbatim so
    # results stay comparable with earlier runs of this prompt.
    instructions = (
        "Extract all comments that describe the physical appearance of the child in the video, "
        "including those that consist only of emojis. The comments should specifically refer to the child "
        "shown and include observations about their appearance. Only include comments that are positive or neutral in tone. "
        "Ignore any comments that do not reference the child’s physical appearance or visual traits. Include comments that can be used to describe appearance.\n\n"
        "Take not that these are TikTok comments, so be aware of TikTok specific language.\n\n"
        "For this comment, classify it as:\n"
        "- **1** if it describes the child's physical appearance.\n"
        "- **0** if it does not.\n\n"
    )
    answer_cue = "Classification (ONLY return 1 or 0):"

    total = len(comments)
    results = []

    for i, comment in enumerate(comments):
        prompt = f"{instructions}Comment: {comment}\n{answer_cue}"

        messages = [
            {"role": "system", "content": "You are an expert in content moderation and child protection."},
            {"role": "user", "content": prompt}
        ]

        try:
            # API call for one comment at a time
            response = api_client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                temperature=0.1
            )
            reply = response.choices[0].message.content
            # A missing (non-string) reply is treated like an unreadable one.
            match = label_pattern.fullmatch(reply) if isinstance(reply, str) else None
            classification = match.group(1) if match else "ERROR"
        except Exception as e:
            # Best effort: record the failure and keep going with the next comment.
            print(f"Error for comment: {comment}\nError: {e}")
            classification = "ERROR"

        results.append((comment, classification))

        # Progress is reported on both the success and the failure path.
        if i % 200 == 0:
            print(f"Processed {i+1}/{total} comments...")

        # Throttle after every request, including failed ones, so repeated
        # errors (e.g. rate limiting) do not turn into a tight retry loop.
        time.sleep(delay)

    return results

# Destination of the labelled comments (fill in before running).
output_path = "/INSERT-FILE-PATH.xlsx"

# Classify every loaded comment, then tabulate the (comment, label) pairs.
full_results = classify_comments_individually(comments)
df_full = pd.DataFrame(full_results, columns=["comment", "classification"])

# Write the table to Excel without the positional index column.
df_full.to_excel(output_path, index=False)



NEUTRAL FEW

In [None]:
import time
import pandas as pd
import re

# Announce the size of the run before the slow one-by-one loop starts.
num_comments = len(comments)
print(
    f"Running full classification on {num_comments} comments (One-by-One)..."
)

# Function to classify each comment individually
def classify_comments_individually(comments, api_client=None, delay=1):
    """Label each comment with the neutral few-shot prompt, one request per comment.

    Args:
        comments: Sequence of comment texts; each is interpolated into the prompt.
        api_client: Object exposing ``chat.completions.create``. When omitted,
            the notebook-level ``client`` is used.
        delay: Seconds to sleep after every comment, whether or not the
            request succeeded.

    Returns:
        A list of ``(comment, classification)`` tuples in input order, where
        classification is ``"1"``, ``"0"`` or ``"ERROR"`` (the request failed
        or the reply could not be read as a single label).
    """
    if api_client is None:
        api_client = client  # OpenAI client created earlier in the notebook

    # The examples in the prompt show labels as **1** / **0**, so the model
    # sometimes echoes that decoration (or a "Classification:" prefix). Accept
    # those forms, but still reject anything that is not exactly one label.
    label_pattern = re.compile(r"\W*(?:classification\W*)?([01])\W*", re.IGNORECASE)

    # Static prompt parts are built once; only the comment changes per request.
    # NOTE(review): the wording (including "Take not") is kept verbatim so
    # results stay comparable with earlier runs of this prompt.
    instructions = (
        "Extract all comments that describe the physical appearance of the child in the video, "
        "including those that consist only of emojis. The comments should specifically refer to the child "
        "shown and include observations about their appearance. Only include comments that are positive or neutral in tone. "
        "Ignore any comments that do not reference the child’s physical appearance or visual traits. Include comments that can be used to describe appearance.\n"
        "Take not that these are TikTok comments, so be aware of TikTok specific language.\n\n"
        "### Examples:\n\n"

        "#### **Positive/Neutral Comments About Physical Appearance (Classification: 1)**\n"
        "- **Example 1:**\n"
        "  - Comment: 'She is cute'\n"
        "  - Classification: **1**\n\n"
        "- **Example 2:**\n"
        "  - Comment: 'I’ve never seen anything cuter'\n"
        "  - Classification: **1**\n\n"
        "- **Example 3:**\n"
        "  - Comment: 'I keep wondering who he looks like and I finally figured out. He looks like Harry Potter'\n"
        "  - Classification: **1**\n\n"

        "#### **Comments That Do Not Clearly Describe Physical Appearance (Classification: 0)**\n"
        "- **Example 4:**\n"
        "  - Comment: 'The way she says ball is so darn cute!!'\n"
        "  - Classification: **0**\n\n"

        "For this comment, classify it as:\n"
        "- **1** if it describes the child's physical appearance.\n"
        "- **0** if it does not.\n\n"
    )
    answer_cue = "Classification (ONLY return 1 or 0):"

    total = len(comments)
    results = []

    for i, comment in enumerate(comments):
        prompt = f"{instructions}Comment: {comment}\n{answer_cue}"

        messages = [
            {"role": "system", "content": "You are an expert in content moderation and child protection."},
            {"role": "user", "content": prompt}
        ]

        try:
            # API call for one comment at a time
            response = api_client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                temperature=0.1
            )
            reply = response.choices[0].message.content
            # A missing (non-string) reply is treated like an unreadable one.
            match = label_pattern.fullmatch(reply) if isinstance(reply, str) else None
            classification = match.group(1) if match else "ERROR"
        except Exception as e:
            # Best effort: record the failure and keep going with the next comment.
            print(f"Error for comment: {comment}\nError: {e}")
            classification = "ERROR"

        results.append((comment, classification))

        # Progress is reported on both the success and the failure path.
        if i % 200 == 0:
            print(f"Processed {i+1}/{total} comments...")

        # Throttle after every request, including failed ones, so repeated
        # errors (e.g. rate limiting) do not turn into a tight retry loop.
        time.sleep(delay)

    return results

# Destination of the labelled comments (fill in before running).
output_path = "/INSERT-FILE-PATH.xlsx"

# Classify every loaded comment, then tabulate the (comment, label) pairs.
full_results = classify_comments_individually(comments)
df_full = pd.DataFrame(full_results, columns=["comment", "classification"])

# Write the table to Excel without the positional index column.
df_full.to_excel(output_path, index=False)


🚀 Running full classification on 5384 comments (One-by-One)...
✅ Processed 1/5384 comments...
✅ Processed 201/5384 comments...
✅ Processed 401/5384 comments...
✅ Processed 601/5384 comments...
✅ Processed 801/5384 comments...
✅ Processed 1001/5384 comments...
✅ Processed 1201/5384 comments...
✅ Processed 1401/5384 comments...
✅ Processed 1601/5384 comments...
✅ Processed 1801/5384 comments...
✅ Processed 2001/5384 comments...
✅ Processed 2201/5384 comments...
✅ Processed 2401/5384 comments...
✅ Processed 2601/5384 comments...
✅ Processed 2801/5384 comments...
✅ Processed 3001/5384 comments...
✅ Processed 3201/5384 comments...
✅ Processed 3401/5384 comments...
✅ Processed 3601/5384 comments...
✅ Processed 3801/5384 comments...
✅ Processed 4001/5384 comments...
✅ Processed 4201/5384 comments...
✅ Processed 4401/5384 comments...
✅ Processed 4601/5384 comments...
✅ Processed 4801/5384 comments...
✅ Processed 5001/5384 comments...
✅ Processed 5201/5384 comments...

✅ Final check: 5384 class

NEGATIVE ZERO

In [None]:
import time
import pandas as pd
import re

# Announce the size of the run before the slow one-by-one loop starts.
num_comments = len(comments)
print(
    f"Running full classification on {num_comments} comments (One-by-One)..."
)

# Function to classify each comment individually
def classify_comments_individually(comments, api_client=None, delay=1):
    """Label each comment with the objectification zero-shot prompt, one request per comment.

    Args:
        comments: Sequence of comment texts; each is interpolated into the prompt.
        api_client: Object exposing ``chat.completions.create``. When omitted,
            the notebook-level ``client`` is used.
        delay: Seconds to sleep after every comment, whether or not the
            request succeeded.

    Returns:
        A list of ``(comment, classification)`` tuples in input order, where
        classification is ``"1"``, ``"0"`` or ``"ERROR"`` (the request failed
        or the reply could not be read as a single label).
    """
    if api_client is None:
        api_client = client  # OpenAI client created earlier in the notebook

    # This prompt ends with a bare "# Classification:" and shows the labels as
    # **1** / **0**, so replies such as "**1**" or "Classification: 0" are
    # plausible. Accept those forms, but still reject anything that is not
    # exactly one 0/1 label.
    label_pattern = re.compile(r"\W*(?:classification\W*)?([01])\W*", re.IGNORECASE)

    # Static prompt parts are built once; only the comment changes per request.
    # NOTE(review): the wording (including "Take not") is kept verbatim so
    # results stay comparable with earlier runs of this prompt.
    instructions = (
        "# Identify comments that describe the physical appearance of the child \n"
        "# in the video, including those consisting only of emojis. Focus on \n"
        "# language that may be objectifying, overly personal, possessive, \n"
        "# sexualizing, diminishing, or otherwise inappropriate in tone.\n"
        "Take not that these are TikTok comments, so be aware of TikTok specific language.\n"
        "# Exclude comments that are unrelated to the child.\n"
        "#\n"
        "# Classify the comment as:\n"
        "# - **1** if it contains objectifying, sexualizing, possessive, or overly personal \n"
        "#   descriptions of the child’s appearance.\n"
        "# - **0** if it does not.\n"
        "#\n"
        "# Comment:\n"
    )
    answer_cue = (
        "#\n"
        "# Classification:"
    )

    total = len(comments)
    results = []

    for i, comment in enumerate(comments):
        prompt = f"{instructions}# {comment}\n{answer_cue}"

        messages = [
            {"role": "system", "content": "You are an expert in content moderation and child protection."},
            {"role": "user", "content": prompt}
        ]

        try:
            # API call for one comment at a time
            response = api_client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                temperature=0.1
            )
            reply = response.choices[0].message.content
            # A missing (non-string) reply is treated like an unreadable one.
            match = label_pattern.fullmatch(reply) if isinstance(reply, str) else None
            classification = match.group(1) if match else "ERROR"
        except Exception as e:
            # Best effort: record the failure and keep going with the next comment.
            print(f"Error for comment: {comment}\nError: {e}")
            classification = "ERROR"

        results.append((comment, classification))

        # Progress is reported on both the success and the failure path.
        if i % 200 == 0:
            print(f"Processed {i+1}/{total} comments...")

        # Throttle after every request, including failed ones, so repeated
        # errors (e.g. rate limiting) do not turn into a tight retry loop.
        time.sleep(delay)

    return results

# Destination of the labelled comments (fill in before running).
output_path = "/INSERT-FILE-PATH.xlsx"

# Classify every loaded comment, then tabulate the (comment, label) pairs.
full_results = classify_comments_individually(comments)
df_full = pd.DataFrame(full_results, columns=["comment", "classification"])

# Write the table to Excel without the positional index column.
df_full.to_excel(output_path, index=False)

NEGATIVE FEW

In [None]:
import time
import pandas as pd
import re

# Announce the size of the run before the slow one-by-one loop starts.
num_comments = len(comments)
print(
    f"Running full classification on {num_comments} comments (One-by-One)..."
)

# **Function to classify each comment individually**
def classify_comments_individually(comments, api_client=None, delay=1):
    """Label each comment with the objectification few-shot prompt, one request per comment.

    Args:
        comments: Sequence of comment texts; each is interpolated into the prompt.
        api_client: Object exposing ``chat.completions.create``. When omitted,
            the notebook-level ``client`` is used.
        delay: Seconds to sleep after every comment, whether or not the
            request succeeded.

    Returns:
        A list of ``(comment, classification)`` tuples in input order, where
        classification is ``"1"``, ``"0"`` or ``"ERROR"`` (the request failed
        or the reply could not be read as a single label).
    """
    if api_client is None:
        api_client = client  # OpenAI client created earlier in the notebook

    # This prompt ends with a bare "# Classification:" and its examples show
    # labels as **1** / **0**, so replies such as "**1**" or
    # "Classification: 0" are plausible. Accept those forms, but still reject
    # anything that is not exactly one 0/1 label.
    label_pattern = re.compile(r"\W*(?:classification\W*)?([01])\W*", re.IGNORECASE)

    # Static prompt parts are built once; only the comment changes per request.
    # NOTE(review): the wording (including "Take not") is kept verbatim so
    # results stay comparable with earlier runs of this prompt.
    instructions = (
        "# Identify comments that describe the physical appearance of the child \n"
        "# in the video, including those consisting only of emojis. Focus on \n"
        "# language that may be objectifying, overly personal, possessive, \n"
        "# sexualizing, diminishing, or otherwise inappropriate in tone.\n"
        "Take not that these are TikTok comments, so be aware of TikTok specific language.\n"
        "# Exclude comments that are unrelated to the child.\n\n"
        "### Examples:\n\n"

        "#### **Comments That Objectify the Child (Classification: 1)**\n"
        "- **Example 1:**\n"
        "  - Comment: 'What a cute little princess 🥰🥰🥰🥰'\n"
        "  - Classification: **1**\n\n"

        "- **Example 2:**\n"
        "  - Comment: 'I Love You 💝. SO Much.🥰@'\n"
        "  - Classification: **1**\n\n"

        "- **Example 3:**\n"
        "  - Comment: 'She is a doll, I can’t wait to see her in the Olympics'\n"
        "  - Classification: **1**\n\n"

        "#### **Comments That Do Not Objectify the Child (Classification: 0)**\n"
        "- **Example 4:**\n"
        "  - Comment: 'She looks adorable today!'\n"
        "  - Explanation: Compliments the child’s appearance in a normal, affectionate way without reducing her to an object or fantasy.\n"
        "  - Classification: **0**\n\n"

        "#\n"
        "For this comment, classify it as:\n"
        "# - **1** if it contains objectifying, sexualizing, possessive, or overly personal \n"
        "#   descriptions of the child’s appearance.\n"
        "# - **0** if it does not.\n"
        "#\n"
        "# Comment:\n"
    )
    answer_cue = (
        "#\n"
        "# Classification:"
    )

    total = len(comments)
    results = []

    for i, comment in enumerate(comments):
        prompt = f"{instructions}# {comment}\n{answer_cue}"

        messages = [
            {"role": "system", "content": "You are an expert in content moderation and child protection."},
            {"role": "user", "content": prompt}
        ]

        try:
            # API call for one comment at a time
            response = api_client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                temperature=0.1
            )
            reply = response.choices[0].message.content
            # A missing (non-string) reply is treated like an unreadable one.
            match = label_pattern.fullmatch(reply) if isinstance(reply, str) else None
            classification = match.group(1) if match else "ERROR"
        except Exception as e:
            # Best effort: record the failure and keep going with the next comment.
            print(f"Error for comment: {comment}\nError: {e}")
            classification = "ERROR"

        results.append((comment, classification))

        # Progress is reported on both the success and the failure path.
        if i % 200 == 0:
            print(f"Processed {i+1}/{total} comments...")

        # Throttle after every request, including failed ones, so repeated
        # errors (e.g. rate limiting) do not turn into a tight retry loop.
        time.sleep(delay)

    return results

# Destination of the labelled comments (fill in before running).
output_path = "/INSERT-FILE-PATH.xlsx"

# Classify every loaded comment, then tabulate the (comment, label) pairs.
full_results = classify_comments_individually(comments)
df_full = pd.DataFrame(full_results, columns=["comment", "classification"])

# Write the table to Excel without the positional index column.
df_full.to_excel(output_path, index=False)
