In [5]:
import requests
import pandas as pd
import time
import re
from tqdm import tqdm
import json

# Reaching this line means every import in this cell succeeded; a missing
# package would have raised ImportError before the message is printed.
print("✅ Setup Complete. Libraries for API requests are installed.")

✅ Setup Complete. Libraries for API requests are installed.


In [8]:
# =============================================================================
# Cell 2: Modular Functions (API Logic)
# =============================================================================

def get_video_id_from_url(video_url):
    """
    Extracts the unique video ID from a full TikTok URL using regex.
    e.g., '.../video/7548476830056025374' -> '7548476830056025374'

    Args:
        video_url (str): A URL containing a '/video/<digits>' path segment.

    Returns:
        str or None: The run of digits that follows '/video/', or None when
        the input is not a string or contains no such segment.
    """
    print("Extracting video ID...")

    # re.search raises TypeError on None / non-string input (e.g. an unset
    # config value), so treat such input like any other unusable URL.
    match = re.search(r"/video/(\d+)", video_url) if isinstance(video_url, str) else None
    if match is None:
        print("❌ Could not find video ID in the URL.")
        return None

    video_id = match.group(1)
    print(f"✅ Found Video ID: {video_id}")
    return video_id

def fetch_comments_page(video_id, cursor=0, count=50, timeout=10):
    """
    Fetches a single page of comments from TikTok's internal API.

    Args:
        video_id (str): The unique ID of the video.
        cursor (int): The starting point for the comment page (0 for the first page).
        count (int): How many comments to fetch per request (max seems to be around 50).
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout `requests` can block indefinitely on a stalled
            connection, which would hang the notebook cell.

    Returns:
        dict: The JSON response from the API, or None if the request fails,
        times out, returns an HTTP error status, or the body is not valid JSON.
    """
    # This is the base API endpoint. The parameters will be added by `requests`.
    API_ENDPOINT = "https://www.tiktok.com/api/comment/list/"

    params = {
        "aid": "1988",
        "aweme_id": video_id,
        "count": count,
        "cursor": cursor
    }

    # We add headers to look like a real browser request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.5",
    }

    try:
        response = requests.get(API_ENDPOINT, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()  # This will raise an error for bad responses (4xx or 5xx)
        return response.json()
    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and HTTP error statuses.
        print(f"❌ API request failed: {e}")
        return None
    except ValueError as e:
        # A 200 response with an empty or non-JSON body makes .json() fail.
        # Depending on the installed `requests` version that surfaces as a
        # plain ValueError subclass rather than a RequestException, so it is
        # handled separately to keep the "return None on failure" contract.
        print(f"❌ API returned a non-JSON response: {e}")
        return None

def extract_data_from_response(response_json):
    """
    Parses the JSON response to extract relevant comment data.

    Args:
        response_json (dict or None): Decoded API response. Read keys are
            "comments" (list of comment dicts), "cursor" and "has_more".

    Returns:
        tuple: (comments_data, new_cursor, has_more) where
            comments_data is a list of dicts with the keys 'username',
            'comment_text', 'digg_count' and 'create_time';
            new_cursor is the response's "cursor" value (0 if absent);
            has_more is True only when the response's "has_more" equals 1.
        When the response is empty or has no usable "comments" entry the
        result is ([], 0, False).
    """
    comments_data = []
    if not response_json or "comments" not in response_json or response_json["comments"] is None:
        return comments_data, 0, False # No comments, new cursor is 0, no more pages

    for comment in response_json["comments"]:
        # `.get('user', {})` only falls back when the key is absent; a
        # present-but-null 'user' would come back as None and crash on the
        # chained `.get`. `or {}` covers both the missing and the null case.
        user = comment.get('user') or {}
        comments_data.append({
            'username': user.get('nickname', 'N/A'),
            'comment_text': comment.get('text', ''),
            'digg_count': comment.get('digg_count', 0),
            'create_time': comment.get('create_time', 0)
        })

    new_cursor = response_json.get("cursor", 0)
    has_more = response_json.get("has_more", 0) == 1

    return comments_data, new_cursor, has_more

def save_to_csv(data, filename="tiktok_comments_api1.csv"):
    """
    Write the collected comment records to a CSV file.

    Args:
        data: Records accepted by `pd.DataFrame` (here: a list of dicts).
            When it is empty/falsy nothing is written.
        filename (str): Destination path of the CSV file.

    Returns:
        None
    """
    if data:
        # 'utf-8-sig' writes a BOM at the start of the file; no index column.
        frame = pd.DataFrame(data)
        frame.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"💾 Data successfully saved to {filename}.")
    else:
        print("No data to save.")

In [9]:
# =============================================================================
# Cell 3: Configuration and Main Execution
# =============================================================================

# --- Configuration ---
VIDEO_URL = "https://www.tiktok.com/@obsii_boon/video/7559164735350967608" # Paste the URL here
MAX_COMMENTS_TO_FETCH = 200 # Set your desired total number of comments
DELAY_BETWEEN_REQUESTS = 1.0 # Seconds to wait to be respectful to the API

# --- Main Execution Logic ---
# Pages through the comment API until MAX_COMMENTS_TO_FETCH comments are
# collected, the API reports no further pages, or a request fails; then
# writes whatever was collected to CSV.
video_id = get_video_id_from_url(VIDEO_URL)

if video_id:
    all_comments = []
    current_cursor = 0
    has_more_comments = True

    # The context manager closes the progress bar even if the loop raises or
    # the kernel is interrupted, so later cell output is not garbled.
    with tqdm(total=MAX_COMMENTS_TO_FETCH, desc="Fetching comments") as pbar:
        while len(all_comments) < MAX_COMMENTS_TO_FETCH and has_more_comments:
            # --- Step 1: Fetch a page of data from the API ---
            response_data = fetch_comments_page(video_id, cursor=current_cursor)

            if not response_data:
                # If the API request failed, stop trying.
                print("Halting due to API request failure.")
                break

            # --- Step 2: Extract and process the data ---
            comments_page, new_cursor, has_more_comments = extract_data_from_response(response_data)

            if not comments_page: # Break if no comments were returned
                print("No more comments found on this page.")
                break

            # Only count up to the requested total so the bar never overshoots
            # when the last page carries more comments than are still needed.
            remaining = MAX_COMMENTS_TO_FETCH - len(all_comments)
            all_comments.extend(comments_page)
            pbar.update(min(len(comments_page), remaining))

            # --- Step 3: Prepare for the next loop ---
            if has_more_comments and new_cursor == current_cursor:
                # Same cursor again would return the same page and fill the
                # result with duplicates until the cap is reached.
                print("Cursor did not advance; stopping to avoid re-fetching the same page.")
                break
            current_cursor = new_cursor

            # Be a good internet citizen and wait before the next request;
            # skip the wait when no further request will be made.
            if has_more_comments and len(all_comments) < MAX_COMMENTS_TO_FETCH:
                time.sleep(DELAY_BETWEEN_REQUESTS)

    # --- Step 4: Save the final results ---
    # Trim the list to the exact number requested
    final_comments = all_comments[:MAX_COMMENTS_TO_FETCH]
    print(f"\nTotal comments fetched: {len(final_comments)}")
    save_to_csv(final_comments)
else:
    print("Could not proceed without a valid Video ID.")

Extracting video ID...
✅ Found Video ID: 7559164735350967608


Fetching comments: 100%|██████████| 200/200 [00:04<00:00, 40.63it/s]


Total comments fetched: 200
💾 Data successfully saved to tiktok_comments_api1.csv.



