In [49]:
import requests
import pandas as pd
import json
from pathlib import Path
from datetime import datetime
import re


In [50]:
# --- USER CONFIG -------------------------------------------------------------
# Video whose comments are scraped by the single-video cell.
VIDEO_LINK = "https://www.tiktok.com/@naraazizasmith/video/7383050913730268462"

# Number of comments requested per API call; the paging cursor advances by
# this same amount after every successful page.
SCRAPE_COUNT = 20

# Number of consecutive failed page requests tolerated before scraping stops.
RETRY_COUNT = 3

# NOTE(review): not referenced by any cell visible here -- confirm it is still needed.
THREAD_COUNT = 10

# Directory that receives one "<video id>.json" file per scraped video.
SAVE_PATH = Path("data")


# Scrape Manager

In [70]:
def scrape_handler(aweme_id, scrape_count, curr, timeout=10):
    """Fetch one page of comments for a TikTok video.

    Args:
        aweme_id: Video id sent as the ``aweme_id`` query parameter.
        scrape_count: Number of comments requested for this page (``count``).
        curr: Paging cursor sent as the ``cursor`` query parameter.
        timeout: Seconds to wait for the server before the request is
            abandoned. Without a timeout a stalled connection would block
            the notebook indefinitely.

    Returns:
        A ``(texts, has_more)`` tuple. On success ``texts`` is the list of
        each returned comment's ``"text"`` field and ``has_more`` is the
        response's ``"has_more"`` flag coerced to ``bool``. On any failure
        (network error, HTTP error status, non-JSON body, unexpected payload
        shape) the error is printed and ``(None, True)`` is returned so the
        caller can retry the same cursor.
    """
    url = "https://www.tiktok.com/api/comment/list/"
    # Let requests build and escape the query string instead of formatting it by hand.
    params = {"aweme_id": aweme_id, "count": scrape_count, "cursor": curr}
    headers = {
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'dnt': '1',
        'priority': 'u=1, i',
        'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
    }

    try:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        # Surface 4xx/5xx responses as failures instead of trying to parse an error page.
        response.raise_for_status()
        data_json = response.json()
        data = [comment["text"] for comment in data_json["comments"]]
        has_more = bool(data_json["has_more"])
        return data, has_more

    except Exception as e:
        # Deliberately broad: any failure is reported and turned into a
        # retryable result rather than aborting the whole scrape.
        print(f"scrape_handler: request failed (aweme_id={aweme_id}, cursor={curr}): {e!r}")
        return None, True


In [68]:
def scrape_manager(video_link, scrape_count, retry_count):
    """Scrape every comment page of one video and save the result as JSON.

    Pages are requested through ``scrape_handler`` starting at cursor 0, and
    the cursor advances by ``scrape_count`` after each successful page. A
    failed page is retried at the same cursor; scraping stops when the
    handler reports no more pages or after ``retry_count`` consecutive
    failures. Whatever was collected is written to
    ``SAVE_PATH / "<video id>.json"``.

    Args:
        video_link: Video URL containing ``video/<digits>``.
        scrape_count: Comments requested per page.
        retry_count: Consecutive failures tolerated before giving up.

    Raises:
        ValueError: If no numeric video id can be found in ``video_link``.
    """
    # Require at least one digit: an empty id would otherwise produce a file
    # named ".json" and a pointless request.
    match = re.search(r"video/([0-9]+)", video_link)
    if match is None:
        raise ValueError(f"Could not find a video id in link: {video_link!r}")
    aweme_id = match.group(1)

    comments = []
    retries = 0
    curr = 0
    has_more = True

    while has_more and retries < retry_count:
        data, has_more = scrape_handler(aweme_id, scrape_count, curr)
        if data is None:
            retries += 1
            continue
        comments.extend(data)
        curr += scrape_count
        retries = 0  # only *consecutive* failures count toward the limit

    if retries and retries >= retry_count:
        print(
            f"scrape_manager: gave up on {aweme_id} after {retries} consecutive "
            f"failures; saving {len(comments)} comments collected so far"
        )

    out = {
        "link": video_link,
        "comment_count": len(comments),
        "time_scraped": datetime.now().strftime("%d/%m/%Y, %H:%M:%S"),
        "comments": comments
    }

    # Create the output directory on first use so a fresh checkout does not
    # fail with FileNotFoundError.
    SAVE_PATH.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False writes raw non-ASCII characters (emoji, accents), so
    # the file must be opened as UTF-8 regardless of the platform default.
    with open(SAVE_PATH / (aweme_id + ".json"), "w", encoding="utf-8") as f:
        json.dump(out, f, ensure_ascii=False, indent=4)
    

In [63]:
# Scrape the single video named in USER CONFIG; the result is written to disk
# by scrape_manager rather than displayed here.
scrape_manager(
    video_link=VIDEO_LINK,
    scrape_count=SCRAPE_COUNT,
    retry_count=RETRY_COUNT,
)

# Batch Processing

In [72]:
# Videos to scrape in this batch. Commented-out links are skipped.
video_links = [
    # "https://www.tiktok.com/@youcureofficial/video/7363477383309053226",
    # "https://www.tiktok.com/@blogilates/video/7118207082913942826",
    # "https://www.tiktok.com/@blogilates/video/7370460897103449386",
    "https://www.tiktok.com/@youcureofficial/video/7384306298176785707",
    "https://www.tiktok.com/@naraazizasmith/video/7382679252577029422",
]

# Videos are processed one after another, each with the shared page size and retry limit.
for link in video_links:
    scrape_manager(
        video_link=link,
        scrape_count=SCRAPE_COUNT,
        retry_count=RETRY_COUNT,
    )
    
