In [None]:
import requests
import dotenv
import os, time
import pandas as pd

dotenv.load_dotenv()

In [None]:
# Storage roots come from environment variables. os.getenv returns None when
# a variable is unset, in which case the f-strings below produce paths that
# start with the literal text "None/".
scratch = os.getenv('SCRATCH')
# Folder for hashtag data, located under the SCRATCH root.
hashtag_folder = f"{scratch}/tiktok/hashtag_data"

oak = os.getenv('OAK')
# Folder for video data, located under the OAK root.
video_folder = f"{oak}/samori/tiktok/test_folder2"
# Last expression of the cell: displays True/False for whether video_folder exists.
os.path.exists(video_folder)

In [None]:
def get_access_token(client_key, client_secret):
    """Request a client-credentials access token from the TikTok OAuth endpoint.

    Args:
        client_key: Client key sent as the ``client_key`` form field.
        client_secret: Client secret sent as the ``client_secret`` form field.

    Returns:
        The parsed JSON body of the response when the HTTP status is 200.
        ``None`` for any other status, after the error body has been printed.
    """
    endpoint_url = "https://open.tiktokapis.com/v2/oauth/token/"
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    data = {
        'client_key': client_key,
        'client_secret': client_secret,
        'grant_type': 'client_credentials'
    }

    response = requests.post(endpoint_url, headers=headers, data=data)

    if response.status_code == 200:
        return response.json()

    # An error body is not guaranteed to be JSON; decoding it blindly would
    # raise here and hide the actual HTTP failure, so fall back to raw text.
    try:
        error_body = response.json()
    except ValueError:
        error_body = response.text
    print("Error:", error_body)
    return None

In [None]:
def make_request_video(endpoint, query_body, query_params, headers, save_folder,
                       max_retries=10, max_cursor_skips=10, retry_delay=5):
    """Fetch one page of video results and save it as a CSV file.

    The request is POSTed to ``endpoint``. A non-200 response is retried up to
    ``max_retries`` times. If the response is then still a 500, the cursor in
    ``query_body`` is advanced by 10 and the request repeated, at most
    ``max_cursor_skips`` times, so a persistently failing server can no longer
    keep this function looping forever.

    Args:
        endpoint: URL to POST to.
        query_body: JSON body; must contain "start_date", "end_date" and
            "cursor", and may contain "search_id". Its "cursor" entry is
            modified in place when cursor skipping happens.
        query_params: Query-string parameters for the request.
        headers: HTTP headers for the request.
        save_folder: Folder in which the page CSV is written.
        max_retries: Maximum number of plain retries after a non-200 response.
        max_cursor_skips: Maximum number of cursor advances on a 500 response.
        retry_delay: Seconds to wait before each retry or cursor skip.

    Returns:
        A tuple ``(search_id, has_more, cursor, samples, status_code)``.
        On success ``samples`` is the number of rows saved. On failure
        ``has_more`` is False, ``search_id`` is the one from ``query_body``
        (None if it had none), ``cursor`` is the current cursor of
        ``query_body`` and ``samples`` is the error body as a string.
    """
    start_date = query_body["start_date"]
    end_date = query_body["end_date"]
    prev_cursor = query_body["cursor"]
    # Only follow-up pages carry a search_id; a first-page request has none,
    # so it must not be read with a plain key lookup.
    prev_search_id = query_body.get("search_id")

    def _post():
        return requests.post(endpoint, json=query_body, params=query_params, headers=headers)

    response = _post()

    # plain retries, waiting before each new attempt
    attempts = 0
    while response.status_code != 200 and attempts < max_retries:
        time.sleep(retry_delay)
        response = _post()
        attempts += 1

    # still a server error: advance the cursor to see if there will be some luck
    skips = 0
    while response.status_code == 500 and skips < max_cursor_skips:
        query_body["cursor"] = query_body["cursor"] + 10
        time.sleep(retry_delay)
        response = _post()
        skips += 1

    status_code = response.status_code

    if status_code != 200:
        # An error body is not guaranteed to be JSON; fall back to raw text.
        try:
            error_body = response.json()
        except ValueError:
            error_body = response.text
        print("Error code", status_code)
        print(error_body)
        return prev_search_id, False, query_body["cursor"], str(error_body), status_code

    # extracting information for pagination
    data = response.json().get("data", {})

    has_more = data["has_more"]
    cursor = data["cursor"]

    # reuse the previous search_id when a follow-up page does not echo one
    if "search_id" not in data and prev_cursor > 0:
        search_id = prev_search_id
    else:
        search_id = data["search_id"]

    # saving queried data
    videos = data.get("videos", [])  # note: update to be compatible with other endpoints
    df = pd.DataFrame(videos)
    df.to_csv(f"{save_folder}/videos_{start_date}_{end_date}_{cursor}_{search_id}.csv", index=False)

    return search_id, has_more, cursor, len(df), status_code

In [None]:
def query_api_video(endpoint_name, fields, query, cursor,
                    start_date, end_date, save_folder, search_id=None):
    """Page through the video query endpoint, saving and logging every page.

    Args:
        endpoint_name: Key into the endpoint table below (currently "video").
        fields: Value sent as the ``fields`` query parameter.
        query: Value sent as the ``query`` entry of the request body.
        cursor: Cursor to start from; 0 starts a new search.
        start_date: Start date; sent as ``str(start_date)``.
        end_date: End date; sent as ``str(end_date)``.
        save_folder: Folder handed to ``make_request_video`` for the page CSVs.
        search_id: Search id of the search being continued. Required when
            ``cursor`` is not 0, ignored for a new search.

    Returns:
        The status code reported for the last page request.

    Raises:
        ValueError: If ``cursor`` is not 0 and no ``search_id`` is given.
        RuntimeError: If no access token could be obtained.
    """
    # A non-zero cursor continues an earlier search, so that search's id must
    # be supplied up front; previously this case hit an unbound local variable.
    if cursor != 0 and search_id is None:
        raise ValueError("search_id is required when starting from a non-zero cursor")

    # getting credentials
    client_key = os.getenv("CLIENT_KEY")
    client_secret = os.getenv("CLIENT_SECRET")

    credentials = get_access_token(client_key, client_secret)
    if not credentials:
        raise RuntimeError("could not obtain an access token; check CLIENT_KEY and CLIENT_SECRET")
    access_token = credentials["access_token"]
    token_type = credentials["token_type"]

    # note: update this to have url of other endpoints
    endpoints_dict = {"video": "https://open.tiktokapis.com/v2/research/video/query/"}

    query_params = {"fields": fields}
    query_body = {"query": query, "max_count": 100, "cursor": cursor,
                  "start_date": str(start_date), "end_date": str(end_date)}
    headers = {"Content-Type": "application/json",
               "Authorization": f"{token_type} {access_token}"}

    endpoint = endpoints_dict[endpoint_name]
    has_more = True

    # pagination loop
    while has_more:
        if cursor != 0:
            query_body.update({"search_id": search_id,
                               "cursor": cursor})
        search_id, has_more, cursor, samples, status_code = make_request_video(
            endpoint, query_body, query_params, headers, save_folder)

        # one tab-separated log line per page, appended to a per-search log file
        with open(f"./download_logs_{start_date}_{end_date}_{search_id}", "a") as f:
            f.write(f"{cursor}\t{has_more}\t{samples}\t{search_id}\t{status_code}\n")

        print(f"{cursor}\t{has_more}\t{samples}\t{search_id}\t{status_code}")
        time.sleep(5)

    return status_code

In [None]:
def make_request_comments(endpoint_name, query_body, query_params, headers, save_folder):
    """Fetch one page from a research endpoint and save its records as CSV.

    Args:
        endpoint_name: "videos" or "comments". Selects the URL and is also the
            key under which the records are read from the response data.
        query_body: JSON body; must contain "video_id" and "cursor".
        query_params: Query-string parameters for the request.
        headers: HTTP headers for the request.
        save_folder: Folder in which the page CSV is written.

    Returns:
        A tuple ``(video_id, has_more, cursor, samples, status_code)``.
        On success ``samples`` is the number of records on the page. On a
        non-200 response ``has_more`` is False, ``cursor`` is the cursor that
        was requested and ``samples`` is the error body (parsed JSON when the
        body is JSON, otherwise the raw text).
    """
    video_id = query_body["video_id"]
    prev_cursor = query_body["cursor"]
    endpoints_dict = {"videos": "https://open.tiktokapis.com/v2/research/video/query/",
                      "comments": "https://open.tiktokapis.com/v2/research/video/comment/list/"}
    endpoint = endpoints_dict[endpoint_name]

    # make post request
    response = requests.post(endpoint, json=query_body, params=query_params, headers=headers)
    status_code = response.status_code

    if status_code != 200:
        # An error body is not guaranteed to be JSON; fall back to raw text
        # instead of raising from inside the error path.
        try:
            error_body = response.json()
        except ValueError:
            error_body = response.text
        return video_id, False, prev_cursor, error_body, status_code

    # extracting information for pagination
    data = response.json().get("data", {})

    has_more = data["has_more"]
    cursor = data["cursor"]

    # saving queried data; pages without records do not produce a file
    records = data.get(endpoint_name, [])
    df = pd.DataFrame(records)
    if len(df) != 0:
        df.to_csv(f"{save_folder}/{endpoint_name}_{video_id}_{cursor}.csv", index=False)

    return video_id, has_more, cursor, len(df), status_code

In [None]:
def query_api_comments(endpoint_name, fields, cursor, video_id, save_folder, log_name):
    """Page through the results for one video, saving and logging every page.

    Args:
        endpoint_name: Endpoint name handed to ``make_request_comments``.
        fields: Value sent as the ``fields`` query parameter.
        cursor: Cursor to start from.
        video_id: Value sent as the ``video_id`` entry of the request body.
        save_folder: Folder handed to ``make_request_comments`` for page CSVs.
        log_name: Suffix of the log file ``./download_logs_<log_name>``.

    Returns:
        The status code reported for the last page request.

    Raises:
        RuntimeError: If no access token could be obtained.
    """
    # getting credentials
    client_key = os.getenv("CLIENT_KEY")
    client_secret = os.getenv("CLIENT_SECRET")

    credentials = get_access_token(client_key, client_secret)
    # Without this check a failed token request surfaced as an opaque
    # "'NoneType' object is not subscriptable" error on the next line.
    if not credentials:
        raise RuntimeError("could not obtain an access token; check CLIENT_KEY and CLIENT_SECRET")
    access_token = credentials["access_token"]
    token_type = credentials["token_type"]

    query_params = {"fields": fields}
    query_body = {"video_id": video_id, "max_count": 100, "cursor": cursor}

    headers = {"Content-Type": "application/json",
               "Authorization": f"{token_type} {access_token}"}

    has_more = True

    # pagination loop
    while has_more:
        query_body.update({"cursor": cursor})

        video_id, has_more, cursor, samples, status_code = make_request_comments(
            endpoint_name, query_body, query_params, headers, save_folder)

        # one tab-separated log line per page, appended to the shared log file
        with open(f"./download_logs_{log_name}", "a") as f:
            f.write(f"{cursor}\t{has_more}\t{samples}\t{video_id}\t{status_code}\n")

        print(f"{cursor}\t{has_more}\t{samples}\t{video_id}\t{status_code}")
        time.sleep(5)

    return status_code

In [None]:
def get_comments(video_ids, endpoint_name, fields, cursor, save_folder, log_name):
    """Run ``query_api_comments`` for each video id, stopping after a 429.

    Every video is queried with the same ``endpoint_name``, ``fields``,
    starting ``cursor``, ``save_folder`` and ``log_name``. As soon as one
    query reports status 429 the remaining ids are skipped. Returns nothing.
    """
    last_status = 200
    for vid in video_ids:
        # a 429 from the previous video ends the whole run
        if last_status == 429:
            break
        last_status = query_api_comments(endpoint_name, fields, cursor, vid, save_folder, log_name)