In [1]:
import random
import pyktok as pyk
import pandas as pd
from tqdm import tqdm
import os
import time

In [6]:
# Check if TikTok is available
def test_available(url,sleep=4,meta_fn = './test'):
    try:
        pyk.save_tiktok(url,False,meta_fn,verbose=False)
    except:
        print(f"TikTok {url} is not available")
        time.sleep(random.randint(1, sleep))
        return 0
    time.sleep(random.randint(1, sleep))
    os.remove(meta_fn)
    return 1

def get_all_data(output_path='./data/output'):
    """Load every ``output_hashtags*`` file found one level below ``output_path``.

    Parameters
    ----------
    output_path : str, default './data/output'
        Directory whose sub-folders hold the scraped CSV files.

    Returns
    -------
    pandas.DataFrame
        All matching files concatenated in sorted folder/file order. Each
        file keeps its own row index, so index labels can repeat. An empty
        DataFrame is returned when nothing matches.
    """
    # os.listdir order is arbitrary, so sort for a reproducible row order, and
    # skip stray files sitting next to the data folders.
    data_folders = sorted(
        name for name in os.listdir(output_path)
        if os.path.isdir(os.path.join(output_path, name))
    )
    print("Data Folders in Use: ", data_folders)

    # Collect the frames and concatenate once; concatenating inside the loop
    # re-copies all previously loaded rows on every iteration.
    frames = []
    for folder in tqdm(data_folders, desc='Gathering Data'):
        folder_path = os.path.join(output_path, folder)
        for file in sorted(os.listdir(folder_path)):
            if file.startswith('output_hashtags'):
                frames.append(pd.read_csv(os.path.join(folder_path, file)))

    if not frames:
        # pd.concat raises on an empty list.
        return pd.DataFrame()
    return pd.concat(frames)

In [7]:
# Show the kernel's working directory: the relative './data/...' paths used in this notebook resolve against it.
%pwd

'c:\\SOMA\\tiktok\\TikTok-YouTube\\Tiktok'

In [8]:
# Load every scraped hashtag CSV into one frame and take a first look at it.
df = get_all_data()
print(df.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df.info()

Data Folders in Use:  ['output_hashtags_alcohol_excessive', 'output_hashtags_alcohol_general', 'output_hashtags_cannabis_edibles', 'output_hashtags_cannabis_smoke', 'output_hashtags_cigar', 'output_hashtags_cocaine', 'output_hashtags_hookah', 'output_hashtags_mdma', 'output_hashtags_mushrooms', 'output_hashtags_test', 'output_hashtags_vape']


Gathering Data: 100%|██████████| 11/11 [00:00<00:00, 100.91it/s]

      hashtag           user_name       user_id      video_id  \
0  alcoholtok  realtipsybartender  1.775364e+07  7.050000e+18   
1       drunk         childishmau  6.800000e+18  7.010000e+18   
2       drunk        torrishelton  6.790000e+18  6.970000e+18   
3  alcoholtok  realtipsybartender  1.775364e+07  7.050000e+18   
4       drunk         danielamazz  6.810000e+18  6.900000e+18   

                                   video_description video_create_time  \
0  So delicious! 😋 #tipsybartender #cocktails #dr...  10-01-2022 14:46   
1          Lightweights 😆😆😆#fyp #drunk #comedy #fypシ  03-10-2021 12:08   
2  Alcohol pops 🤪 #mixeddrinks #popsicles #drunk ...  30-05-2021 10:42   
3  You gotta see what happens when I drop it… #ti...  05-01-2022 18:49   
4                                   #drunk 💕@vz_ccme  27-11-2020 19:31   

   video_length                                         video_link   n_likes  \
0            45  https://www.tiktok.com/@realtipsybartender/vid...   3700000   
1   




In [9]:
# Enable Series.progress_apply so the long-running probe below shows a progress bar.
tqdm.pandas()
# One probe per row; each call to test_available also pauses, so this cell is slow.
df['available'] = df['video_link'].progress_apply(test_available)
# Stamp the whole batch with a single check time. A scalar broadcasts to every
# row, so no second row-by-row pass (and progress bar) is needed.
# NOTE: the column name's spelling is kept as-is because it ends up in the exported file.
df['last_availablity_check'] = pd.Timestamp.now('US/Pacific')
print(f"{(df['available'] == 0).sum()} TikToks are not available")

  0%|          | 0/200 [00:00<?, ?it/s]

TikTok https://www.tiktok.com/@realtipsybartender/video/7051712497962781998?lang=en is not available


  1%|          | 2/200 [00:05<09:06,  2.76s/it]

TikTok https://www.tiktok.com/@childishmau/video/7014918884851600645?lang=en is not available


  2%|▏         | 3/200 [00:11<13:25,  4.09s/it]

TikTok https://www.tiktok.com/@torrishelton/video/6968140029403122950?lang=en is not available


  2%|▏         | 4/200 [00:16<13:57,  4.27s/it]

TikTok https://www.tiktok.com/@realtipsybartender/video/7049919722183363886?lang=en is not available


  2%|▎         | 5/200 [00:21<15:21,  4.72s/it]

TikTok https://www.tiktok.com/@danielamazz/video/6900012143605812482?lang=en is not available


  3%|▎         | 6/200 [00:26<15:56,  4.93s/it]

TikTok https://www.tiktok.com/@marissam011/video/6866603549372026117?lang=en is not available


  3%|▎         | 6/200 [00:31<17:00,  5.26s/it]


KeyboardInterrupt: 

In [None]:
# Excel cannot store timezone-aware datetimes and pandas refuses to write them,
# so drop the tz info (keeping the local wall-clock time) on a copy used only
# for the export; df itself is left untouched.
export_df = df.copy()
for column in export_df.select_dtypes(include=['datetimetz']).columns:
    export_df[column] = export_df[column].dt.tz_localize(None)
export_df.to_excel('./data/available.xlsx',index=False)