# TikTok imports

In [None]:
import os
import time
from datetime import datetime

# NOTE: this alias is immediately rebound to pyplot by the import further down
import matplotlib as plt
import pandas as pd
import requests
from matplotlib import pyplot as plt
from tqdm.auto import tqdm

In [None]:
from ensembledata.api import EDClient

In [None]:
# initialize the ensembledata client
# The API token is a secret, so it is read from the ENSEMBLEDATA_API_TOKEN
# environment variable instead of being stored in the notebook.
api_token = os.environ.get("ENSEMBLEDATA_API_TOKEN")
if not api_token:
    raise RuntimeError(
        "Set the ENSEMBLEDATA_API_TOKEN environment variable to your "
        "EnsembleData API token before running this notebook."
    )
client = EDClient(api_token)

# get a free token at https://dashboard.ensembledata.com/register
# free trial of 50 units per day for 7 days, starting 23/11/24
#
# costs:
#  - 1 unit = 200 posts in full_hashtag_search :)
#  - 1 unit = 20 posts in hashtag_search
#  - 1 unit = 20 posts in keyword_search or full_keyword_search :(
#  - 1 unit = 30 comments to a post
#  - 1 unit = 30 replies to a comment
#
# documentation available at https://ensembledata.com/apis/docs#tag/Tiktok
# and https://github.com/EnsembleData/tiktok-scraper
# source code available at
# https://github.com/EnsembleData/ensembledata-python/tree/main/ensembledata/api

# Read posts from TikTok

Hashtag search (200 posts at the cost of 1 unit)

In [None]:
# fetch the posts tagged with the hashtag of interest
result = client.tiktok.full_hashtag_search(
    hashtag="hurricaneian",  # hashtag name
    days=900,                # ADJUST FOR HURRICANE DATE !!!!!!
    remap_output=True,       # False for mobile app, True for web
    max_cursor=4000,         # max number of posts fetched,
                             # cost is 1 unit per 200 posts
)

In [None]:
# flatten the nested JSON posts into a tabular dataframe
# (nested keys become dotted column names such as 'itemInfos.id')
df = pd.json_normalize(data=result.data["posts"])

In [None]:
# map the flattened API field names onto the short column names used below
column_names = {
    'itemInfos.id': 'id',
    'itemInfos.createTime': 'createTime',
    'itemInfos.authorId': 'authorId',
    'itemInfos.text': 'text',
    'itemInfos.video.urls': 'video.urls',
    'itemInfos.shareCount': 'shareCount',
    'itemInfos.playCount': 'playCount',
    'itemInfos.commentCount': 'commentCount',
}

# extract relevant info and rename columns in one step; .copy() detaches the
# result from the full frame so the column assignment below does not raise
# SettingWithCopyWarning
df = df[list(column_names)].rename(columns=column_names).copy()

# format time to date in string format ('YYYY-MM-DD')
# NOTE: datetime.fromtimestamp converts using the machine's local time zone,
# so posts created close to midnight can land on a different date depending
# on where the notebook is run
df["createTime"] = [str(datetime.fromtimestamp(int(ts)).date())
                    for ts in df["createTime"]]

In [17]:
# keep only posts created inside the study window, both bounds included.
# Dates are ISO 'YYYY-MM-DD' strings, so string comparison is chronological.
# A boolean mask replaces the row-by-row loop, which dropped rows from the
# frame while still indexing it and raised KeyError as soon as a post dated
# before the window was removed and then looked up again.
in_window = df["createTime"].between("2022-09-14", "2022-10-28")
df = df[in_window]

In [18]:
# rank posts from most to least commented, renumber the rows, then keep
# only the posts that reach the 30-comment threshold (inclusive: >= 30)
df = (
    df.sort_values('commentCount', ascending=False)
      .reset_index(drop=True)
      .loc[lambda posts: posts['commentCount'].ge(30)]
)

# report how many posts remain
print(f'{len(df)} posts for hashtags with more than 30 comments')

35 posts for hashtags with more than 30 comments


In [19]:
# list the distinct posting dates still present, in order of first appearance
df.loc[:, 'createTime'].unique()

array(['2022-09-29', '2022-09-30', '2022-10-05', '2022-10-07',
       '2022-10-01', '2022-09-28', '2022-10-03', '2022-09-27',
       '2022-10-23', '2022-10-10', '2022-09-26', '2022-10-19',
       '2022-10-02', '2022-10-16', '2022-10-04', '2022-09-25',
       '2022-10-11', '2022-10-06', '2022-09-24', '2022-10-22',
       '2022-10-09', '2022-10-15', '2022-10-21', '2022-10-14',
       '2022-10-20', '2022-10-08', '2022-10-26', '2022-09-23',
       '2022-10-27', '2022-10-12', '2022-10-18', '2022-10-17',
       '2022-10-13', '2022-10-25', '2022-10-24'], dtype=object)

In [20]:
# number of distinct days covered: we take the comments of x days
df['createTime'].unique().size

35

In [21]:
# for each date, find the row label of the post with the highest comment
# count, then keep exactly those rows (one post per day)
top_post_labels = df.groupby('createTime')['commentCount'].idxmax()
df = df.loc[top_post_labels]

In [22]:
# arrange the posts chronologically (ascending is the default) and renumber
# the rows from 0
df = df.sort_values('createTime').reset_index(drop=True)

df

Unnamed: 0,id,createTime,authorId,text,video.urls,shareCount,playCount,commentCount
0,7146640218739723563,2022-09-23,6603781368773361669,Welcome to the Jungle… #florida #floridaman #h...,[https://v77.tiktokcdn-eu.com/30432e890a177bec...,4132,323395,613
1,7146891037787835694,2022-09-24,6865054517386806277,Florida greeting hurricanes #hurricane #florid...,[https://v77.tiktokcdn-eu.com/ad1497f06331bf9c...,13402,1346467,1785
2,7147128979391139114,2022-09-25,6935805351829685254,#hurricaneian #ian #florida #stateofemergency,[https://v16m.tiktokcdn-eu.com/891c75177f05f74...,9047,6085683,3099
3,7147742399043931438,2022-09-26,6980433469270033413,#stormwork #hurricane #hurricaneian #florida #...,[https://v45.tiktokcdn-eu.com/c00e545346ba923a...,13258,2484533,5629
4,7147826662787517742,2022-09-27,6758580322201814022,😂😂 Nah people in Florida take evey Hurricane a...,[https://v77.tiktokcdn-eu.com/ddd027df76e774f5...,11372,1372885,6242
5,7148507639553576235,2022-09-28,7056593308990440494,RESCUE HELP NEEDED AS SOON AS POSSIBLE!! 3 ad...,[https://v58.tiktokcdn-eu.com/video/tos/maliva...,9983,1798567,8863
6,7148581794428816683,2022-09-29,6618333664294895621,#FortMyers #HurricaneIan #SWFL #Boat,[https://v77.tiktokcdn-eu.com/de311cb76e0d400c...,85321,15116694,17079
7,7149273125157424389,2022-09-30,6666826928797777925,,[https://v45.tiktokcdn-eu.com/f4a60304944b4788...,306,523675,13212
8,7149626618011667758,2022-10-01,6951041408883491845,Fort Myers Beach before and after Hurricane Ia...,[https://v16m.tiktokcdn-eu.com/990f5fc8a5799b4...,58122,12862996,9080
9,7149856973113003269,2022-10-02,6850511303556383750,🌪️ #fyp #uk #usa #hurricaneian,[https://v77.tiktokcdn-eu.com/c5977c8238150c08...,3404,1822983,3991


In [23]:
# save the selected posts to disk, leaving out the row index
df.to_csv(path_or_buf='ian_posts.csv', index=False)