# TikTok imports

In [1]:
import pandas as pd
import matplotlib as plt
from matplotlib import pyplot as plt
import time
from tqdm.auto import tqdm

import requests
from datetime import datetime

In [2]:
from ensembledata.api import EDClient

In [3]:
# initialize the ensembledata client
client = EDClient("M2OK8yIEX5UjTkfA")

# get a free token at https://dashboard.ensembledata.com/register
# free trial of 50 units per day for 7 days, starting 23/11/24
#
# costs:
#  - 1 unit = 200 posts in full_hashtag_search :)
#  - 1 unit = 20 posts in hashtag_search
#  - 1 unit = 20 posts in keyword_search or full_keyword_search :(
#  - 1 unit = 30 comments to a post
#  - 1 unit = 30 replies to a comment
#
# documentation available at https://ensembledata.com/apis/docs#tag/Tiktok
# and https://github.com/EnsembleData/tiktok-scraper
# source code available at
# https://github.com/EnsembleData/ensembledata-python/tree/main/ensembledata/api

# Read posts from TikTok

Hashtag search (200 posts at the cost of 1 unit)

In [4]:
# read posts related to a hashtag
result = client.tiktok.full_hashtag_search(
                hashtag = "hurricaneidalia", # hashtag name
                days = 550,   # ADJUST FOR HURRICANE DATE !!!!!!
                remap_output = True, # False for mobile app, True for web
                max_cursor = 4000) # max number of posts fetched,
                                  # cost is 1 unit per 200 posts

In [5]:
# convert json to dataframe
df = pd.json_normalize(result.data["posts"])

In [6]:
# extract relevant info
df = df[['itemInfos.id', 'itemInfos.createTime', 'itemInfos.authorId',
         'itemInfos.text', 'itemInfos.video.urls', 'itemInfos.shareCount',
         'itemInfos.playCount', 'itemInfos.commentCount']]

# rename columns
df.columns = ['id', 'createTime', 'authorId', 'text', 'video.urls',
              'shareCount', 'playCount', 'commentCount']

# format time to date in string format
df["createTime"] = [str(datetime.fromtimestamp(int(i)).date())
                                                for i in df["createTime"]]

In [12]:
df = df.sort_values(by=['createTime'], ascending=True).reset_index(drop=True)
df

Unnamed: 0,id,createTime,authorId,text,video.urls,shareCount,playCount,commentCount
0,7271051377613212971,2023-08-25,7097557512820393003,hurricane idalia 2023 #florida #hurricane #tro...,[https://api16-normal-c-useast2a.tiktokv.com/a...,19,12697,36
1,7271080230721277227,2023-08-25,6788321418486170630,#herewego #hurricane #hurricaneidalia #hereweg...,[https://api16-normal-c-useast2a.tiktokv.com/a...,50,22142,8
2,7271643255589080362,2023-08-26,6782720514957067269,#TropicalStormIdalia likely to form over the n...,[https://v77.tiktokcdn-eu.com/3f5d6325e016b9ab...,1440,142843,286
3,7271413452378230062,2023-08-26,6782720514957067269,Don‚Äôt turn your back on the Gulf in late Augus...,[https://api16-normal-c-useast2a.tiktokv.com/a...,413,114357,108
4,7271737498571722026,2023-08-26,6725787885481821190,#hurricane #florida #hurricaneidalia #fyp,[https://api16-normal-c-useast2a.tiktokv.com/a...,31,28254,18
...,...,...,...,...,...,...,...,...
3308,7423455155091410222,2024-10-08,7191602200883364910,The Weird Reason Hurricanes Have Names üò≤üå™Ô∏è #Hu...,[https://api16-normal-c-useast2a.tiktokv.com/a...,6,5980,6
3309,7423879220591480095,2024-10-09,6976471700804895749,Praying for everyone affected by Hurricane Mil...,[https://sf16-ies-music-sg.tiktokcdn.com/obj/t...,0,2226,0
3310,7424503338349137194,2024-10-11,7385435642845365290,my hurricane idalia experience! Hurricane Idal...,[https://api16-normal-c-useast2a.tiktokv.com/a...,105,321033,48
3311,7427928272144551211,2024-10-20,7384909845448770603,Lol #hurricanemilton #hurricanehelene #hurrica...,[https://api16-normal-c-useast2a.tiktokv.com/a...,41,97231,127


In [13]:
for i in range(len(df)):
  if df["createTime"][i]<"2023-08-25":
    df=df.drop(i)
  if df["createTime"][i]>"2023-10-9":
    df=df.drop(i)

In [14]:
# order by comment count
df = df.sort_values(by=['commentCount'], ascending=False).reset_index(drop=True)

# delete posts with less than 30 comments
df = df[df['commentCount'] >= 30]

# state how many
print(f'{len(df)} posts for hashtags with more than 30 comments')

1087 posts for hashtags with more than 30 comments


In [15]:
df['createTime'].unique() 

array(['2023-08-27', '2023-08-31', '2023-08-30', '2023-08-29',
       '2023-08-28', '2023-09-26', '2023-10-09', '2023-09-02',
       '2023-09-17', '2023-09-01', '2023-09-30', '2023-10-05',
       '2023-09-07', '2023-09-04', '2023-10-04', '2023-09-05',
       '2023-09-22', '2023-08-26', '2023-10-24', '2023-09-25',
       '2023-09-14', '2023-09-03', '2023-10-03', '2023-09-16',
       '2023-09-11', '2023-10-20', '2023-09-09', '2023-09-10',
       '2023-09-12', '2023-09-08', '2023-09-06', '2023-08-25',
       '2023-09-20'], dtype=object)

In [16]:
len(df['createTime'].unique())  # we take the comments of x days

33

In [17]:
df = df.loc[df.groupby('createTime')['commentCount'].idxmax()]

In [18]:
# order df by date
df = df.sort_values(by=['createTime'], ascending=True).reset_index(drop=True)

df

Unnamed: 0,id,createTime,authorId,text,video.urls,shareCount,playCount,commentCount
0,7271051377613212971,2023-08-25,7097557512820393003,hurricane idalia 2023 #florida #hurricane #tro...,[https://api16-normal-c-useast2a.tiktokv.com/a...,19,12697,36
1,7271643255589080362,2023-08-26,6782720514957067269,#TropicalStormIdalia likely to form over the n...,[https://v77.tiktokcdn-eu.com/3f5d6325e016b9ab...,1440,142843,286
2,7272071106490600747,2023-08-27,6966634939376157702,The arrogance and flippant attitude of some of...,[https://api16-normal-c-useast2a.tiktokv.com/a...,7031,673424,5197
3,7272389824391269674,2023-08-28,6787453173931852805,#denisphillips #hurricaneidalia üåÄ#jennysellsth...,[https://v77.tiktokcdn-eu.com/723e5c9464024a9e...,1728,1301460,1931
4,7272535297378766123,2023-08-29,6790813865506636805,Please dont drain your pool! Praying for all m...,[https://v45.tiktokcdn-eu.com/83c7d73aac2a9cf1...,11233,26980257,3796
5,7273191672803101994,2023-08-30,6610797907615940613,White House Press Secretary Karine Jean-Pierre...,[https://api16-normal-c-useast2a.tiktokv.com/a...,590,504711,3964
6,7273270653707390241,2023-08-31,7239587261129704454,"In seconds, the Idalia tsunami formed #idalia ...",[https://api16-normal-c-useast2a.tiktokv.com/a...,19893,16468067,4096
7,7273620466760305963,2023-09-01,7176441536266404907,Makes me sad that out of all the cars that cou...,[https://api16-normal-c-useast2a.tiktokv.com/a...,5710,531473,892
8,7274007768485367083,2023-09-02,6847548206487340038,The Florida governor rejected millions in clim...,[https://v16m.tiktokcdn-eu.com/69a72e4fde39a61...,1486,113686,1105
9,7274413941835681070,2023-09-03,6808945376549962758,President Biden represents all Americans. He s...,[https://api16-normal-c-useast2a.tiktokv.com/a...,59,52421,199


In [20]:
df.to_csv('idalia_posts.csv', index=False)