# TikTok imports

In [None]:
import os
import time
from datetime import datetime

# NOTE: `plt` is bound to the top-level matplotlib package here and is then
# immediately rebound to pyplot by the `from matplotlib import pyplot` line below.
import matplotlib as plt
import pandas as pd
import requests
from matplotlib import pyplot as plt
from tqdm.auto import tqdm

In [None]:
from ensembledata.api import EDClient

In [None]:
# initialize the ensembledata client
#
# The API token is read from the ENSEMBLEDATA_TOKEN environment variable so
# that the secret is never stored in (or committed with) the notebook.
# NOTE(review): a token was previously hard-coded in this cell; treat it as
# leaked and rotate it in the EnsembleData dashboard.
_ed_token = os.environ.get("ENSEMBLEDATA_TOKEN")
if not _ed_token:
    # Fail early with an actionable message instead of an opaque API error.
    raise RuntimeError(
        "Set the ENSEMBLEDATA_TOKEN environment variable to your EnsembleData "
        "API token (get one at https://dashboard.ensembledata.com/register)."
    )
client = EDClient(_ed_token)

# get a free token at https://dashboard.ensembledata.com/register
# free trial of 50 units per day for 7 days, starting 23/11/24
#
# costs:
#  - 1 unit = 200 posts in full_hashtag_search :)
#  - 1 unit = 20 posts in hashtag_search
#  - 1 unit = 20 posts in keyword_search or full_keyword_search :(
#  - 1 unit = 30 comments to a post
#  - 1 unit = 30 replies to a comment
#
# documentation available at https://ensembledata.com/apis/docs#tag/Tiktok
# and https://github.com/EnsembleData/tiktok-scraper
# source code available at
# https://github.com/EnsembleData/ensembledata-python/tree/main/ensembledata/api

# Read posts from TikTok

Hashtag search (200 posts at the cost of 1 unit)

In [None]:
# Fetch the posts tagged with a hashtag through the full hashtag search.
# Cost is 1 unit per 200 posts.
result = client.tiktok.full_hashtag_search(
    hashtag="climatechange",  # hashtag name
    days=900,                 # ADJUST FOR HURRICANE DATE !!!!!!
    remap_output=True,        # False for mobile app, True for web
    max_cursor=4000,          # max number of posts fetched
)

In [None]:
# Flatten the nested JSON post records into a tabular DataFrame
# (nested keys become dotted column names such as 'itemInfos.id').
raw_posts = result.data["posts"]
df = pd.json_normalize(raw_posts)

In [None]:
# Columns to keep from the normalized API response, mapped to the short names
# used in the rest of the notebook. The key order is the output column order.
column_names = {
    'itemInfos.id': 'id',
    'itemInfos.createTime': 'createTime',
    'itemInfos.authorId': 'authorId',
    'itemInfos.text': 'text',
    'itemInfos.video.urls': 'video.urls',
    'itemInfos.shareCount': 'shareCount',
    'itemInfos.playCount': 'playCount',
    'itemInfos.commentCount': 'commentCount',
}

# Select and rename by name rather than by position. .rename() returns a new,
# independent frame, so the column assignment below does not raise
# SettingWithCopyWarning the way assigning on a bare column subset does.
df = df[list(column_names)].rename(columns=column_names)

# Convert the epoch timestamp to a 'YYYY-MM-DD' string.
# NOTE(review): datetime.fromtimestamp() uses the machine's local timezone, so
# posts near midnight may land on a different date on another machine.
df["createTime"] = [str(datetime.fromtimestamp(int(ts)).date())
                    for ts in df["createTime"]]



In [None]:
# Display the posts table for a visual check.
df

In [None]:
# Keep only the posts created inside the date window (both end dates included).
# createTime holds 'YYYY-MM-DD' strings, so the comparison is lexicographic.
# NOTE(review): the saved outputs further down list dates from 2022-09-14 to
# 2022-10-28, which does not match this window — confirm the intended range.
in_window = df["createTime"].between("2022-10-28", "2022-11-28")
df = df[in_window]


In [None]:
# Rank the posts from most to least commented, renumber the rows, then keep
# only the posts that have at least 30 comments.
df = (
    df.sort_values(by='commentCount', ascending=False)
      .reset_index(drop=True)
      .loc[lambda posts: posts['commentCount'] >= 30]
)

# Report how many posts are left.
# NOTE(review): the message says "more than 30", but the filter keeps >= 30.
print(f'{len(df)} posts for hashtags with more than 30 comments')

114 posts for hashtags with more than 30 comments


In [None]:
# List the distinct posting dates that remain in df.
df['createTime'].unique()

array(['2022-10-24', '2022-10-14', '2022-09-24', '2022-10-19',
       '2022-10-16', '2022-09-18', '2022-09-14', '2022-09-20',
       '2022-10-17', '2022-09-30', '2022-09-15', '2022-09-23',
       '2022-10-03', '2022-10-13', '2022-09-17', '2022-10-02',
       '2022-10-08', '2022-10-18', '2022-10-12', '2022-10-05',
       '2022-10-26', '2022-10-11', '2022-10-28', '2022-09-19',
       '2022-09-16', '2022-09-28', '2022-09-29', '2022-10-10',
       '2022-10-22', '2022-10-06', '2022-09-27', '2022-10-07',
       '2022-09-26', '2022-10-01', '2022-09-25', '2022-10-20',
       '2022-09-21', '2022-10-21'], dtype=object)

In [None]:
# Number of distinct posting dates (dropna=False so missing values count, as
# they do with len(unique())).
df['createTime'].nunique(dropna=False)  # we take the comments of x days

38

In [None]:
# For every posting date, find the row label of the post with the highest
# comment count, then keep only those rows (one post per day).
most_commented = df.groupby('createTime')['commentCount'].idxmax()
df = df.loc[most_commented]

In [None]:
# Sort the posts by createTime (ascending) and renumber the rows from 0.
df = df.sort_values('createTime', ignore_index=True)

df

Unnamed: 0,id,createTime,authorId,text,video.urls,shareCount,playCount,commentCount
0,7143344050417077509,2022-09-14,6606355564501762053,#mtl was fun yesterday!! #mtltiktok #rain#inno...,['https://v77.tiktokcdn-eu.com/99fcbc45eec4fb3...,4790,5864825,4972
1,7143623069284748550,2022-09-15,6966234696955872261,Together we can still do it 🙏🏼🌎 #saveourplanet...,['https://v16m.tiktokcdn-eu.com/4f1314ce092dfc...,3736,2943007,3036
2,7143962475757604101,2022-09-16,6757754163742032902,Forza Marche! 🇮🇹 #neiperte #climatechange #ita...,['https://v15m.tiktokcdn-eu.com/bf5de354933e63...,847,573977,961
3,7144330799645543685,2022-09-17,7125391387152270341,i haven’t posted in awhile #fyp #foryou #foryo...,['https://v77.tiktokcdn-eu.com/4d701426728dd34...,2099,2300380,2339
4,7144573582440484122,2022-09-18,6928760643127264258,#ทางออก #ทุกปัญหา #ทุกปัญหามีทางออก ไม่มีการชน...,['https://v16m.tiktokcdn-eu.com/7e7cf15b97d61c...,1174,2453440,5011
5,7144844623851064618,2022-09-19,6738446549145764869,Hurricane Fiona brought powerful winds and cat...,['https://v77.tiktokcdn-eu.com/81842ef0022218e...,546,762135,972
6,7145493259815603461,2022-09-20,7144735520425526278,Why Climate Change Is Fake #climatechange #fyp...,['https://v77.tiktokcdn-eu.com/083cf56998330e7...,1029,1355917,4784
7,7145640089220222254,2022-09-21,6749773962819486726,#greenscreen the more we ignore our climate em...,['https://api16-normal-c-useast2a.tiktokv.com/...,390,167931,305
8,7146396688104590634,2022-09-23,6643983493737791493,Let's all just calm down and trust God to take...,['https://v16m.tiktokcdn-eu.com/4708c843e7ebd4...,4456,1063001,2829
9,7146900341810810139,2022-09-24,6928760643127264258,#ประโยชน์ ของการ #กินมื้อเดียว #ยิ่งหิวยิ่งสุข...,['https://v45.tiktokcdn-eu.com/b18d5df8f3a041f...,22101,4328050,6987


In [None]:
# Save the selected posts to a CSV file, leaving out the row index.
df.to_csv(path_or_buf='climatechange_ian_posts.csv', index=False)