# TikTok imports

In [None]:
import os
import time
from datetime import datetime

import matplotlib as plt
# NOTE: the next line rebinds `plt`, so `plt` ends up referring to pyplot
from matplotlib import pyplot as plt
import pandas as pd
import requests
from tqdm.auto import tqdm

In [None]:
from ensembledata.api import EDClient

In [None]:
# initialize the ensembledata client
#
# The API token is read from the environment so the secret never lives in the
# notebook (or in version control). Set it before starting Jupyter, e.g.
#     export ENSEMBLEDATA_API_TOKEN="<your token>"
# NOTE(review): a token used to be hardcoded in this cell; treat it as leaked
# and rotate it in the EnsembleData dashboard.
api_token = os.environ.get("ENSEMBLEDATA_API_TOKEN")
if not api_token:
    raise RuntimeError(
        "Set the ENSEMBLEDATA_API_TOKEN environment variable to your "
        "EnsembleData API token before running this notebook."
    )
client = EDClient(api_token)

# get a free token at https://dashboard.ensembledata.com/register
# free trial of 50 units per day for 7 days, starting 23/11/24
#
# costs:
#  - 1 unit = 200 posts in full_hashtag_search :)
#  - 1 unit = 20 posts in hashtag_search
#  - 1 unit = 20 posts in keyword_search or full_keyword_search :(
#  - 1 unit = 30 comments to a post
#  - 1 unit = 30 replies to a comment
#
# documentation available at https://ensembledata.com/apis/docs#tag/Tiktok
# and https://github.com/EnsembleData/tiktok-scraper
# source code available at
# https://github.com/EnsembleData/ensembledata-python/tree/main/ensembledata/api

# Read posts from TikTok

Hashtag search (200 posts at the cost of 1 unit)

In [None]:
# Fetch the posts published under a hashtag (this call spends API units:
# 1 unit per 200 posts).
result = client.tiktok.full_hashtag_search(
    # hashtag name
    hashtag="climatechange",
    # look-back window in days -- ADJUST FOR HURRICANE DATE !!!!!!
    days=900,
    # False for mobile app, True for web
    remap_output=True,
    # max number of posts fetched (author's note)
    max_cursor=3500,
)

In [None]:
# Flatten the list of post records into a table; nested keys become
# dot-separated column names (e.g. 'itemInfos.id').
df = pd.json_normalize(data=result.data["posts"])

In [None]:
# Columns to keep, mapped from the flattened API name to the short name used
# in the rest of the notebook. An explicit mapping (instead of assigning a
# positional list to df.columns) cannot silently mislabel a column if the
# selection order is ever changed.
column_names = {
    'itemInfos.id': 'id',
    'itemInfos.createTime': 'createTime',
    'itemInfos.authorId': 'authorId',
    'itemInfos.text': 'text',
    'itemInfos.video.urls': 'video.urls',
    'itemInfos.shareCount': 'shareCount',
    'itemInfos.playCount': 'playCount',
    'itemInfos.commentCount': 'commentCount',
}

# extract relevant info and rename columns
# (rename() returns a fresh frame, so the column assignment below does not
# raise SettingWithCopyWarning the way writing into a column subset does)
df = df[list(column_names)].rename(columns=column_names)

# Convert the Unix timestamp (seconds) to a 'YYYY-MM-DD' string.
# The conversion is done explicitly in UTC: datetime.fromtimestamp() without a
# tz uses the local timezone of whatever machine runs the notebook, so the
# same post could land on different dates on different machines.
df = df.assign(
    createTime=pd.to_datetime(
        pd.to_numeric(df["createTime"]), unit="s", utc=True
    ).dt.strftime("%Y-%m-%d")
)


In [None]:
# preview the posts table (selected columns, createTime as a date string)
df

Unnamed: 0,id,createTime,authorId,text,video.urls,shareCount,playCount,commentCount
0,7437818533284760865,2024-11-16,7303599367192478753,#climatechange #davidattenborough #globalwarmi...,[https://v77.tiktokcdn-eu.com/475107103afd5873...,113841,13831239,13536
1,7378264093779692843,2024-06-09,6583142791119306758,Like be so f-ing for real guys #climatechange ...,[https://v77.tiktokcdn-eu.com/9f75fb30d0c4ce67...,5177,955361,2782
2,7206613856376933634,2023-03-04,7120349466079151106,Everyone can help! #climatechange #saveearth #...,[https://v77.tiktokcdn-eu.com/3026176419e13d5c...,35715,2053641,5888
3,7329576474367773994,2024-01-29,6714257599150326790,interesting #climatechange & #MentalHealth story,[https://v77.tiktokcdn-eu.com/89dde4e87bc8b684...,12813,1933795,4107
4,7437892758251113784,2024-11-16,7034404685826556933,#globalwarming #climatechange #polarbear #sad ...,[https://v77.tiktokcdn-eu.com/e7d70e611d468e84...,13751,1506710,2976
...,...,...,...,...,...,...,...,...
1688,7148527284805455150,2022-09-28,6829111343543649285,#stitch with @random_crow Fossil fuels are fin...,[https://v45.tiktokcdn-eu.com/3a8fb06be808988c...,161,212298,640
1689,7247170518816787738,2023-06-21,6846128426316678149,Klima Aktivist vs Bergbauer🐻🐺 #climatechange #...,[https://v45.tiktokcdn-eu.com/26992ac15e32c0e6...,2259,415077,154
1690,7252884311345515781,2023-07-07,6838681319300891653,Así reforma en estos momentos ⛈️ hasta aquí mi...,[https://v45.tiktokcdn-eu.com/354379225a32c071...,2804,949605,239
1691,7227923557991369990,2023-04-30,6853621171482510341,"""The Dirty Truth: Inadequate Support for Garba...",[https://v45.tiktokcdn-eu.com/338fbb965fa37220...,1338,256938,1257


In [None]:
# Keep only posts created inside the date window (both bounds inclusive).
# createTime holds 'YYYY-MM-DD' strings, so plain string comparison orders
# them chronologically.
# NOTE(review): the saved outputs further down list dates from 2023-08-25 to
# 2023-10-09 and the export file is named "idalia" -- that does not match this
# window; confirm the bounds before re-running.
df = df[df["createTime"].between("2023-10-09", "2023-11-09")]


In [None]:
# minimum number of comments a post needs in order to be kept
MIN_COMMENTS = 30

# order by comment count (most-commented first)
df = df.sort_values(by=['commentCount'], ascending=False).reset_index(drop=True)

# delete posts with fewer than MIN_COMMENTS comments
df = df[df['commentCount'] >= MIN_COMMENTS]

# state how many
# (wording matches the filter: posts with exactly MIN_COMMENTS are kept, so
# "at least", not "more than")
print(f'{len(df)} posts for hashtags with at least {MIN_COMMENTS} comments')

75 posts for hashtags with more than 30 comments


In [None]:
# distinct creation dates among the remaining posts, in order of appearance
pd.unique(df["createTime"])

array(['2023-10-08', '2023-09-28', '2023-09-29', '2023-09-21',
       '2023-09-05', '2023-09-20', '2023-09-06', '2023-09-08',
       '2023-09-10', '2023-09-12', '2023-09-09', '2023-08-29',
       '2023-10-06', '2023-09-03', '2023-09-07', '2023-10-01',
       '2023-10-03', '2023-09-24', '2023-09-25', '2023-10-09',
       '2023-09-01', '2023-09-04', '2023-09-19', '2023-10-02',
       '2023-09-22', '2023-09-15', '2023-08-26', '2023-09-16',
       '2023-09-30', '2023-10-05', '2023-09-13', '2023-10-04',
       '2023-08-28', '2023-08-25', '2023-08-30', '2023-10-07',
       '2023-09-23', '2023-09-26'], dtype=object)

In [None]:
# number of distinct days covered -- we take the comments of x days
df["createTime"].nunique(dropna=False)

38

In [None]:
# For each creation date keep only the post with the highest comment count:
# idxmax() yields one row label per date, and .loc selects exactly those rows.
df = df.loc[
    lambda posts: posts.groupby("createTime")["commentCount"].idxmax()
]

In [None]:
# sort chronologically (oldest first) and renumber the rows from 0
df = df.sort_values("createTime").reset_index(drop=True)

df

Unnamed: 0,id,createTime,authorId,text,video.urls,shareCount,playCount,commentCount
0,7271342302382148907,2023-08-25,6803462802675844101,"Out with cars, in with bikes & public transit ...",[https://sf16-music-sign.tiktokcdn.com/obj/tos...,932,393410,364
1,7271410119622020395,2023-08-26,6777593631172838406,#goodmorningbadnews #boomers #climate #climate...,[https://v77.tiktokcdn-eu.com/ef7a54a6353bfd5a...,1811,365905,728
2,7272451010495302954,2023-08-28,7023454894796719110,Clear-air turbulence is becoming more frequent...,[https://v45.tiktokcdn-eu.com/5dc8a8e9d56c9171...,7324,765558,565
3,7272820833368739114,2023-08-29,6724659169629340678,We talked to this guy on a record-setting hot ...,[https://v16m.tiktokcdn-eu.com/8df0ce1f5262a1c...,4840,942926,2360
4,7273122381269617925,2023-08-30,19663988,SKL Vid Ep. XXX: Is the Philippines the countr...,[https://v16m.tiktokcdn-eu.com/cf7b4626c39baf7...,581,486559,292
5,7273854909378759969,2023-09-01,6970050888732279814,#anprimgang #anprimmemes #nature #naturelovers...,[https://sf16-ies-music-sg.tiktokcdn.com/obj/t...,2328,2154688,1207
6,7274689988548185376,2023-09-03,7192662362943734789,Explore what's happening to the climate. Suppo...,[https://v77.tiktokcdn-eu.com/a0b89f867a65ecb5...,21332,1677128,2084
7,7275083944171719979,2023-09-04,6668094621179428870,and weather says its about to rain again 🙃 #fy...,[https://v45.tiktokcdn-eu.com/d1460c1088874f88...,7612,909926,1174
8,7275412673473514798,2023-09-05,6940967490765128710,#greenscreenvideo #fyp #trending #facts #feeli...,[https://v15m-perf.tiktokcdn-eu.com/cd82d4836b...,6872,259582,4309
9,7275799559224282374,2023-09-06,7165860667278279685,#africaclimatesummit #african #africa #african...,[https://v77.tiktokcdn-eu.com/4c794eb732d85c96...,4398,4156604,3209


In [None]:
# write the selected posts to disk, without the row index column
df.to_csv(path_or_buf='climatechange_idalia_posts.csv', index=False)