In [4]:
## Import Libraries
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
import warnings

import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, iplot, plot
from plotly.subplots import make_subplots

from IPython.core.display import HTML
from IPython.display import Image, Audio, Video

%matplotlib inline
warnings.filterwarnings("ignore")

In [5]:
def my_cut(x, bins, lower_infinite=False, upper_infinite=False, **kwargs):
    """Bin ``x`` with ``pd.cut``, optionally adding open-ended outer bins.

    Parameters
    ----------
    x : array-like
        Values to bin; forwarded unchanged to ``pd.cut``.
    bins : sequence of scalars
        Bin edges. Any iterable of edges (list, tuple, range, array) is
        accepted when an infinite bound is requested; the caller's object is
        never modified. Without infinite bounds the value is handed to
        ``pd.cut`` as is.
    lower_infinite : bool, default False
        Prepend ``-inf`` as an extra lowest edge.
    upper_infinite : bool, default False
        Append ``+inf`` as an extra highest edge.
    **kwargs
        Forwarded to ``pd.cut``. ``right`` (default True) also decides the
        comparison symbols used in the open-ended labels. An explicit
        ``labels`` argument takes precedence over the generated labels.

    Returns
    -------
    Whatever ``pd.cut`` returns. With an infinite bound the generated labels
    are ``"<= b"`` / ``"< b"`` for the lowest open-ended bin, ``"> b"`` /
    ``">= b"`` for the highest one and ``"a - b"`` for every bin in between.

    Raises
    ------
    TypeError
        If an infinite bound is requested and ``bins`` is not iterable.
    """
    # Quick pass-through if no infinite bounds are requested
    if not lower_infinite and not upper_infinite:
        return pd.cut(x, bins, **kwargs)

    # Copy the edges into a fresh list: works for any iterable and leaves the
    # caller's object untouched
    try:
        bins_final = list(bins)
    except TypeError:
        raise TypeError(
            "bins must be a sequence of bin edges when lower_infinite or "
            "upper_infinite is set"
        ) from None

    # Prepend/append infinities where indicated
    if lower_infinite:
        bins_final.insert(0, float("-inf"))
    if upper_infinite:
        bins_final.append(float("inf"))

    # With right=True the bins are (a, b]: the lowest open-ended bin includes
    # its upper edge ("<=") and the highest one excludes its lower edge (">").
    # With right=False the bins are [a, b) and both symbols flip.
    right = kwargs.get("right", True)
    symbol_lower = "<=" if right else "<"
    symbol_upper = ">" if right else ">="

    # Create one label per bin
    last_bin = len(bins_final) - 2
    labels = []
    for i in range(len(bins_final) - 1):
        if lower_infinite and i == 0:
            labels.append("{0} {1}".format(symbol_lower, bins_final[1]))
        elif upper_infinite and i == last_bin:
            labels.append("{0} {1}".format(symbol_upper, bins_final[i]))
        else:
            labels.append("{0} - {1}".format(bins_final[i], bins_final[i + 1]))

    # A caller-supplied `labels` wins; otherwise use the generated ones.
    # (Passing both as keywords would raise "multiple values for keyword".)
    kwargs.setdefault("labels", labels)

    # Pass through to pandas cut()
    return pd.cut(x, bins_final, **kwargs)

In [6]:
# Open the file with the raw data. The context manager closes the handle even
# when json.load() raises on malformed input.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
with open(r"C:\Users\User\AppData\Roaming\Python\Python313\site-packages\ML-Production\Tiktok-ML\trending.json", encoding="utf8") as file:
    raw_data = json.load(file)

# Select only the list with the video data
trending_videos_list = raw_data['collector']

# Pretty-print one entry (position 15) to inspect the structure of a record
print(json.dumps(trending_videos_list[15], indent=4, sort_keys=True))

{
    "authorMeta": {
        "avatar": "https://p77-sign-sg.tiktokcdn.com/tos-maliva-avt-0068/ce5595b5c4874a234d0e41feec9b4ef9~c5_1080x1080.jpeg?x-expires=1608649200&x-signature=HXQeWxkqcbcR2JEz9JRdZWOhJ%2Fw%3D",
        "id": "6563548229581422598",
        "name": "jhullyduarte",
        "nickName": "Jhully Duarte",
        "secUid": "MS4wLjABAAAAByNqbJCtA9PfwY1ICwGomesu7URs6GhushkQeWleqhqsTSRmCKKi1GpUXOWabnbI",
        "signature": "\ud83e\udd2a\ud83e\udd2a",
        "verified": false
    },
    "commentCount": 267,
    "covers": {
        "default": "https://p16-sign-sg.tiktokcdn.com/obj/tos-maliva-p-0068/6416d73acd5a47c0bb04c0982e4b973e?x-expires=1608584400&x-signature=qa5fI0p5HrD71YMq70ImAirJtzI%3D",
        "dynamic": "https://p16-sign-sg.tiktokcdn.com/obj/tos-maliva-p-0068/b45558f5231340cdbf9f34f5574cc98c_1608239182?x-expires=1608584400&x-signature=IWs7ofM62Z2%2FcvyxJhpYLZowafM%3D",
        "origin": "https://p16-sign-sg.tiktokcdn.com/obj/tos-maliva-p-0068/c137bcc58fcc4798b3052

In [35]:
# Create a DataFrame of the data: one row per entry of trending_videos_list
df = pd.DataFrame(trending_videos_list)
# Bare last expression: the notebook renders the frame as this cell's output
df

Unnamed: 0,id,text,createTime,authorMeta,musicMeta,covers,webVideoUrl,videoUrl,videoUrlNoWaterMark,videoMeta,diggCount,shareCount,playCount,commentCount,downloaded,mentions,hashtags
0,6907228749016714497,Confidence went 📈,1608214517,"{'id': '6825540583826768902', 'secUid': 'MS4wL...","{'musicId': '6790057285814061057', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@ninakleij/video/690722...,https://v77.tiktokcdn.com/ed1f811617d7b5e18b8d...,,"{'height': 1024, 'width': 576, 'duration': 15}",3710,50,44800,68,True,[],[]
1,6875468410612993286,Quiet Zone... follow me on insta: joeysofo. Co...,1600819763,"{'id': '6729292817489986566', 'secUid': 'MS4wL...","{'musicId': '6678927595137977094', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@joeysofo/video/6875468...,https://v77.tiktokcdn.com/ab935f1975cb8b69aebf...,,"{'height': 1024, 'width': 576, 'duration': 11}",55700,1817,838100,936,True,[@dwight_schnuute],[]
2,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,1606228625,"{'id': '6791901371429913601', 'secUid': 'MS4wL...","{'musicId': '6881287666340481026', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@jackeyephone/video/689...,https://v21.tiktokcdn.com/video/tos/alisg/tos-...,,"{'height': 960, 'width': 540, 'duration': 19}",936200,21100,15300000,27100,True,[],"[{'id': '23428', 'name': 'tiktok', 'title': ''..."
3,6902819837345533186,,1607187987,"{'id': '6574079584262094854', 'secUid': 'MS4wL...","{'musicId': '6881107586830076678', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@naomivaneeren/video/69...,https://v21.tiktokcdn.com/video/tos/alisg/tos-...,,"{'height': 1024, 'width': 576, 'duration': 13}",12900,197,94900,143,True,[],[]
4,6905635666588192002,小技です👟✨#tiktok教室#tutorial,1607843600,"{'id': '6586854399372394498', 'secUid': 'MS4wL...","{'musicId': '6649693417590754053', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@io.dreamer_mk/video/69...,https://v77.tiktokcdn.com/7622979a5ceb97c6da4e...,,"{'height': 1024, 'width': 576, 'duration': 22}",8805,198,115300,52,True,[],"[{'id': '1627704798586882', 'name': 'tiktok教室'..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,6877191692341054721,#foryou #foryoupage,1601220970,"{'id': '6788447048146600965', 'secUid': 'MS4wL...","{'musicId': '6843814375833291525', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@artistmiranda/video/68...,https://v77.tiktokcdn.com/12644d87ac40de1c1ab2...,,"{'height': 1024, 'width': 576, 'duration': 13}",13300,152,129300,111,True,[],"[{'id': '42164', 'name': 'foryou', 'title': ''..."
996,6908069845825359109,Stop eating 💩 #gttfg #gotothegym #swolefam #nu...,1608410366,"{'id': '6718786745917506565', 'secUid': 'MS4wL...","{'musicId': '6908069867644013318', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@papaswolio/video/69080...,https://v77.tiktokcdn.com/740d0b0889f43900715c...,,"{'height': 1024, 'width': 576, 'duration': 20}",12200,223,80700,321,True,[],"[{'id': '1685997126071301', 'name': 'gttfg', '..."
997,6883484287434378497,#fy #foryoupage #foryou,1602686079,"{'id': '6792309272296129541', 'secUid': 'MS4wL...","{'musicId': '6863497148655225605', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@sanaelfarah/video/6883...,https://v77.tiktokcdn.com/eed7b93664d2cfa5b122...,,"{'height': 1024, 'width': 576, 'duration': 13}",26600,3392,449300,668,True,[],"[{'id': '153828', 'name': 'fy', 'title': '', '..."
998,6898721943978036481,regretss 📈 #fyp #foryou #curls,1606233872,"{'id': '69570057726554112', 'secUid': 'MS4wLjA...","{'musicId': '6807084883974851333', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@safae.kx/video/6898721...,https://v77.tiktokcdn.com/a402298fb8ed593ed456...,,"{'height': 1024, 'width': 576, 'duration': 12}",10000,111,72200,274,True,[],"[{'id': '229207', 'name': 'fyp', 'title': '', ..."


In [8]:
# One row per (video, hashtag, mention) combination, without the columns that
# hold nested dictionaries. The frame is rebuilt from trending_videos_list so
# the cell can be re-run: exploding/dropping `df` in place fails on a second
# run because the 'hashtags' column is already gone.
df = (
    pd.DataFrame(trending_videos_list)
    .explode('hashtags')
    .explode('mentions')
    .drop(columns=['authorMeta', 'musicMeta', 'covers', 'videoMeta', 'hashtags'])
)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5693 entries, 0 to 999
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   id                   5693 non-null   object
 1   text                 5693 non-null   object
 2   createTime           5693 non-null   int64 
 3   webVideoUrl          5693 non-null   object
 4   videoUrl             5693 non-null   object
 5   videoUrlNoWaterMark  5693 non-null   object
 6   diggCount            5693 non-null   int64 
 7   shareCount           5693 non-null   int64 
 8   playCount            5693 non-null   int64 
 9   commentCount         5693 non-null   int64 
 10  downloaded           5693 non-null   bool  
 11  mentions             1316 non-null   object
dtypes: bool(1), int64(5), object(6)
memory usage: 539.3+ KB


In [9]:
# Helper used with DataFrame.apply(..., axis=1) to flatten dictionary cells
def object_to_columns(dfRow, **kwargs):
    """Expand cells containing dictionaries into separate, prefixed entries.

    Parameters
    ----------
    dfRow : pandas.Series
        A single row, e.g. as passed by ``DataFrame.apply(..., axis=1)``.
    **kwargs
        ``column=prefix`` pairs. For every named column whose cell is a
        ``dict``, each ``key``/``value`` of that dict is stored under
        ``'<prefix>.<key>'``. Cells that are not dicts are left as they are.

    Returns
    -------
    A copy of ``dfRow`` with the extra entries appended. The row passed in is
    not modified: ``DataFrame.apply`` does not support functions that mutate
    the object they are given.

    Raises
    ------
    KeyError
        If a column named in ``kwargs`` is missing from the row.
    """
    expanded = dfRow.copy()
    for column, prefix in kwargs.items():
        cell = dfRow[column]
        if isinstance(cell, dict):
            for key, value in cell.items():
                expanded['{}.{}'.format(prefix, key)] = value
    return expanded

# Flatten the nested dictionaries into dotted columns and produce one row per
# hashtag/mention. The exploded frame is serialised to JSON records and
# normalised a second time so the hashtag dictionaries created by explode()
# are flattened as well.
exploded_json = (
    pd.json_normalize(trending_videos_list)
    .explode('hashtags')
    .explode('mentions')
    .to_json(orient='records')
)
parsed = json.loads(exploded_json)
df = pd.json_normalize(parsed)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5693 entries, 0 to 5692
Data columns (total 38 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       5693 non-null   object 
 1   text                     5693 non-null   object 
 2   createTime               5693 non-null   int64  
 3   webVideoUrl              5693 non-null   object 
 4   videoUrl                 5693 non-null   object 
 5   videoUrlNoWaterMark      5693 non-null   object 
 6   diggCount                5693 non-null   int64  
 7   shareCount               5693 non-null   int64  
 8   playCount                5693 non-null   int64  
 9   commentCount             5693 non-null   int64  
 10  downloaded               5693 non-null   bool   
 11  mentions                 1316 non-null   object 
 12  hashtags                 0 non-null      float64
 13  authorMeta.id            5693 non-null   object 
 14  authorMeta.secUid       

In [10]:
# One row per distinct video, sound and author; drop_duplicates keeps the
# first occurrence of every key by default
df_unique_videos = df.drop_duplicates(subset=['id'])
df_unique_music = df.drop_duplicates(subset=['musicMeta.musicId'])
df_unique_authors = df.drop_duplicates(subset=['authorMeta.id'])

# Shape (rows, columns) of every dataset, keyed by its name
dict(
    df_tiktok_dataset=df.shape,
    df_unique_videos=df_unique_videos.shape,
    df_unique_music=df_unique_music.shape,
    df_unique_authors=df_unique_authors.shape,
)

{'df_tiktok_dataset': (5693, 38),
 'df_unique_videos': (1000, 38),
 'df_unique_music': (907, 38),
 'df_unique_authors': (802, 38)}

In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for the exploded frame
df.describe()

Unnamed: 0,createTime,diggCount,shareCount,playCount,commentCount,hashtags,videoMeta.height,videoMeta.width,videoMeta.duration
count,5693.0,5693.0,5693.0,5693.0,5693.0,0.0,5693.0,5693.0,5693.0
mean,1604877000.0,56309.86,1481.563323,606227.8,701.559283,,977.729492,579.945547,20.568066
std,2282009.0,492792.7,8580.033748,4554259.0,8671.442272,,119.002955,78.260349,14.964944
min,1600785000.0,13.0,0.0,465.0,0.0,,352.0,240.0,4.0
25%,1602862000.0,3090.0,20.0,47100.0,51.0,,960.0,576.0,10.0
50%,1604961000.0,7320.0,82.0,81900.0,123.0,,1024.0,576.0,15.0
75%,1606932000.0,21400.0,388.0,197300.0,370.0,,1024.0,576.0,25.0
max,1608521000.0,31000000.0,220100.0,250800000.0,625700.0,,1248.0,1248.0,60.0


Data Analysis of Likes and Comments

In [12]:
# Bucket edges from 0 to 100,000 in steps of 5,000;
# my_cut(..., upper_infinite=True) adds an open-ended bucket above the last edge.
buckets = list(range(0, 105000, 5000))

# Number of unique videos per bucket. observed=False is passed explicitly so
# that empty buckets stay in the result whatever default the installed pandas
# uses (the implicit default is deprecated, and warnings are silenced in this
# notebook, so the change would otherwise go unnoticed).
likes = (
    df_unique_videos
    .groupby(my_cut(df_unique_videos['diggCount'], buckets, upper_infinite=True), observed=False)
    .diggCount.count()
)
comments = (
    df_unique_videos
    .groupby(my_cut(df_unique_videos['commentCount'], buckets, upper_infinite=True), observed=False)
    .diggCount.count()
)

# Series -> two-column frame: the bucket column keeps the name of the binned
# column, the count column gets the new name
likes = likes.rename('likes').to_frame().reset_index()
comments = comments.rename('comments').to_frame().reset_index()

# Two stacked subplots: two rows, one column
fig = make_subplots(2, 1, subplot_titles=("Distribution of Likes", "Distribution of Comments"))

# (frame, bucket column, count column, trace name, bar colour) for each subplot row
bar_panels = [
    (likes, 'diggCount', 'likes', "Likes", 'rgb(162, 210, 255)'),
    (comments, 'commentCount', 'comments', "Comments", 'black'),
]
for panel_row, (counts, bucket_column, count_column, trace_name, bar_color) in enumerate(bar_panels, start=1):
    fig.add_trace(
        go.Bar(y=counts[bucket_column],
               x=counts[count_column],
               name=trace_name,
               text=counts[count_column],
               orientation='h',
               texttemplate='%{text:.2s}',
               textposition='outside',
               marker_color=bar_color),
        row=panel_row, col=1
    )

fig.update_layout(uniformtext_minsize=8,
                  uniformtext_mode='hide',
                  title_text="Multiple Subplots with Titles",
                  height=1200,
                  template='plotly_white',
                  margin=go.layout.Margin(
                      l=130, r=5, b=5, t=100, pad=10
                  ))

fig.update_xaxes(title_text='Videos')
fig.update_yaxes(title_text='Likes', col=1, row=1, automargin=False)
fig.update_yaxes(title_text='Comments', col=1, row=2, automargin=False)
fig.show(config={'displayModeBar': False})

In [13]:
# Restrict the unique videos to those with at most 50,000 likes
at_most_50k_likes = df_unique_videos['diggCount'] <= 50000
df_videos_users_focus = df_unique_videos.loc[at_most_50k_likes]

# Likes against comments (log-scaled y axis) with an OLS trendline
axis_labels = {'diggCount': 'Likes', 'commentCount': 'Comments'}
fig = px.scatter(
    df_videos_users_focus,
    x='diggCount',
    y='commentCount',
    labels=axis_labels,
    log_y=True,
    trendline='ols',
    trendline_color_override="#ff7096",
    template='plotly_white',
)

# Same colour and transparency for every marker
fig.update_traces(marker={'color': '#4cc9f0', 'opacity': 0.6})
fig.show()

Popular Hashtags

In [14]:
# Build the frame from the trending video list, give every hashtag of a video
# its own row, then expand each hashtag dictionary into 'hashtag.<key>' columns
df_hashtags = (
    pd.DataFrame(trending_videos_list)
    .explode('hashtags')
    .apply(object_to_columns, hashtags='hashtag', axis=1)
)

# Keep only the hashtag names; rows without a hashtag are dropped
hashtags = df_hashtags.loc[:, ['hashtag.name']].dropna().copy()
hashtags.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5330 entries, 2 to 999
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   hashtag.name  5330 non-null   object
dtypes: object(1)
memory usage: 83.3+ KB


In [15]:
# Count how often every hashtag occurs and keep the 15 most frequent ones.
# The counts are derived from df_hashtags rather than from `hashtags` itself,
# so re-running the cell does not count the already aggregated frame again.
hashtags = (
    df_hashtags[['hashtag.name']]
    .dropna()
    .groupby(['hashtag.name'])
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)[:15]
)

# Colours used for the pie slices
pie_colours = ["#f72585", "#b5179e",
               "#7209b7", "#560bad",
               "#480ca8", "#3a0ca3",
               "#3f37c9", "#4361ee",
               "#4895ef", "#4cc9f0"]

# Pie chart of the top hashtags, labelled with name and percentage
pie_trace = go.Pie(
    labels=hashtags['hashtag.name'],
    values=hashtags['count'],
    textinfo='label+percent',
    insidetextorientation='radial'
)
fig = go.Figure(data=[pie_trace], layout={'colorway': pie_colours})

fig.update_layout(title_text="Tiktok trending analytics",
                  height=700,
                  width=800,
                  margin=go.layout.Margin(
                      l=130, r=5, b=5, t=100, pad=10))
fig.show(config={'displayModeBar': False})

Enrich Data

In [16]:
# Load the CSV files used to enrich the TikTok data; only audd_music.csv is
# indexed by its 'id' column.
# NOTE(review): the paths are relative to the working directory — confirm the files live there.
df_audd_music = pd.read_csv('audd_music.csv', index_col='id')
df_audd_music_apple = pd.read_csv('audd_music_apple_music.csv')
df_audd_music_spotify = pd.read_csv('audd_music_spotify_music.csv')
df_audd_music_spotify_artists = pd.read_csv('audd_music_spotify_music_artists.csv')

In [17]:
# The current version of the dataset contains duplicated rows, remove them.
# drop_duplicates() ignores the index, so it would also discard rows that carry
# a *different* 'id' but the same column values, and those ids could then no
# longer be matched in the merge. Comparing the rows together with their index
# removes only true repeats.
is_repeated = df_audd_music.reset_index().duplicated().to_numpy()
df_audd_music = df_audd_music.loc[~is_repeated]

# Add a prefix to this dataset's columns before merging
df_audd_music = df_audd_music.add_prefix('_audd_music.')
df_audd_music.shape

(471, 9)

In [18]:
# Frame of the trending videos with every 'musicMeta' dictionary expanded into
# 'musicMeta.<key>' columns; the music id is cast to int64 so it can be used
# as a merge key
df_tiktok_music = (
    pd.DataFrame(trending_videos_list)
    .apply(object_to_columns, musicMeta='musicMeta', axis=1)
    .astype({'musicMeta.musicId': 'int64'})
)
df_tiktok_music.shape

(1000, 25)

In [19]:
# Left join: keep every TikTok video and attach the AudD columns wherever the
# video's music id equals the AudD 'id'
df_tiktok_audd_music = pd.merge(
    df_tiktok_music,
    df_audd_music,
    left_on='musicMeta.musicId',
    right_on='id',
    how='left',
)
df_tiktok_audd_music.shape

(1000, 34)

Original sound examples

In [20]:
# Keep the videos whose sound is named 'origineel geluid' and for which an
# AudD artist is present
is_original_sound = df_tiktok_audd_music['musicMeta.musicName'] == 'origineel geluid'
has_audd_artist = df_tiktok_audd_music['_audd_music.artist'].notna()
df_tiktok_audd_music = df_tiktok_audd_music.loc[is_original_sound & has_audd_artist]
df_tiktok_audd_music

Unnamed: 0,id,text,createTime,authorMeta,musicMeta,covers,webVideoUrl,videoUrl,videoUrlNoWaterMark,videoMeta,...,musicMeta.coverLarge,_audd_music.artist,_audd_music.title,_audd_music.album,_audd_music.release_date,_audd_music.label,_audd_music.timecode,_audd_music.song_link,_audd_music.apple_music.isrc,_audd_music.spotify.id
6,6895303013867539713,Oh no,1605437840,"{'id': '6879814870579512326', 'secUid': 'MS4wL...","{'musicId': '6893870343761496834', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@milanvannleeuwen/video...,https://v77.tiktokcdn.com/51d223926618e0839ece...,,"{'height': 1024, 'width': 576, 'duration': 11}",...,https://p16-sign-sg.tiktokcdn.com/aweme/1080x1...,LPTHERAPPER,Lmss,Lmss,2018-10-15,AK Noise,00:17,https://lis.tn/Lmss,,
150,6889831681469975810,Fishikta ameley❤️#eritreanmusic#habeshatiktok#...,1604163946,"{'id': '6775997579706008582', 'secUid': 'MS4wL...","{'musicId': '6881382384315419394', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@nardosabrahale/video/6...,https://v77.tiktokcdn.com/a2cabbf8ac7d8910a355...,,"{'height': 1024, 'width': 576, 'duration': 15}",...,https://p16-sign-sg.tiktokcdn.com/aweme/1080x1...,Abraham Afewerki,Semay,Semay,2006-06-01,Negarit Production,01:36,https://lis.tn/Semay,,
202,6883565215552654593,we surprised my mom with her dream car for her...,1602704922,"{'id': '6781188628154942469', 'secUid': 'MS4wL...","{'musicId': '6883565213174598401', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@curlsnclouds/video/688...,https://v77.tiktokcdn.com/ce8dacbe8736107f2db1...,,"{'height': 1024, 'width': 576, 'duration': 54}",...,https://p16-sign-sg.tiktokcdn.com/musically-ma...,Ritt Momney,Put Your Records On,Put Your Records On,2020-04-24,QuarterZip,00:20,https://lis.tn/PutYourRecordsOn,,
242,6876145412105899265,De trend na doen toch 🙃🙃 🏐🏐🏐#volleybal #traini...,1600977365,"{'id': '6725806948447060997', 'secUid': 'MS4wL...","{'musicId': '6875389627230915330', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@detwensechicks/video/6...,https://v19.tiktokcdn.com/0a2ac02f6301fa9975c3...,,"{'height': 1024, 'width': 576, 'duration': 11}",...,https://p16-sign-sg.tiktokcdn.com/musically-ma...,Lehambar Hussainpuri,Sadi Gali,Tanu Weds Manu,2011-02-02,T-Series,00:51,https://lis.tn/SadiGali,,1tEto4JrqNmBZFH5uAiYqb
297,6884691950478298370,🦋🦋🦋🦋🦋vlinders,1602967260,"{'id': '56906553353601024', 'secUid': 'MS4wLjA...","{'musicId': '6864179215273659141', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@maximeee.r/video/68846...,https://v77.tiktokcdn.com/38aae384548de6ad3c93...,,"{'height': 1024, 'width': 576, 'duration': 7}",...,https://p16-sign-sg.tiktokcdn.com/musically-ma...,Kris Kross Amsterdam,Mij Niet Eens Gezien,Mij Niet Eens Gezien,2020-07-24,WMG - Spinnin' Records (Distribution),02:33,https://lis.tn/MijNietEensGezien,NLZ542001176,39X7P5VjmG0zk8efBJL2HD
298,6888218363923942657,Legend dat ie dat durft,1603788318,"{'id': '6592928888640536582', 'secUid': 'MS4wL...","{'musicId': '6888218379384064769', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@waaromjaron/video/6888...,https://v77.tiktokcdn.com/664f938bc427587e8d5f...,,"{'height': 1024, 'width': 576, 'duration': 24}",...,https://p16-sign-sg.tiktokcdn.com/aweme/1080x1...,Fernando Velázquez,The Monster Wakes Up,A Monster Calls (Original Motion Picture Sound...,2016-12-09,Back Lot Music,02:53,https://lis.tn/TheMonsterWakesUp,USQ4E1602375,1XkvBIOIf4up5aTusU0vf2
334,6898350532415540482,#soundwavestattoo #spotifytattoo #tattoo #amst...,1606147396,"{'id': '6789291619143123974', 'secUid': 'MS4wL...","{'musicId': '6898350581761477377', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@mstar.ink/video/689835...,https://v33.tiktokcdn.com/3f7710e225edc9013019...,,"{'height': 1020, 'width': 576, 'duration': 14}",...,https://p16-sign-sg.tiktokcdn.com/musically-ma...,Stef Bos,Papa,Is Dit Nu Later,1990-03-16,Hkm Records nv,00:03,https://lis.tn/mKsZTP,BEJ019000009,6NP6BCW2M2I4vdcnXMAvjl
347,6876860979787959554,#fy #voorjou #foryou #fördich #covid #quarantine,1601143970,"{'id': '6811941079287743494', 'secUid': 'MS4wL...","{'musicId': '6876860989241838338', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@vooraluwlekkerefilmjos...,https://v77.tiktokcdn.com/9977049fdcd9b8c45d61...,,"{'height': 1024, 'width': 576, 'duration': 13}",...,https://p16-sign-sg.tiktokcdn.com/musically-ma...,The Love Unlimited Orchestra,Love's Theme,100 Essential Hits - 70s,2011-01-21,UMG - Brunswick,00:55,https://lis.tn/xsWYnp,USUMG0000356,4VpLTZ81muzN8ixvIvhmAt
348,6895338661622123777,Follow the chain #bottle #viral #bottleflipcha...,1605446140,"{'id': '6843492627725059078', 'secUid': 'MS4wL...","{'musicId': '6895338668958042882', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@whassup_bro/video/6895...,https://v77.tiktokcdn.com/52650d3235aa85e994d6...,,"{'height': 1024, 'width': 576, 'duration': 17}",...,https://p16-sign-sg.tiktokcdn.com/aweme/1080x1...,Norman Price,Gdzie Jest Biały Węgorz (Zejście),Narkotyki Są Nielegalne,2015-12-03,Gamellon,00:14,https://lis.tn/cuVkQ,,
487,6903854177072499969,Ruut doet mee aan de hype 😂 #foryou #fyp #voor...,1607428817,"{'id': '6652957483734974469', 'secUid': 'MS4wL...","{'musicId': '6903854187734452993', 'musicName'...",{'default': 'https://p16-sign-sg.tiktokcdn.com...,https://www.tiktok.com/@npo3nl/video/690385417...,https://v77.tiktokcdn.com/b532cfa208f3160ec203...,,"{'height': 1024, 'width': 576, 'duration': 14}",...,https://p16-sign-sg.tiktokcdn.com/aweme/1080x1...,Rolf Sanchez,Más Más Más,Más Más Más,2020-07-17,8ball Music,00:55,https://lis.tn/M%C3%A1sM%C3%A1sM%C3%A1s,NLZ292000124,28hEtjNvlDhtb38fgXsLRa


In [21]:
# Hashtag lists of the selected videos as a NumPy array, normalised into a
# frame for inspection. numpy is already imported as `np` in the import cell
# at the top of the notebook, so it is not imported again here.
tiktok_set_new = np.array(df_tiktok_audd_music['hashtags'])
pd.json_normalize(tiktok_set_new)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,,,,,,,,,,,,,,,,,
1,"{'id': '66390205', 'name': 'eritreanmusic', 't...","{'id': '1638738865659910', 'name': 'habeshatik...","{'id': '35322719', 'name': 'eritreangirl', 'ti...","{'id': '229207', 'name': 'fyp', 'title': '', '...","{'id': '42164', 'name': 'foryou', 'title': '',...","{'id': '6688', 'name': 'smile', 'title': 'Smil...","{'id': '1681081902185474', 'name': 'habeshahou...","{'id': '1648997583042566', 'name': 'habeshazoo...","{'id': '559221', 'name': 'gy', 'title': '', 'c...","{'id': '1669478018299909', 'name': 'eritreatim...",,,,,,,
2,"{'id': '15379', 'name': 'surprise', 'title': '...","{'id': '10007', 'name': 'birthday', 'title': '...","{'id': '6057', 'name': 'mom', 'title': '', 'co...","{'id': '229207', 'name': 'fyp', 'title': '', '...",,,,,,,,,,,,,
3,"{'id': '335226', 'name': 'volleybal', 'title':...","{'id': '12621', 'name': 'training', 'title': '...","{'id': '1741293', 'name': 'laat', 'title': '',...","{'id': '15836854', 'name': 'temelig', 'title':...",,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,
5,,,,,,,,,,,,,,,,,
6,"{'id': '1651372024079366', 'name': 'soundwaves...","{'id': '1677956684867586', 'name': 'spotifytat...","{'id': '17922', 'name': 'tattoo', 'title': '',...","{'id': '49294', 'name': 'amsterdam', 'title': ...","{'id': '23428', 'name': 'tiktok', 'title': '',...","{'id': '1655421416346630', 'name': 'amsterdamt...","{'id': '657467', 'name': 'spotify', 'title': '...","{'id': '84794', 'name': 'shocking', 'title': '...","{'id': '663471', 'name': 'tattooartist', 'titl...","{'id': '88764338', 'name': 'foryoupage', 'titl...","{'id': '58867304', 'name': 'coupletattoo', 'ti...",,,,,,
7,"{'id': '153828', 'name': 'fy', 'title': '', 'c...","{'id': '6830763', 'name': 'voorjou', 'title': ...","{'id': '42164', 'name': 'foryou', 'title': '',...","{'id': '1610614106412037', 'name': 'fördich', ...","{'id': '53168511', 'name': 'covid', 'title': '...","{'id': '636531', 'name': 'quarantine', 'title'...",,,,,,,,,,,
8,"{'id': '82512', 'name': 'bottle', 'title': '',...","{'id': '20884', 'name': 'viral', 'title': '', ...","{'id': '15065490', 'name': 'bottleflipchalleng...","{'id': '15176254', 'name': 'bottleflips', 'tit...","{'id': '229207', 'name': 'fyp', 'title': '', '...","{'id': '42164', 'name': 'foryou', 'title': '',...","{'id': '153828', 'name': 'fy', 'title': '', 'c...","{'id': '5432', 'name': 'cool', 'title': '', 'c...",,,,,,,,,
9,"{'id': '42164', 'name': 'foryou', 'title': '',...","{'id': '229207', 'name': 'fyp', 'title': '', '...","{'id': '6830763', 'name': 'voorjou', 'title': ...","{'id': '1603519477711878', 'name': 'voorjoupag...","{'id': '88764338', 'name': 'foryoupage', 'titl...",,,,,,,,,,,,


In [22]:
# Look the example row (position 2) up once and reuse it below
third_row = df_tiktok_audd_music.iloc[2]
videoUrl = third_row['musicMeta.playUrl']
url = third_row['webVideoUrl']

print('Url to full video:', url)
print('Sound recognized by Audd:', third_row['_audd_music.artist'],
      '-', third_row['_audd_music.title'])
print('original sounds:')

# Last expression: audio widget for the sound's play URL
Audio(videoUrl)

Url to full video: https://www.tiktok.com/@curlsnclouds/video/6883565215552654593
Sound recognized by Audd: Ritt Momney - Put Your Records On
original sounds:


Example: retrieving the 6th row of the data

In [23]:
# Take the 6th row (position 5) of df_tiktok_audd_music
row6 = df_tiktok_audd_music.iloc[5]
url, videoUrl = row6['webVideoUrl'], row6['musicMeta.playUrl']
recognized_artist = row6['_audd_music.artist']
recognized_title = row6['_audd_music.title']

print('Url to full video:', url)
print('Sound recognized by Audd:', recognized_artist, '-', recognized_title)
print('original sounds:')

# Last expression: audio widget for the sound's play URL
Audio(videoUrl)

Url to full video: https://www.tiktok.com/@waaromjaron/video/6888218363923942657
Sound recognized by Audd: Fernando Velázquez - The Monster Wakes Up
original sounds:


In [24]:
# Bucket edges from 0 to 240,000 in steps of 10,000;
# my_cut(..., upper_infinite=True) adds an open-ended bucket above the last edge.
bucks = list(range(0, 250000, 10000))

# Number of unique videos per bucket. observed=False is passed explicitly so
# that empty buckets stay in the result whatever default the installed pandas
# uses (the implicit default is deprecated, and warnings are silenced in this
# notebook, so the change would otherwise go unnoticed).
likes_most = (
    df_unique_videos
    .groupby(my_cut(df_unique_videos['diggCount'], bucks, upper_infinite=True), observed=False)
    .diggCount.count()
)
comments_most = (
    df_unique_videos
    .groupby(my_cut(df_unique_videos['commentCount'], bucks, upper_infinite=True), observed=False)
    .diggCount.count()
)

# Series -> two-column frame: the bucket column keeps the name of the binned
# column, the count column gets the new name
likes_most = likes_most.rename('likes_most').to_frame().reset_index()
comments_most = comments_most.rename('comments_most').to_frame().reset_index()

# Two stacked subplots: two rows, one column
fig = make_subplots(2, 1, subplot_titles=("Distribution of most likes", "Distribution of most comments"))

# (frame, bucket column, count column, trace name, bar colour) for each subplot row
bar_panels_most = [
    (likes_most, 'diggCount', 'likes_most', "Likes", '#f82342'),
    (comments_most, 'commentCount', 'comments_most', "Comments", 'green'),
]
for panel_row, (counts, bucket_column, count_column, trace_name, bar_color) in enumerate(bar_panels_most, start=1):
    fig.add_trace(
        go.Bar(y=counts[bucket_column],
               x=counts[count_column],
               name=trace_name,
               text=counts[count_column],
               orientation='h',
               texttemplate='%{text:.2s}',
               textposition='outside',
               marker_color=bar_color),
        row=panel_row, col=1
    )

fig.update_layout(uniformtext_minsize=8,
                  uniformtext_mode='hide',
                  title_text="Multiple Subplots with Titles",
                  height=1200,
                  template='plotly_white',
                  margin=go.layout.Margin(
                      l=130, r=5, b=5, t=100, pad=10
                  ))

fig.update_xaxes(title_text='Videos')
fig.update_yaxes(title_text='Likes', col=1, row=1, automargin=False)
fig.update_yaxes(title_text='Comments', col=1, row=2, automargin=False)
fig.show(config={'displayModeBar': False})

In [25]:
# Restrict the unique videos to those with at most 200,000 likes
at_most_200k_likes = df_unique_videos['diggCount'] <= 200000
plot_videos = df_unique_videos.loc[at_most_200k_likes]

# Likes against comments (log-scaled y axis) with an OLS trendline
likes_comments_labels = {'diggCount': 'Likes', 'commentCount': 'Comments'}
fig = px.scatter(
    plot_videos,
    x='diggCount',
    y='commentCount',
    labels=likes_comments_labels,
    log_y=True,
    trendline='ols',
    trendline_color_override="red",
    template='plotly_white',
)

# Same colour and transparency for every marker
fig.update_traces(marker={'color': '#008000', 'opacity': 0.6})
fig.show()

In [26]:
min_likes = 20000
max_likes = 30000

# Working assumption of this analysis: the videos of interest have between
# min_likes and max_likes likes (both bounds inclusive).
# Boolean filter selecting the videos in that range
filter_likes = (df_unique_videos['diggCount'] >= min_likes) & (df_unique_videos['diggCount'] <= max_likes)
videos_in_range = df_unique_videos[filter_likes]

# Draw up to 5 videos at random. The fixed random_state makes the cell show
# the same rows on every run; capping n avoids a ValueError when fewer than
# 5 videos fall inside the range.
df_filter = videos_in_range.sample(n=min(5, len(videos_in_range)), random_state=42)

print(f"Video through most likes is found between {min_likes} and {max_likes}")
df_filter

Video through most likes is found between 20000 and 30000


Unnamed: 0,id,text,createTime,webVideoUrl,videoUrl,videoUrlNoWaterMark,diggCount,shareCount,playCount,commentCount,...,covers.default,covers.origin,covers.dynamic,videoMeta.height,videoMeta.width,videoMeta.duration,hashtags.id,hashtags.name,hashtags.title,hashtags.cover
3060,6900478499933408513,Factory days #redbull #redbullracing #f1 #form...,1606642852,https://www.tiktok.com/@f1_flat_out/video/6900...,https://v77.tiktokcdn.com/f6de29f3420e91622faf...,,21700,1103,292500,78,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,1024,576,41,12573,redbull,,
4512,6877869447999245569,Let’s get those boulder shoulders fellas 😈 #bo...,1601378772,https://www.tiktok.com/@noeldeyzel_bodybuilder...,https://v77.tiktokcdn.com/2a11914e741a065d1085...,,23200,514,332900,124,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,960,540,15,19093,bodybuilding,,
5535,6904238654751870209,Рукожопы#встилеRIO #рекомендации #рек #стройка...,1607518332,https://www.tiktok.com/@laykron/video/69042386...,https://v77.tiktokcdn.com/8f0aaffcc42e40850132...,,28000,4533,2500000,668,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,960,540,9,49711363,рекомендации,,
2144,6907464609116982529,#kesfet #turkish #asianmakeup #foryou #viral #...,1608269434,https://www.tiktok.com/@achi.roffazuid/video/6...,https://v77.tiktokcdn.com/2f2e9c826f569f58fa12...,,25200,194,439700,984,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,1024,576,15,71559893,kesfet,,
1531,6890594872328195333,Reply to @lucasneal1 comment any other ideas i...,1604341686,https://www.tiktok.com/@bundaddy/video/6890594...,https://v77.tiktokcdn.com/1bea1665140377dba7a6...,,20000,210,111100,466,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-mali...,https://p16-sign-sg.tiktokcdn.com/obj/tos-mali...,https://p16-sign-sg.tiktokcdn.com/obj/tos-mali...,1024,576,58,229207,fyp,,


In [48]:
# Display df_unique_videos; the stored output below is pandas' truncated
# preview (first and last rows).
# NOTE(review): this cell's execution count (48) is out of sequence with its
# neighbours (26 and 27) - confirm it still works when run top to bottom.
df_unique_videos

Unnamed: 0,id,text,createTime,webVideoUrl,videoUrl,videoUrlNoWaterMark,diggCount,shareCount,playCount,commentCount,...,covers.default,covers.origin,covers.dynamic,videoMeta.height,videoMeta.width,videoMeta.duration,hashtags.id,hashtags.name,hashtags.title,hashtags.cover
0,6907228749016714497,Confidence went 📈,1608214517,https://www.tiktok.com/@ninakleij/video/690722...,https://v77.tiktokcdn.com/ed1f811617d7b5e18b8d...,,3710,50,44800,68,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,1024,576,15,,,,
1,6875468410612993286,Quiet Zone... follow me on insta: joeysofo. Co...,1600819763,https://www.tiktok.com/@joeysofo/video/6875468...,https://v77.tiktokcdn.com/ab935f1975cb8b69aebf...,,55700,1817,838100,936,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-mali...,https://p16-sign-sg.tiktokcdn.com/obj/tos-mali...,https://p16-sign-sg.tiktokcdn.com/obj/tos-mali...,1024,576,11,,,,
2,6898699405898059010,Iphone bend test🤗 #tiktok #viral #fyp #iphone ...,1606228625,https://www.tiktok.com/@jackeyephone/video/689...,https://v21.tiktokcdn.com/video/tos/alisg/tos-...,,936200,21100,15300000,27100,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,960,540,19,23428,tiktok,,
8,6902819837345533186,,1607187987,https://www.tiktok.com/@naomivaneeren/video/69...,https://v21.tiktokcdn.com/video/tos/alisg/tos-...,,12900,197,94900,143,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,1024,576,13,,,,
9,6905635666588192002,小技です👟✨#tiktok教室#tutorial,1607843600,https://www.tiktok.com/@io.dreamer_mk/video/69...,https://v77.tiktokcdn.com/7622979a5ceb97c6da4e...,,8805,198,115300,52,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,1024,576,22,1627704798586882,tiktok教室,今回の #TikTok教室 -今年学んだ知識編-では、色々あった2020年あなたが学んだ知識...,https://p77-sg.tiktokcdn.com/obj/tiktok-obj/84...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5671,6877191692341054721,#foryou #foryoupage,1601220970,https://www.tiktok.com/@artistmiranda/video/68...,https://v77.tiktokcdn.com/12644d87ac40de1c1ab2...,,13300,152,129300,111,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,1024,576,13,42164,foryou,,
5673,6908069845825359109,Stop eating 💩 #gttfg #gotothegym #swolefam #nu...,1608410366,https://www.tiktok.com/@papaswolio/video/69080...,https://v77.tiktokcdn.com/740d0b0889f43900715c...,,12200,223,80700,321,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-mali...,https://p16-sign-sg.tiktokcdn.com/obj/tos-mali...,https://p16-sign-sg.tiktokcdn.com/obj/tos-mali...,1024,576,20,1685997126071301,gttfg,,
5681,6883484287434378497,#fy #foryoupage #foryou,1602686079,https://www.tiktok.com/@sanaelfarah/video/6883...,https://v77.tiktokcdn.com/eed7b93664d2cfa5b122...,,26600,3392,449300,668,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,1024,576,13,153828,fy,,
5684,6898721943978036481,regretss 📈 #fyp #foryou #curls,1606233872,https://www.tiktok.com/@safae.kx/video/6898721...,https://v77.tiktokcdn.com/a402298fb8ed593ed456...,,10000,111,72200,274,...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,https://p16-sign-sg.tiktokcdn.com/obj/tos-alis...,1024,576,12,229207,fyp,,


In [27]:
import pandas as pd
import os
from IPython.display import Video, display

# --- 1. show_video helper ---
def show_video(file_path, width=None):
    """
    Display a local video file inline in the Jupyter notebook.

    Args:
        file_path (str): Relative or absolute path to the video file.
        width (int, optional): Player width forwarded to
            IPython.display.Video. None leaves the width unset.

    Returns:
        None. Problems are reported with printed messages instead of
        being raised to the caller.
    """
    # isfile (rather than exists) so that a directory path is reported as a
    # missing video instead of being handed to the player.
    if not os.path.isfile(file_path):
        print(f"Error: File video tidak ditemukan di '{file_path}'")
        print("Pastikan nama file dan jalurnya sudah benar.")
        return

    print(f"Memutar video dari: {file_path}")
    try:
        video_player = Video(file_path, embed=True, width=width)
        display(video_player)
    except Exception as e:
        # Best-effort playback: report the failure and keep the notebook running.
        print(f"Gagal memutar video. Error: {e}")
        print("Pastikan format video kompatibel (misalnya .mp4 atau .webm).")


# --- 2. Data preparation (run in the same cell as show_video) ---
# Placeholder records; replace them with your own data.
# NOTE(review): VIDEO_DIR is an absolute path on the author's Windows machine -
# point it at your local copy of the videos before running.
VIDEO_DIR = r'C:\Users\User\AppData\Roaming\Python\Python313\site-packages\ML-Production\Tiktok-ML\Videos'

data = {
    'desc': ['Video most like 1', 'Video most like 2', 'Video most like 3', 'Video most like 4', 'Video most like 5'],
    'diggCount_new': [21400, 21300, 27400, 24200, 21800],
    # Joined with an explicit backslash so the resulting strings are identical
    # on every operating system.
    'video_path': [
        VIDEO_DIR + '\\' + video_id + '.mp4'
        for video_id in (
            '6899843622355750145',
            '6904359798809054470',
            '6895048600355704070',
            '6888373722546294022',
            '6901026135119760642',
        )
    ],
    # NOTE(review): the second URL is a signed link carrying an x-tos-expires
    # timestamp, so it has presumably stopped working - confirm before relying on it.
    'tiktok_music': [
        'https://sf77-sg.tiktokcdn.com/obj/tiktok-obj/6879039742646815489.mp3',
        'https://video-sg.tiktokv.com/storage/v1/tos-alisg-v-27dcd7/ba0786fecf35498cba4e259725c4e440?a=1233&br=250&bt=125&cd=0%7C0%7C0&cr=0&cs=&dr=0&ds=&er=0&l=202012211557500101890660493A124105&lr=default&mime_type=audio_mpeg&qs=6&rc=Mzs8NnltPDdkeDMzODU8M0ApOGRoOWU1ODxlN2k8aTtnOGc1LXAxMm1rYGxfLS1jMTRzczZjYDZeMl8zYDA2L2FfNjM6Yw%3D%3D&vl=&vr=&x-tos-algorithm=v2&x-tos-authkey=5bf25627da095a5cba28ace592de46cc&x-tos-expires=1608587884&x-tos-signature=Vd-e9LNcb4goa8hg-FnQ-R-KstM',
        'https://sf16-sg.tiktokcdn.com/obj/musically-maliva-obj/6878032546374929158.mp3',
        'https://sf16-sg.tiktokcdn.com/obj/tiktok-obj/17fd2400f9ae0386ad9a5c4c86bc7405.m4a',
        'https://sf77-sg.tiktokcdn.com/obj/musically-maliva-obj/6863594361750440710.mp3',
    ],
}
df_path_videos = pd.DataFrame(data)

# After this cell runs, show_video and df_path_videos are ready to use.
# The message names df_path_videos because that is the DataFrame built here.
print("Persiapan selesai. Fungsi 'show_video' dan DataFrame 'df_path_videos' sudah dimuat.")

Persiapan selesai. Fungsi 'show_video' dan DataFrame 'df_unique_videos' sudah dimuat.


In [28]:
# Show the five sample records in df_path_videos
# (description, like count, local video path, music URL).
df_path_videos

Unnamed: 0,desc,diggCount_new,video_path,tiktok_music
0,Video most like 1,21400,C:\Users\User\AppData\Roaming\Python\Python313...,https://sf77-sg.tiktokcdn.com/obj/tiktok-obj/6...
1,Video most like 2,21300,C:\Users\User\AppData\Roaming\Python\Python313...,https://video-sg.tiktokv.com/storage/v1/tos-al...
2,Video most like 3,27400,C:\Users\User\AppData\Roaming\Python\Python313...,https://sf16-sg.tiktokcdn.com/obj/musically-ma...
3,Video most like 4,24200,C:\Users\User\AppData\Roaming\Python\Python313...,https://sf16-sg.tiktokcdn.com/obj/tiktok-obj/1...
4,Video most like 5,21800,C:\Users\User\AppData\Roaming\Python\Python313...,https://sf77-sg.tiktokcdn.com/obj/musically-ma...


In [29]:
# --- Call show_video from its own cell ---
# Run this cell separately, after the cell that defines show_video and
# df_path_videos.
try:
    # Draw one record at random from the prepared DataFrame (this is a random
    # pick, not the most-liked video).
    random_video = df_path_videos.sample(n=1)
    video_popular_path = random_video['video_path'].iloc[0]  # local path of the video file

    # URL of the TikTok music track listed for this record (only printed below)
    music_append = random_video['tiktok_music'].iloc[0]

    # Description stored with the record
    description = random_video['desc'].iloc[0]

    # Number of likes stored with the record
    likes_amount = random_video['diggCount_new'].iloc[0]

    # Summarise the selected record
    print("Videos information random sample which popular was being filtered:")
    print(f"  Deskripsi: {description}")
    print(f"  Jumlah Likes: {likes_amount}")
    print(f"  Jalur File: {video_popular_path}")
    print(f"  Add tiktok music: {music_append}")

    # Play the selected file
    show_video(video_popular_path, width=400)

except NameError:
    # Both names used above come from the preparation cell, so name them both.
    print("Error: DataFrame 'df_path_videos' atau fungsi 'show_video' belum didefinisikan.")
    print("Pastikan Anda sudah menjalankan cell pertama terlebih dahulu.")
except KeyError as missing_column:
    # Report the column that was actually missing rather than a fixed guess.
    print(f"Error: Kolom {missing_column} tidak ditemukan di dataframe.")
    print("Pastikan nama kolom sudah benar.")
except Exception as e:
    print(f"Terjadi kesalahan: {e}")

Videos information random sample which popular was being filtered:
  Deskripsi: Video most like 4
  Jumlah Likes: 24200
  Jalur File: C:\Users\User\AppData\Roaming\Python\Python313\site-packages\ML-Production\Tiktok-ML\Videos\6888373722546294022.mp4
  Add tiktok music: https://sf16-sg.tiktokcdn.com/obj/tiktok-obj/17fd2400f9ae0386ad9a5c4c86bc7405.m4a
Memutar video dari: C:\Users\User\AppData\Roaming\Python\Python313\site-packages\ML-Production\Tiktok-ML\Videos\6888373722546294022.mp4


TikTok Play Count Analytics for Data Science

In [30]:
# Bucket edges from 0 to 24,500,000 in steps of 500,000. upper_infinite=True
# below asks my_cut to append one open-ended bucket above the last edge.
buckets = list(range(0, 25000000, 500000))

# Count videos per bucket. observed=False is passed explicitly so that empty
# buckets stay in the result regardless of the pandas default for categorical
# groupers.
plays = df_unique_videos.groupby(
    my_cut(df_unique_videos['playCount'], buckets, upper_infinite=True),
    observed=False,
).playCount.count()
likes = df_unique_videos.groupby(
    my_cut(df_unique_videos['diggCount'], buckets, upper_infinite=True),
    observed=False,
).diggCount.count()

# Series -> two-column frame: the bucket label (kept under the source column
# name) and the number of videos in that bucket.
plays = plays.rename('plays').to_frame().reset_index()
likes = likes.rename('likes').to_frame().reset_index()


def _bucket_bar(frame, bucket_col, count_col, name, color):
    """Build a horizontal bar trace: bucket labels on y, video counts on x."""
    return go.Bar(
        y=frame[bucket_col],
        x=frame[count_col],
        name=name,
        text=frame[count_col],
        orientation='h',
        texttemplate='%{text:.2s}',
        textposition='outside',
        marker_color=color,
    )


# Two stacked subplots: plays on top, likes below
fig = make_subplots(rows=2, cols=1, subplot_titles=("Distribution of Plays", "Distribution of Likes"))

fig.add_trace(_bucket_bar(plays, 'playCount', 'plays', "Plays", 'rgb(162, 210, 255)'), row=1, col=1)
fig.add_trace(_bucket_bar(likes, 'diggCount', 'likes', "Likes", 'black'), row=2, col=1)

# The figure title describes the chart (it used to be the plotly docs placeholder).
fig.update_layout(uniformtext_minsize=8,
                  uniformtext_mode='hide',
                  title_text="Number of Videos per Play and Like Bucket",
                  height=1200,
                  template='plotly_white',
                  margin=go.layout.Margin(
                      l=130, r=5, b=5, t=100, pad=10
                  ))

fig.update_xaxes(title_text='Videos')
fig.update_yaxes(title_text='Plays', col=1, row=1, automargin=False)
fig.update_yaxes(title_text='Likes', col=1, row=2, automargin=False)
fig.show(config={'displayModeBar': False})

In [31]:
# Restrict the analysis to videos with at most 2,500,000 plays
df_videos_users_focus = df_unique_videos.loc[df_unique_videos['playCount'].le(2500000)]

# Likes against plays, with an OLS trendline and a logarithmic y axis
fig = px.scatter(
    df_videos_users_focus,
    x='playCount',
    y='diggCount',
    trendline='ols',
    trendline_color_override="#ff7096",
    log_y=True,
    labels=dict(playCount='Plays', diggCount='Likes'),
    template='plotly_white',
)

# Semi-transparent markers so overlapping points stay readable
fig.update_traces(marker={'color': '#4cc9f0', 'opacity': 0.6})
fig.show()

In [32]:
# Build df_plays from trending_videos_list in a single chain:
#   1. load the records into a DataFrame,
#   2. explode the 'playCount' column,
#   3. pass every row through object_to_columns.
# NOTE(review): object_to_columns is defined outside this cell and receives
# playCount='plays' as an extra keyword - confirm the helper actually uses it.
df_plays = (
    pd.DataFrame(trending_videos_list)
    .explode('playCount')
    .apply(object_to_columns, axis=1, playCount='plays')
)

# Keep only the play counts, as an independent frame without missing values
plays = df_plays.loc[:, ['playCount']].copy().dropna()
plays.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1000 entries, 0 to 999
Data columns (total 1 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   playCount  1000 non-null   int64
dtypes: int64(1)
memory usage: 15.6 KB


In [34]:
# Number of distinct play-count values to show in the pie chart
top_n = 4

# Count how often each play-count value occurs and keep the top_n most
# frequent ones. The table is derived from df_plays instead of mutating and
# overwriting `plays`, so re-running this cell always gives the same result.
plays = (
    df_plays[['playCount']]
    .dropna()
    .groupby('playCount')
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
    .head(top_n)
)

# Pie chart: one slice per play-count value, sized by its frequency
fig = go.Figure(data=[go.Pie(
    labels=plays['playCount'],
    values=plays['count'],
    textinfo='label+percent',
    insidetextorientation='radial'
)],
# Colour sequence used for the slices
layout={'colorway': ["#f72585","#b5179e",
                     "#7209b7","#560bad",
                     "#480ca8","#3a0ca3",
                     "#3f37c9","#4361ee",
                     "#4895ef","#4cc9f0"]})

fig.update_layout(title_text="Tiktok plays trending analytics",
                  height=700,
                  width=800,
                  margin=go.layout.Margin(
                      l=130, r=5, b=5, t=100, pad=10))
fig.show(config={'displayModeBar': False})