In [1]:
import pandas as pd

# Load the combined dataset
df = pd.read_csv("youtube_trending_master.csv")

# Preview the first few rows
df.head()

Unnamed: 0,video_id,title,publishedAt,channelId,channelTitle,categoryId,trending_date,tags,view_count,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,description,region
0,Iot0eF6EoNA,Sadak 2 | Official Trailer | Sanjay | Pooja | ...,2020-08-12T04:31:41Z,UCGqvJPRcv7aVFun-eTsatcA,FoxStarHindi,24,2020-08-12T00:00:00Z,sadak|sadak 2|mahesh bhatt|vishesh films|pooja...,9885899,224925,3979409,350210,https://i.ytimg.com/vi/Iot0eF6EoNA/default.jpg,False,False,Three Streams. Three Stories. One Journey. Sta...,IN
1,x-KbnJ9fvJc,Kya Baat Aa : Karan Aujla (Official Video) Tan...,2020-08-11T09:00:11Z,UCm9SZAl03Rev9sFwloCdz1g,Rehaan Records,10,2020-08-12T00:00:00Z,[None],11308046,655450,33242,405146,https://i.ytimg.com/vi/x-KbnJ9fvJc/default.jpg,False,False,Singer/Lyrics: Karan Aujla Feat Tania Music/ D...,IN
2,KX06ksuS6Xo,Diljit Dosanjh: CLASH (Official) Music Video |...,2020-08-11T07:30:02Z,UCZRdNleCgW-BGUJf-bbjzQg,Diljit Dosanjh,10,2020-08-12T00:00:00Z,clash diljit dosanjh|diljit dosanjh|diljit dos...,9140911,296533,6179,30058,https://i.ytimg.com/vi/KX06ksuS6Xo/default.jpg,False,False,CLASH official music video performed by DILJIT...,IN
3,UsMRgnTcchY,Dil Ko Maine Di Kasam Video | Amaal M Ft.Ariji...,2020-08-10T05:30:49Z,UCq-Fj5jknLsUf-MWSy4_brA,T-Series,10,2020-08-12T00:00:00Z,hindi songs|2020 hindi songs|2020 new songs|t-...,23564512,743931,84162,136942,https://i.ytimg.com/vi/UsMRgnTcchY/default.jpg,False,False,Gulshan Kumar and T-Series presents Bhushan Ku...,IN
4,WNSEXJJhKTU,"Baarish (Official Video) Payal Dev,Stebin Ben ...",2020-08-11T05:30:13Z,UCye6Oz0mg46S362LwARGVcA,VYRLOriginals,10,2020-08-12T00:00:00Z,VYRL Original|Mohsin Khan|Shivangi Joshi|Payal...,6783649,268817,8798,22984,https://i.ytimg.com/vi/WNSEXJJhKTU/default.jpg,False,False,VYRL Originals brings to you ‘Baarish’ - the b...,IN


In [2]:
# Grouping by Category ID and Region to get count of trending videos
category_region_group = df.groupby(['categoryId', 'region']).size().reset_index(name='trending_video_count')

# View result
category_region_group.sort_values(by='trending_video_count', ascending=False).head()

Unnamed: 0,categoryId,region,trending_video_count
104,24,IN,98230
106,24,KR,90028
105,24,JP,88286
101,24,DE,69436
99,24,BR,64894


In [3]:
# Group by Category to calculate total and average of views, likes, and comments
category_stats = df.groupby('categoryId').agg(
    total_views=('view_count', 'sum'),
    avg_views=('view_count', 'mean'),
    total_likes=('likes', 'sum'),
    avg_likes=('likes', 'mean'),
    total_comments=('comment_count', 'sum'),
    avg_comments=('comment_count', 'mean')
).reset_index()

# View result
category_stats.sort_values(by='total_views', ascending=False).head()

Unnamed: 0,categoryId,total_views,avg_views,total_likes,avg_likes,total_comments,avg_comments
2,10,1719743998807,3929335.0,107075674563,244650.453227,11337714331,25904.82816
9,24,1610094690414,2159911.0,74613325507,100092.328082,3813748211,5116.069208
6,20,508496977887,1269041.0,28339354071,70725.676129,1797393913,4485.702089
7,22,446552237879,1317776.0,24663271856,72781.353967,1226420616,3619.169163
4,17,370842569310,1184086.0,9305596631,29712.399321,685131327,2187.597032


In [4]:
# Count how many times each channel appears (most trending)
top_channels = df['channelTitle'].value_counts().reset_index()
top_channels.columns = ['channelTitle', 'trending_video_count']

# View result
top_channels.head(10)

Unnamed: 0,channelTitle,trending_video_count
0,HYBE LABELS,7080
1,BANGTANTV,6611
2,JYP Entertainment,6611
3,SMTOWN,6258
4,FORMULA 1,5145
5,MrBeast,5086
6,NFL,4120
7,Clash of Clans,4077
8,BLACKPINK,3943
9,NBA,3656


In [5]:
# Count how many videos trended per day
daily_trending = df['trending_date'].value_counts().reset_index()
daily_trending.columns = ['trending_date', 'video_count']

# Convert to datetime if needed
daily_trending['trending_date'] = pd.to_datetime(daily_trending['trending_date'], errors='coerce')

# View result
daily_trending.sort_values(by='trending_date').head()

Unnamed: 0,trending_date,video_count
1212,2020-08-12 00:00:00+00:00,2105
1219,2020-08-13 00:00:00+00:00,2103
1202,2020-08-14 00:00:00+00:00,2106
1170,2020-08-15 00:00:00+00:00,2114
968,2020-08-16 00:00:00+00:00,2142


In [6]:
# Add a new column for view-to-like ratio, avoiding division by zero
df['view_like_ratio'] = df.apply(lambda row: row['view_count'] / row['likes'] if row['likes'] != 0 else None, axis=1)

# View some samples sorted by best ratio (lower is better engagement)
df[['title', 'view_count', 'likes', 'view_like_ratio']].sort_values(by='view_like_ratio').head()

Unnamed: 0,title,view_count,likes,view_like_ratio
1525482,Live at the Met Gala With Vogue,0,92348,0.0
1866192,Anitta | RELEASED (Full Episode),0,28786,0.0
519512,Sahara 🔴 Live from Coachella 2024,0,131156,0.0
548615,Hello 2021 UK | YouTube NYE Big Night in with ...,0,14050,0.0
450709,Live at the Met Gala With Vogue,0,90325,0.0
