In [1]:
# Dependencies
import json
import pandas as pd
import numpy as np
from datetime import datetime
# Load the trending-videos CSV (path is relative to the notebook's working directory)
youtube_df = pd.read_csv(filepath_or_buffer='./Data/Youtube_last_trending.csv')

In [2]:
# Cast the 'comments_disabled' and 'ratings_disabled' columns to 'int'
# so that they can be summed into counts later on
for flag_column in ('comments_disabled', 'ratings_disabled'):
    youtube_df[flag_column] = youtube_df[flag_column].astype('int')


In [3]:
# Per-category summary metrics. Every result below is a Series indexed by 'category_type'.

# Count videos per category and calculate each category's share of all videos (in %)
videos_per_category = youtube_df.category_type.value_counts()
video_share = videos_per_category/videos_per_category.sum() * 100

# Sum the metric columns per category ONCE, restricted to the columns actually used.
# Calling .sum() on the whole grouped frame (and repeating it for every metric) also
# aggregates every other column; with pandas >= 2.0 that includes non-numeric columns,
# which is slow and can raise for dtypes that do not support addition.
category_totals = youtube_df.groupby('category_type')[
    ['views', 'likes', 'dislikes', 'comment_count']
].sum()

# Views per category and view share (in %)
views_per_category = category_totals['views']
view_share = views_per_category/views_per_category.sum() * 100
# Likes per category and like share (in %)
likes_per_category = category_totals['likes']
like_share = likes_per_category/likes_per_category.sum() * 100
# Dislikes per category and dislike share (in %)
dislikes_per_category = category_totals['dislikes']
dislike_share = dislikes_per_category/dislikes_per_category.sum() * 100
# Likes/dislikes ratio per category
ld_ratio = likes_per_category/dislikes_per_category
# Comments per category and comment share (in %)
comments_per_category = category_totals['comment_count']
comment_share = comments_per_category/comments_per_category.sum() * 100

# Engagement-to-views ratio: (comments + likes + dislikes) per view
engagement_ratio = (comments_per_category + likes_per_category + dislikes_per_category)/views_per_category
# Weighted variant of the engagement ratio in which every comment counts twice
weighted_engagement_ratio = ((comments_per_category * 2) + likes_per_category + dislikes_per_category)/views_per_category

In [4]:
# Assemble the per-category Series into one summary table (rows are aligned on the category index)
summary_columns = {
    "Uploads": videos_per_category,
    "% Upload Share": video_share,
    "Views": views_per_category,
    "% View Share": view_share,
    "Likes": likes_per_category,
    "% Like Share": like_share,
    "Dislikes": dislikes_per_category,
    "% Dislike Share": dislike_share,
    "Likes/Dislikes Ratio": ld_ratio,
    "Comments": comments_per_category,
    "% Comments Share": comment_share,
    "Engagement Ratio": engagement_ratio,
    "Weighted Engagement Rate": weighted_engagement_ratio,
}
category_stats = pd.DataFrame(summary_columns)
category_stats.index.name = 'Category'

# NOTE: the formatting below replaces the numeric values with display strings.

# Count columns: use a comma as the thousands separator
for count_column in ("Uploads", "Views", "Likes", "Dislikes", "Comments"):
    category_stats[count_column] = category_stats[count_column].map("{:,}".format)

# Percentage shares and the Likes/Dislikes ratio: show two decimal places
for two_decimal_column in (
    "% Upload Share",
    "% View Share",
    "% Like Share",
    "% Dislike Share",
    "% Comments Share",
    "Likes/Dislikes Ratio",
):
    category_stats[two_decimal_column] = category_stats[two_decimal_column].map("{:.2f}".format)

# Last expression of the cell: render the table
category_stats

Unnamed: 0_level_0,Uploads,% Upload Share,Views,% View Share,Likes,% Like Share,Dislikes,% Dislike Share,Likes/Dislikes Ratio,Comments,% Comments Share,Engagement Ratio,Weighted Engagement Rate
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Autos & Vehicles,71,1.12,102913586,0.83,730306,0.21,46397,0.24,15.74,123347,0.3,0.008746,0.009944
Comedy,544,8.58,773843693,6.23,29960684,8.52,1067507,5.49,28.07,2986525,7.31,0.043956,0.047815
Education,250,3.94,152537923,1.23,5401086,1.54,189835,0.98,28.45,633836,1.55,0.040808,0.044963
Entertainment,1619,25.52,2826325423,22.77,67383259,19.17,6728191,34.6,10.02,9887360,24.18,0.02972,0.033218
Film & Animation,319,5.03,798670825,6.43,16456055,4.68,681795,3.51,24.14,1938578,4.74,0.023885,0.026312
Gaming,103,1.62,239483937,1.93,6991366,1.99,624992,3.21,11.19,1386717,3.39,0.037594,0.043384
Howto & Style,594,9.36,557056875,4.49,20646383,5.87,705227,3.63,29.28,2894411,7.08,0.043525,0.048721
Music,798,12.58,4800022515,38.66,149530945,42.53,5237802,26.93,28.55,12807782,31.33,0.034912,0.03758
News & Politics,504,7.95,231805196,1.87,3011176,0.86,769830,3.96,3.91,1078391,2.64,0.020963,0.025615
Nonprofits & Activism,14,0.22,31064609,0.25,2388624,0.68,563262,2.9,4.24,740432,1.81,0.118859,0.142695


In [5]:
# category_stats.to_csv('./Data/category_stats.csv')