In [22]:
# Dependencies
import json
import pandas as pd
import numpy as np
from datetime import datetime
# Load the US videos dataset from the local Data directory
youtube_df = pd.read_csv(filepath_or_buffer='./Data/USvideos_c1.csv')

In [23]:
# Cast the two "disabled" flag columns to integers so they can be summed/counted.
# The columns are reassigned on the existing frame (no new DataFrame is created).
youtube_df['comments_disabled'] = youtube_df['comments_disabled'].astype('int')
youtube_df['ratings_disabled'] = youtube_df['ratings_disabled'].astype('int')


In [24]:
# Per-category totals of the engagement metrics, computed in a single pass.
# The metric columns are selected *before* summing: calling .sum() on the whole
# grouped frame would aggregate every other column as well (on pandas >= 2.0
# `numeric_only` defaults to False, so non-numeric columns are included too),
# which is wasteful and can fail on columns that cannot be summed.
category_totals = youtube_df.groupby('category_type')[
    ['views', 'likes', 'dislikes', 'comment_count']
].sum()

# Count videos per category and calculate each category's share of uploads (%)
videos_per_category = youtube_df.category_type.value_counts()
video_share = videos_per_category / videos_per_category.sum() * 100

# Total views per category and each category's share of all views (%)
views_per_category = category_totals['views']
view_share = views_per_category / views_per_category.sum() * 100

# Total likes per category and each category's share of all likes (%)
likes_per_category = category_totals['likes']
like_share = likes_per_category / likes_per_category.sum() * 100

# Total dislikes per category and each category's share of all dislikes (%)
dislikes_per_category = category_totals['dislikes']
dislike_share = dislikes_per_category / dislikes_per_category.sum() * 100

# Likes-to-dislikes ratio per category
# (a category with zero dislikes yields inf/NaN rather than raising)
ld_ratio = likes_per_category / dislikes_per_category

# Total comments per category and each category's share of all comments (%)
comments_per_category = category_totals['comment_count']
comment_share = comments_per_category / comments_per_category.sum() * 100

# Engagement-to-views ratio: (comments + likes + dislikes) per view
engagement_ratio = (
    comments_per_category + likes_per_category + dislikes_per_category
) / views_per_category

# Weighted variant of the engagement ratio in which comments count double
weighted_engagement_ratio = (
    (comments_per_category * 2) + likes_per_category + dislikes_per_category
) / views_per_category

In [25]:
# Assemble the per-category summary table from the previously computed series
# (the DataFrame constructor aligns them on their shared category index)
category_stats = pd.DataFrame({
    "Uploads": videos_per_category,
    "% Upload Share": video_share,
    "Views": views_per_category,
    "% View Share": view_share,
    "Likes": likes_per_category,
    "% Like Share": like_share,
    "Dislikes": dislikes_per_category,
    "% Dislike Share": dislike_share,
    "Likes/Dislikes Ratio": ld_ratio,
    "Comments": comments_per_category,
    "% Comments Share": comment_share,
    "Engagement Ratio": engagement_ratio,
    "Weighted Engagement Rate": weighted_engagement_ratio,
})
category_stats.index.name = 'Category'

# Display template for each column that gets formatted:
#   "{:,}"   -> thousands separator for the raw counts
#   "{:.2f}" -> two decimal places for percentage shares and the likes/dislikes ratio
# The two engagement columns are intentionally left as raw numbers.
# NOTE: formatting replaces the numeric values with strings.
column_templates = {
    "Uploads": "{:,}",
    "Views": "{:,}",
    "Likes": "{:,}",
    "Dislikes": "{:,}",
    "Comments": "{:,}",
    "% Upload Share": "{:.2f}",
    "% View Share": "{:.2f}",
    "% Like Share": "{:.2f}",
    "% Dislike Share": "{:.2f}",
    "% Comments Share": "{:.2f}",
    "Likes/Dislikes Ratio": "{:.2f}",
}
for column_label, template in column_templates.items():
    category_stats[column_label] = category_stats[column_label].map(template.format)

category_stats

Unnamed: 0_level_0,Uploads,% Upload Share,Views,% View Share,Likes,% Like Share,Dislikes,% Dislike Share,Likes/Dislikes Ratio,Comments,% Comments Share,Engagement Ratio,Weighted Engagement Rate
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Autos & Vehicles,384,0.94,520690717,0.54,4245656,0.14,243010,0.16,17.47,784447,0.23,0.010127,0.011634
Comedy,3457,8.44,5117426208,5.29,216346746,7.11,7230391,4.76,29.92,22545582,6.52,0.048095,0.052501
Education,1656,4.04,1180629990,1.22,49257772,1.62,1351972,0.89,36.43,5442242,1.57,0.047476,0.052086
Entertainment,9964,24.33,20604388195,21.31,530516491,17.44,42987663,28.29,12.34,73566498,21.27,0.031405,0.034975
Film & Animation,2345,5.73,7284156721,7.53,165997476,5.46,6075148,4.0,27.32,17887060,5.17,0.026078,0.028534
Gaming,817,2.0,2141218625,2.21,69038284,2.27,9184466,6.04,7.52,14740713,4.26,0.043416,0.0503
Howto & Style,4146,10.12,4078545064,4.22,162880075,5.36,5473899,3.6,29.76,23149550,6.69,0.046954,0.05263
Music,6472,15.81,40132892190,41.51,1416838584,46.59,51179008,33.68,27.68,125296396,36.22,0.039701,0.042823
News & Politics,2487,6.07,1473765704,1.52,18151033,0.6,4180049,2.75,4.34,6039433,1.75,0.01925,0.023348
Nonprofits & Activism,57,0.14,168941392,0.17,14815646,0.49,3310381,2.18,4.48,4808797,1.39,0.135756,0.16422


In [26]:
# category_stats.to_csv('./Data/category_stats.csv')