In [2]:
# Dependencies
import json
import pandas as pd
import numpy as np
from datetime import datetime
# Load the trending-videos dataset; the cells that follow derive their statistics from this frame
youtube_df = pd.read_csv(filepath_or_buffer='./Data/Youtube_last_trending.csv')

In [3]:
# List every column of the loaded frame together with its dtype.
# The boolean flag columns shown here are cast to int in a later cell so they can be summed.
youtube_df.dtypes

Unnamed: 0                 int64
video_id                  object
trending_date             object
title                     object
channel_title             object
category_id                int64
publish_time              object
tags                      object
views                      int64
likes                      int64
dislikes                   int64
comment_count              int64
comments_disabled           bool
ratings_disabled            bool
video_error_or_removed      bool
description               object
post_to_trend_days         int64
days_trending              int64
tags_count                 int64
category_type             object
dtype: object

In [4]:
# Number of distinct channel titles in the dataset
youtube_df['channel_title'].nunique()

2198

In [5]:
# Cast the three boolean flag columns to int so that summing them per channel yields a count
for flag_col in ('comments_disabled', 'ratings_disabled', 'video_error_or_removed'):
    youtube_df[flag_col] = youtube_df[flag_col].astype('int')

In [6]:
# Number of rows (trending videos) per channel, used as the upload count
upload_count = youtube_df['channel_title'].value_counts().astype('int')

# Group once and select the column *before* summing. Calling .sum() on the whole
# grouped frame adds up every column on each call and, on pandas versions where
# numeric_only defaults to False, also tries to add up the text columns.
channel_groups = youtube_df.groupby('channel_title')

# Total views per channel
views_per_channel = channel_groups['views'].sum()

# Total likes per channel
likes_per_channel = channel_groups['likes'].sum()

# Total dislikes per channel
dislikes_per_channel = channel_groups['dislikes'].sum()

# Total comments per channel
comments_per_channel = channel_groups['comment_count'].sum()

# Number of videos with comments disabled (the flag column was cast to int earlier)
comments_disabled_per_channel = channel_groups['comments_disabled'].sum()

# Number of videos with ratings disabled (the flag column was cast to int earlier)
ratings_disabled_per_channel = channel_groups['ratings_disabled'].sum()

# Sum of the days_trending column per channel
days_trending = channel_groups['days_trending'].sum()

# Trending days per video; both Series are indexed by channel title, so the
# division aligns on channel. astype('int') drops the fractional part.
trending_days_per_vid = (days_trending / upload_count).astype('int')

# Sum of the tags_count column per channel
tags_per_channel = channel_groups['tags_count'].sum()

# Sum of the post_to_trend_days column per channel
days_to_trend = channel_groups['post_to_trend_days'].sum()



In [8]:
# Inspect the per-channel upload counts
upload_count

ESPN                                      84
TheEllenShow                              74
The Tonight Show Starring Jimmy Fallon    72
Jimmy Kimmel Live                         70
Netflix                                   58
                                          ..
Famous Celebrities Snaps                   1
Onur Altuğ Sever                           1
Dawid Szmandra                             1
IFC                                        1
How It Should Have Ended                   1
Name: channel_title, Length: 2198, dtype: int32

In [6]:
# Assemble the per-channel aggregates into one table with display-friendly
# column labels, and label the index "Channel"
channel_stats = pd.DataFrame({
    "Uploads": upload_count,
    "Days Spent Trending": days_trending,
    "Days Spent Trending Per Video": trending_days_per_vid,
    "Tags Count": tags_per_channel,
    "Views": views_per_channel,
    "Likes": likes_per_channel,
    "Dislikes": dislikes_per_channel,
    "Comments": comments_per_channel,
    "Days Until Trending": days_to_trend,
    "Videos With Disabled Comments": comments_disabled_per_channel,
    "Videos With Disabled Ratings": ratings_disabled_per_channel,
}).rename_axis('Channel')
# Add comma separators
# channel_stats["Uploads"] = channel_stats["Uploads"].map("{:,}".format)
# channel_stats["Views"] = channel_stats["Views"].map("{:,}".format)
# channel_stats["Likes"] = channel_stats["Likes"].map("{:,}".format)
# channel_stats["Dislikes"] = channel_stats["Dislikes"].map("{:,}".format)
# channel_stats["Comments"] = channel_stats["Comments"].map("{:,}".format)
# channel_stats

In [7]:
# Build the same per-channel table, keyed by snake_case column names
channel_stats_alt_titles = pd.DataFrame(
    {
        "upload_count": upload_count,
        "days_trending": days_trending,
        "trending_days_per_vid": trending_days_per_vid,
        "tags_per_channel": tags_per_channel,
        "views_per_channel": views_per_channel,
        "likes_per_channel": likes_per_channel,
        "dislikes_per_channel": dislikes_per_channel,
        "comments_per_channel": comments_per_channel,
        "days_to_trend": days_to_trend,
        "comments_disabled_per_channel": comments_disabled_per_channel,
        "ratings_disabled_per_channel": ratings_disabled_per_channel,
    }
)

# Label the index of the frame built in this cell. The previous version set
# channel_stats.index.name here, which left this frame's index unnamed and made
# the cell depend on a variable from a different cell.
channel_stats_alt_titles.index.name = 'Channel'

In [14]:
# Bucket every channel by upload count and total trending days:
#   "1": exactly one video and exactly one trending day
#   "2": exactly one video and more than one trending day
#   "3": more than one video and more than one trending day
uploads = channel_stats_alt_titles['upload_count']
trend_days = channel_stats_alt_titles['days_trending']

# Vectorized replacement for the row-by-row loop; rows matching no rule get ""
channel_cat = np.select(
    [
        (uploads == 1) & (trend_days == 1),
        (uploads == 1) & (trend_days > 1),
        (uploads > 1) & (trend_days > 1),
    ],
    ["1", "2", "3"],
    default="",
).tolist()

# The loop version skipped unmatched rows, so the list came out short and the
# column assignment failed with a length-mismatch error. Keep failing, but say why.
unmatched = channel_cat.count("")
if unmatched:
    raise ValueError(
        "{} channel(s) match none of the three channel_cat rules".format(unmatched)
    )

channel_stats_alt_titles["channel_cat"] = channel_cat

In [None]:
# Inspect the column dtypes of the snake_case per-channel frame
channel_stats_alt_titles.dtypes

In [None]:
# Channels with exactly one video and exactly one trending day
trending_sv_sd = channel_stats_alt_titles.loc[
    channel_stats_alt_titles['upload_count'].eq(1)
    & channel_stats_alt_titles['days_trending'].eq(1)
]

In [None]:
# Channels with more than one video and more than one trending day
trending_multi_vid = channel_stats_alt_titles.loc[
    channel_stats_alt_titles['upload_count'].gt(1)
    & channel_stats_alt_titles['days_trending'].gt(1)
]

# Channels with exactly one video that has more than one trending day
trending_single_vid = channel_stats_alt_titles.loc[
    channel_stats_alt_titles['upload_count'].eq(1)
    & channel_stats_alt_titles['days_trending'].gt(1)
]

In [17]:
# Sum of upload_count within each channel category
vids_per_channel_cat = (
    channel_stats_alt_titles
    .groupby('channel_cat')['upload_count']
    .sum()
)
vids_per_channel_cat

channel_cat
1     123
2    1266
3    4962
Name: upload_count, dtype: int32

In [18]:
# channel_stats_alt_titles.to_csv('./Data/channel_stats_alt.csv')

In [None]:
# channel_stats.to_csv('./Data/channel_stats.csv')

In [None]:
# Export CSV
# trending_multi_vid.to_csv('./Data/multi_vid_channel_stats.csv')
# trending_single_vid.to_csv('./Data/single_vid_channel_stats.csv')

