# [Mixes - Podcasts - Live performances](https://www.youtube.com/playlist?list=PLOMUdQFdS-XMJ4NFHJlSALYAt4l-LPgS1): Global statistics & Engagement Report

## Libraries

In [105]:
import itertools
import datetime
import pandas as pd
import plotly.express as px

In [106]:
# Capture the time of this run (datetime.now() without a tz argument, so a naive
# local timestamp) and print it so the rendered notebook shows when it was executed.
RUN_DATETIME = datetime.datetime.now()
print(f'NOTEBOOK RUN DATETIME: {RUN_DATETIME}')

NOTEBOOK RUN DATETIME: 2022-06-12 10:39:11.136770


## Data import


Including a few preprocessing steps:

- Generating `likes_comments` from sum of `likes` and `comments` features
- Generating `community_score` from `likes_comments` and `views`
- Changing `release_date` type to datetime
- Generating `release_weekday`, `release_hour` and `release_day_hour` from `release_date`

In [107]:
def get_weekday(date: datetime.datetime):
    """Return the three-letter weekday label of a date.

    :param date: date as datetime.datetime object.
    :return: weekday label, one of 'MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'.
    """
    # ISO weekday numbers run from 1 (Monday) to 7 (Sunday), so key the labels on them directly.
    iso_labels = dict(enumerate(('MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'), start=1))
    return iso_labels[datetime.datetime.isoweekday(date)]

In [108]:
# Weekday labels, Monday first.
weekday_cat = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
# Every '<DAY>-<HH>' slot of the week (7 days x 24 hours), day-major then hour.
weekday_hour_cat = [
    f'{day}-{hour:02d}'
    for day in weekday_cat
    for hour in range(24)
]

In [109]:
# Load the mix history and discard the two status columns.
data = (
    pd.read_csv('../data/mix_history.csv')
    .drop(columns=['status', 'live_status'])
)
data

Unnamed: 0,video_id,release_date,channel_id,channel_name,item_id,views,likes,comments,duration
0,VdrEZmcVGdw,2022-06-02 15:00:13+00:00,UCiiN5Ld9pGMwcYrLUJfYlCQ,Mixmash Records,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,332,27,3,3611
1,X7EpursbCsY,2022-06-02 16:00:13+00:00,UCLxqd1S685Mpyk9wy8jkVJQ,Dannic,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,923,62,6,3602
2,s8usv-z539Q,2022-06-02 20:13:04+00:00,UCu5jfQcpRLm9xhmlSd5S8xw,Armin van Buuren,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,158469,4650,226,7240
3,Sh_KjmHvMLY,2022-06-03 07:00:24+00:00,UCsN8M73DMWa8SPp5o_0IAQQ,Tomorrowland,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,11117,474,44,3556
4,QaiNPysoDmI,2022-06-03 08:02:49+00:00,UCO0sfpPwj3PGVVH_jiqBA6A,Future House Cloud,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,3757,95,14,4798
5,ANJqb-2Ejj0,2022-06-03 14:00:06+00:00,UCPk3RMMXAfLhMJPFpQhye9g,Tiësto,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,67211,2178,142,3584
6,rdxw5ilpmQ4,2022-06-03 15:00:11+00:00,UCGZXYc32ri4D0gSLPf2pZXQ,Armada Music TV,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,12393,559,29,5406
7,cPRBXdm5-Wg,2022-06-03 15:00:22+00:00,UCnhHe0_bk_1_0So41vsZvWw,Revealed Music,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,1911,111,6,3616
8,1dMadjMQFN8,2022-06-03 18:00:12+00:00,UCDVKYPXwdYUQfgA05CkyFSg,GameChops,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,61665,4017,133,1384
9,WdnKWW2WY3k,2022-06-03 20:00:12+00:00,UCmKm7HJdOfkWLyml-fzKlVg,Afrojack,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,7519,286,23,3592


In [110]:
# Combined engagement per video: likes plus comments.
data['likes_comments'] = data['likes'] + data['comments']

# Dataset-wide totals and the overall number of views per like/comment.
sum_views = data['views'].sum()
sum_likes_comments = data['likes_comments'].sum()
ratio = sum_views / sum_likes_comments

# Community score: likes + comments per view, computed for each video.
data['community_score'] = data['likes_comments'].div(data['views'])

# TODO: find a good coefficient
# data['lc2_per_views'] = data.likes_comments * 10 / data.views

In [111]:
# Parse the release timestamps, then derive the weekday, hour and combined
# '<DAY>-<HH>' features from them.
data['release_date'] = pd.to_datetime(data['release_date'])
data['release_weekday'] = data['release_date'].apply(get_weekday)
data['release_hour'] = data['release_date'].dt.hour
data['release_day_hour'] = (
    data['release_weekday']
    + '-'
    + data['release_hour'].apply(lambda hour: f'{hour:02d}')
)

# Store both label columns as categoricals whose categories are listed in calendar order.
data['release_weekday'] = pd.Categorical(data['release_weekday'], categories=weekday_cat)
data['release_day_hour'] = pd.Categorical(data['release_day_hour'], categories=weekday_hour_cat)
# data.release_hour = data.release_date.dt.time

In [112]:
# Display the frame now that the derived columns have been added.
data

Unnamed: 0,video_id,release_date,channel_id,channel_name,item_id,views,likes,comments,duration,likes_comments,community_score,release_weekday,release_hour,release_day_hour
0,VdrEZmcVGdw,2022-06-02 15:00:13+00:00,UCiiN5Ld9pGMwcYrLUJfYlCQ,Mixmash Records,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,332,27,3,3611,30,0.090361,THU,15,THU-15
1,X7EpursbCsY,2022-06-02 16:00:13+00:00,UCLxqd1S685Mpyk9wy8jkVJQ,Dannic,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,923,62,6,3602,68,0.073673,THU,16,THU-16
2,s8usv-z539Q,2022-06-02 20:13:04+00:00,UCu5jfQcpRLm9xhmlSd5S8xw,Armin van Buuren,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,158469,4650,226,7240,4876,0.030769,THU,20,THU-20
3,Sh_KjmHvMLY,2022-06-03 07:00:24+00:00,UCsN8M73DMWa8SPp5o_0IAQQ,Tomorrowland,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,11117,474,44,3556,518,0.046595,FRI,7,FRI-07
4,QaiNPysoDmI,2022-06-03 08:02:49+00:00,UCO0sfpPwj3PGVVH_jiqBA6A,Future House Cloud,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,3757,95,14,4798,109,0.029013,FRI,8,FRI-08
5,ANJqb-2Ejj0,2022-06-03 14:00:06+00:00,UCPk3RMMXAfLhMJPFpQhye9g,Tiësto,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,67211,2178,142,3584,2320,0.034518,FRI,14,FRI-14
6,rdxw5ilpmQ4,2022-06-03 15:00:11+00:00,UCGZXYc32ri4D0gSLPf2pZXQ,Armada Music TV,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,12393,559,29,5406,588,0.047446,FRI,15,FRI-15
7,cPRBXdm5-Wg,2022-06-03 15:00:22+00:00,UCnhHe0_bk_1_0So41vsZvWw,Revealed Music,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,1911,111,6,3616,117,0.061224,FRI,15,FRI-15
8,1dMadjMQFN8,2022-06-03 18:00:12+00:00,UCDVKYPXwdYUQfgA05CkyFSg,GameChops,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,61665,4017,133,1384,4150,0.067299,FRI,18,FRI-18
9,WdnKWW2WY3k,2022-06-03 20:00:12+00:00,UCmKm7HJdOfkWLyml-fzKlVg,Afrojack,UExPTVVkUUZkUy1YTUo0TkZISmxTQUxZQXQ0bC1MUGdTMS...,7519,286,23,3592,309,0.041096,FRI,20,FRI-20


In [113]:
# Channel database: exactly one row per channel ID, paired with the channel's current name.
# Deduplicate on `channel_id` alone. Deduplicating on the (id, name) pair keeps one row
# per name a channel has ever used, so any table later merged against this one on
# `channel_id` would get that channel's rows duplicated.
# NOTE(review): `keep='last'` yields the most recent name only if `data` is in
# chronological order — confirm against how the CSV is produced.
channel_db = (
    data.loc[:, ['channel_id', 'channel_name']]
    .drop_duplicates(subset='channel_id', keep='last')
    .sort_values('channel_name', ignore_index=True)
)

channel_db

Unnamed: 0,channel_id,channel_name
0,UCalCDSmZAYD73tqVZ4l8yJg,A State Of Trance
1,UCmKm7HJdOfkWLyml-fzKlVg,Afrojack
2,UCGZXYc32ri4D0gSLPf2pZXQ,Armada Music TV
3,UCu5jfQcpRLm9xhmlSd5S8xw,Armin van Buuren
4,UCLxqd1S685Mpyk9wy8jkVJQ,Dannic
5,UCGXkgynD9YyoEIVyqpaUElw,Enhanced Music
6,UCO0sfpPwj3PGVVH_jiqBA6A,Future House Cloud
7,UCDVKYPXwdYUQfgA05CkyFSg,GameChops
8,UC5H_KXkPbEsGs0tFt8R35mA,Martin Garrix
9,UCiiN5Ld9pGMwcYrLUJfYlCQ,Mixmash Records


## Upload frequency

### Dataframe generation

In [114]:
# All per-channel tables share the same grouping; `channel_id` stays a regular column.
engagement_cols = ['views', 'likes', 'comments', 'likes_comments']
by_channel = data.groupby('channel_id', as_index=False)

# Number of videos uploaded by each channel
upload_count = by_channel[['item_id']].count().rename(columns={'item_id': 'video_count'})

# Per-channel totals of views, likes, comments and likes + comments
stats_sum = by_channel[engagement_cols].sum()

# Per-channel averages of the same metrics
stats_avg = by_channel[engagement_cols].mean()

In [115]:
# Attach the channel name to each per-channel table. No `on=` is given, so the
# merge joins on whichever columns the two frames have in common.
upload_count = pd.merge(upload_count, channel_db)
stats_sum = pd.merge(stats_sum, channel_db)
stats_avg = pd.merge(stats_avg, channel_db)

### Graphics

#### Video count

In [116]:
# Bar chart: number of videos per channel, coloured by the same value.
title_1 = 'Video count by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    upload_count,
    x='channel_name',
    y='video_count',
    color='video_count',
    color_continuous_scale='orrd',
    title=title_1,
    width=960,
    height=540,
)

# Order the bars from highest to lowest total and hide the colour bar.
fig.update_layout(xaxis_categoryorder='total descending')
fig.update_coloraxes(showscale=False)

fig.show()

#### Total engagement by channel

In [117]:
# Bar chart: summed views per channel, coloured by the same value.
title_2_1 = 'Total views by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_sum,
    x='channel_name',
    y='views',
    color='views',
    color_continuous_scale='orrd',
    title=title_2_1,
    width=960,
    height=540,
)

# Order the bars from highest to lowest total and hide the colour bar.
fig.update_layout(xaxis_categoryorder='total descending')
fig.update_coloraxes(showscale=False)

fig.show()

In [118]:
# Bar chart: summed likes per channel, coloured by the same value.
title_2_2 = 'Total likes by channel<br><sup>Since June 2022</sup>'
fig_2 = px.bar(
    stats_sum,
    x='channel_name',
    y='likes',
    color='likes',
    color_continuous_scale='orrd',
    title=title_2_2,
    width=960,
    height=540,
)

# Order the bars from highest to lowest total and hide the colour bar.
fig_2.update_layout(xaxis_categoryorder='total descending')
fig_2.update_coloraxes(showscale=False)

fig_2.show()

In [119]:
# Bar chart: summed comments per channel, coloured by the same value.
title_2_3 = 'Total comments by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_sum,
    x='channel_name',
    y='comments',
    color='comments',
    color_continuous_scale='orrd',
    title=title_2_3,
    width=960,
    height=540,
)

# Order the bars from highest to lowest total and hide the colour bar.
fig.update_layout(xaxis_categoryorder='total descending')
fig.update_coloraxes(showscale=False)

fig.show()

In [120]:
# Bar chart: summed likes + comments per channel, coloured by the same value.
title_2_4 = 'Total likes + comments by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_sum,
    x='channel_name',
    y='likes_comments',
    color='likes_comments',
    color_continuous_scale='orrd',
    title=title_2_4,
    width=960,
    height=540,
)

# Order the bars from highest to lowest total and hide the colour bar.
fig.update_layout(xaxis_categoryorder='total descending')
fig.update_coloraxes(showscale=False)

fig.show()

#### Average engagement by channel

In [121]:
# Bar chart: mean views per video for each channel, coloured by the same value.
title_3_1 = 'Average viewership by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_avg,
    x='channel_name',
    y='views',
    color='views',
    color_continuous_scale='orrd',
    title=title_3_1,
    width=960,
    height=540,
)

# Order the bars from highest to lowest total and hide the colour bar.
fig.update_layout(xaxis_categoryorder='total descending')
fig.update_coloraxes(showscale=False)

fig.show()

In [122]:
# Bar chart: mean likes per video for each channel, coloured by the same value.
title_3_2 = 'Average amount of likes by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_avg,
    x='channel_name',
    y='likes',
    color='likes',
    color_continuous_scale='orrd',
    title=title_3_2,
    width=960,
    height=540,
)

# Order the bars from highest to lowest total and hide the colour bar.
fig.update_layout(xaxis_categoryorder='total descending')
fig.update_coloraxes(showscale=False)

fig.show()

In [123]:
# Bar chart: mean comments per video for each channel, coloured by the same value.
title_3_3 = 'Average amount of comments by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_avg,
    x='channel_name',
    y='comments',
    color='comments',
    color_continuous_scale='orrd',
    title=title_3_3,
    width=960,
    height=540,
)

# Order the bars from highest to lowest total and hide the colour bar.
fig.update_layout(xaxis_categoryorder='total descending')
fig.update_coloraxes(showscale=False)

fig.show()

In [124]:
# Bar chart: mean likes + comments per video for each channel, coloured by the same value.
title_3_4 = 'Average amount of likes and comments by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_avg,
    x='channel_name',
    y='likes_comments',
    color='likes_comments',
    color_continuous_scale='orrd',
    title=title_3_4,
    width=960,
    height=540,
)

# Order the bars from highest to lowest total and hide the colour bar.
fig.update_layout(xaxis_categoryorder='total descending')
fig.update_coloraxes(showscale=False)

fig.show()