# [Mixes - Podcasts - Live performances](https://www.youtube.com/playlist?list=PLOMUdQFdS-XMJ4NFHJlSALYAt4l-LPgS1): Global statistics & Engagement Report

## Libraries

In [None]:
import itertools
import datetime
import pandas as pd
import plotly.express as px

In [None]:
# Timestamp of this notebook run
RUN_DATETIME = datetime.datetime.now()

# Whole days elapsed since 2022-06-01, and the same span expressed in (fractional) weeks
SINCE_0106 = (RUN_DATETIME - datetime.datetime(2022, 6, 1)).days
SINCE_0106_WEEKS = SINCE_0106 / 7

print(f'NOTEBOOK RUN DATETIME: {RUN_DATETIME}')

## Data import


Including a few preprocessing steps:

- Generating `likes_comments` from the sum of the `likes` and `comments` features
- Converting `release_date` to datetime
- Generating `release_weekday`, `release_hour` and `release_day_hour` from `release_date`

In [None]:
def get_weekday(date: datetime.datetime):
    """Return the three-letter weekday label of a datetime.

    :param date: date as datetime.datetime object.
    :return: weekday label, one of 'MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'.
    """
    # isoweekday() numbers the days from 1 (Monday) to 7 (Sunday)
    iso_day = datetime.datetime.isoweekday(date)
    labels = ('MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN')
    return labels[iso_day - 1]

In [None]:
# Weekday labels, Monday first
weekday_cat = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']

# Every weekday / hour slot as 'DAY-HH' (e.g. 'MON-00'), ordered by weekday then hour
weekday_hour_cat = [
    f'{day}-{hour:02d}'
    for day in weekday_cat
    for hour in range(24)
]

In [None]:
# Load the mix history and discard the two status columns
data = pd.read_csv('../data/mix_history.csv')
data = data.drop(columns=['status', 'live_status'])
data

In [None]:
# Combined engagement metric: likes plus comments, row by row
data['likes_comments'] = data['likes'] + data['comments']

In [None]:
# Parse the release timestamps, then derive the weekday, hour and 'DAY-HH' labels
data['release_date'] = pd.to_datetime(data['release_date'])
data['release_weekday'] = data['release_date'].apply(get_weekday)
data['release_hour'] = data['release_date'].dt.hour
data['release_day_hour'] = (
    data['release_weekday'] + '-' + data['release_hour'].map('{:02d}'.format)
)

# Turn both label columns into categoricals over the predefined category lists
data['release_weekday'] = pd.Categorical(data['release_weekday'], categories=weekday_cat)
data['release_day_hour'] = pd.Categorical(data['release_day_hour'], categories=weekday_hour_cat)

In [None]:
# Preview the first five rows after the preprocessing steps
data.head(n=5)

In [None]:
# Channel database: channel ID with channel current name
# Duplicates are dropped on `channel_id` alone: a channel appearing under several
# names would otherwise get one row per name, and every merge of per-channel
# statistics with this table would then duplicate that channel's rows.
# NOTE(review): keep='last' assumes the rows of `data` are ordered oldest to newest,
# so that the last name seen for a channel is its current one - confirm.
channel_db = (
    data.loc[:, ['channel_id', 'channel_name']]
    .drop_duplicates(subset='channel_id', keep='last')
    .sort_values('channel_name', ignore_index=True)
)

channel_db

## Upload frequency

### Data Generation

In [None]:
# Count of videos uploaded per weekday / hour / weekday and hour

# One row per video with a unit counter, so that a grouped sum gives the number of uploads.
# The explicit copy keeps the new column from being written onto a slice of `data`.
new_data = data.loc[:, ['release_weekday', 'release_hour', 'release_day_hour']].copy()
new_data['video_count'] = 1

# `release_weekday` and `release_day_hour` are categorical columns. `observed=False` is
# passed explicitly so the grouping does not depend on the pandas default for `observed`,
# which is deprecated and scheduled to change.

# Uploads per weekday, averaged over the number of weeks elapsed since 2022-06-01
day_count = new_data.loc[:, ['release_weekday', 'video_count']]\
    .groupby('release_weekday', as_index=False, observed=False).sum()
day_count['average'] = day_count['video_count'] / SINCE_0106_WEEKS

# Uploads per hour of the day, as a percentage of all uploads
hour_count = new_data.loc[:, ['release_hour', 'video_count']].groupby('release_hour', as_index=False).sum()
hour_count['proportion'] = hour_count['video_count'] / hour_count.video_count.sum() * 100

# Uploads per weekday-hour slot, as a percentage of all uploads
day_hour_count = new_data.loc[:, ['release_day_hour', 'video_count']]\
    .groupby('release_day_hour', as_index=False, observed=False).sum()
day_hour_count['proportion'] = day_hour_count['video_count'] / day_hour_count.video_count.sum() * 100

### Graphics

In [None]:
# Bar chart of the average number of uploads for each weekday
title_0 = 'Average amount of upload per weekday<br><sup>Since June 2022</sup>'
fig = px.bar(
    day_count,
    x='release_weekday',
    y='average',
    color='average',
    color_continuous_scale='orrd',
    title=title_0,
    width=960,
    height=540,
)

# The bar colour encodes the same value as the bar height, so the colour scale is hidden
fig.update_layout(coloraxis_showscale=False)

fig.show()

In [None]:
# Bar chart of the share of uploads (in percent) for each hour of the day
title_00 = 'Upload proportion per hour<br><sup>Since June 2022</sup>'
fig = px.bar(
    hour_count,
    x='release_hour',
    y='proportion',
    color='proportion',
    color_continuous_scale='orrd',
    title=title_00,
    width=960,
    height=540,
)

# The bar colour encodes the same value as the bar height, so the colour scale is hidden
fig.update_layout(coloraxis_showscale=False)

fig.show()

In [None]:
# Bar chart of the share of uploads (in percent) for each weekday-hour slot
title_000 = 'Upload proportion per weekday & hour<br><sup>Since June 2022</sup>'
fig = px.bar(
    day_hour_count,
    x='release_day_hour',
    y='proportion',
    color='proportion',
    color_continuous_scale='orrd',
    title=title_000,
    width=960,
    height=540,
)

# The bar colour encodes the same value as the bar height, so the colour scale is hidden
fig.update_layout(coloraxis_showscale=False)

fig.show()

## Audience

### Dataframe generation

In [None]:
# Number of videos uploaded by each channel
upload_count = (
    data[['channel_id', 'item_id']]
    .groupby('channel_id', as_index=False)
    .count()
    .rename(columns={'item_id': 'video_count'})
)

# Engagement metrics (views, likes, comments, likes + comments) grouped by channel
engagement_by_channel = data[
    ['channel_id', 'views', 'likes', 'comments', 'likes_comments']
].groupby('channel_id', as_index=False)

# Total of each metric by channel
stats_sum = engagement_by_channel.sum()

# Mean of each metric over the rows of each channel
stats_avg = engagement_by_channel.mean()

In [None]:
# Attach the channel database to each per-channel table (join on their shared columns)
upload_count = pd.merge(upload_count, channel_db)
stats_sum = pd.merge(stats_sum, channel_db)
stats_avg = pd.merge(stats_avg, channel_db)

### Graphics

#### Video count

In [None]:
# Bar chart of the number of uploaded videos by channel
title_1 = 'Video count by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    upload_count,
    x='channel_name',
    y='video_count',
    color='video_count',
    color_continuous_scale='orrd',
    title=title_1,
    width=960,
    height=540,
)

# Sort channels from highest to lowest bar and hide the redundant colour scale
fig.update_layout(
    xaxis_categoryorder='total descending',
    coloraxis_showscale=False,
)

fig.show()

#### Total engagement by channel

In [None]:
# Bar chart of the total number of views by channel
title_2_1 = 'Total views by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_sum,
    x='channel_name',
    y='views',
    color='views',
    color_continuous_scale='orrd',
    title=title_2_1,
    width=960,
    height=540,
)

# Sort channels from highest to lowest bar and hide the redundant colour scale
fig.update_layout(
    xaxis_categoryorder='total descending',
    coloraxis_showscale=False,
)

fig.show()

In [None]:
# Bar chart of the total number of likes by channel
title_2_2 = 'Total likes by channel<br><sup>Since June 2022</sup>'
fig_2 = px.bar(
    stats_sum,
    x='channel_name',
    y='likes',
    color='likes',
    color_continuous_scale='orrd',
    title=title_2_2,
    width=960,
    height=540,
)

# Sort channels from highest to lowest bar and hide the redundant colour scale
fig_2.update_layout(
    xaxis_categoryorder='total descending',
    coloraxis_showscale=False,
)

fig_2.show()

In [None]:
# Bar chart of the total number of comments by channel
title_2_3 = 'Total comments by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_sum,
    x='channel_name',
    y='comments',
    color='comments',
    color_continuous_scale='orrd',
    title=title_2_3,
    width=960,
    height=540,
)

# Sort channels from highest to lowest bar and hide the redundant colour scale
fig.update_layout(
    xaxis_categoryorder='total descending',
    coloraxis_showscale=False,
)

fig.show()

In [None]:
# Bar chart of the total of likes plus comments by channel
title_2_4 = 'Total likes + comments by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_sum,
    x='channel_name',
    y='likes_comments',
    color='likes_comments',
    color_continuous_scale='orrd',
    title=title_2_4,
    width=960,
    height=540,
)

# Sort channels from highest to lowest bar and hide the redundant colour scale
fig.update_layout(
    xaxis_categoryorder='total descending',
    coloraxis_showscale=False,
)

fig.show()

#### Average engagement by channel

In [None]:
# Bar chart of the mean number of views by channel
title_3_1 = 'Average viewership by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_avg,
    x='channel_name',
    y='views',
    color='views',
    color_continuous_scale='orrd',
    title=title_3_1,
    width=960,
    height=540,
)

# Sort channels from highest to lowest bar and hide the redundant colour scale
fig.update_layout(
    xaxis_categoryorder='total descending',
    coloraxis_showscale=False,
)

fig.show()

In [None]:
# Bar chart of the mean number of likes by channel
title_3_2 = 'Average amount of likes by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_avg,
    x='channel_name',
    y='likes',
    color='likes',
    color_continuous_scale='orrd',
    title=title_3_2,
    width=960,
    height=540,
)

# Sort channels from highest to lowest bar and hide the redundant colour scale
fig.update_layout(
    xaxis_categoryorder='total descending',
    coloraxis_showscale=False,
)

fig.show()

In [None]:
# Bar chart of the mean number of comments by channel
title_3_3 = 'Average amount of comments by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_avg,
    x='channel_name',
    y='comments',
    color='comments',
    color_continuous_scale='orrd',
    title=title_3_3,
    width=960,
    height=540,
)

# Sort channels from highest to lowest bar and hide the redundant colour scale
fig.update_layout(
    xaxis_categoryorder='total descending',
    coloraxis_showscale=False,
)

fig.show()

In [None]:
# Bar chart of the mean of likes plus comments by channel
title_3_4 = 'Average amount of likes and comments by channel<br><sup>Since June 2022</sup>'
fig = px.bar(
    stats_avg,
    x='channel_name',
    y='likes_comments',
    color='likes_comments',
    color_continuous_scale='orrd',
    title=title_3_4,
    width=960,
    height=540,
)

# Sort channels from highest to lowest bar and hide the redundant colour scale
fig.update_layout(
    xaxis_categoryorder='total descending',
    coloraxis_showscale=False,
)

fig.show()

## Clear outputs

In [None]:
# Shell escape: run nbconvert to clear the cell outputs of playlists_report.ipynb, rewriting the file in place
!jupyter nbconvert --clear-output --inplace playlists_report.ipynb