#  `DIRECTVNOW` Viewership Animation

In [354]:
%matplotlib inline
import numpy as np
import pandas as pd

from bokeh.embed import file_html
from bokeh.io import output_notebook, show
from bokeh.layouts import layout


from bokeh.core.properties import field
from bokeh.io import curdoc
from bokeh.layouts import layout
from bokeh.models import (ColumnDataSource, HoverTool, SingleIntervalTicker, Slider, Button, Label, CategoricalColorMapper)
from bokeh.palettes import Spectral6
from bokeh.plotting import figure

from bokeh.models import (
    ColumnDataSource, Plot, Circle, Range1d, LinearAxis, HoverTool, 
    Text, SingleIntervalTicker, Slider, CustomJS, Legend, LegendItem, CategoricalColorMapper)
from bokeh.palettes import Spectral6


pd.set_option('precision', 2)
pd.set_option('notebook_repr_html', True)

In [140]:
# Import the data and reshape by pivoting
unique_devices = pd.read_excel('./data_csv/device_per_user_bygenre.xlsx')
avg_uniq_dev_per_user_df = unique_devices.pivot(index='new_genre', columns='new_weeks', values='avg_uniq_dev_per_user')
print('\nShapes:', unique_devices.shape, ' pivot to =>> ', avg_uniq_dev_per_user.shape)
# unique_devices.sample(2)
avg_uniq_dev_per_user_df.sample(2)


Shapes: (5525, 3)  pivot to =>>  (620, 13)


new_weeks,1,2,3,4,5,6,7,8,9,10,11,12,13
new_genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
"MYSTERY,THRILLER",,,1.36,1.15,1.27,1.0,1.1,1.09,1.08,1.29,1.15,1.11,1.04
"COMEDY,SCIENCE FICTION,THRILLER",,,1.17,1.0,1.12,1.0,1.0,,,,,,


In [146]:
streamer_population = pd.read_excel('./data_csv/streamer_population_bygenre.xlsx')
streamer_pop_df = streamer_population.pivot(index='new_genre', columns='new_weeks', values='unique_streamer_population')
streamer_pop_df.sample(2)

new_weeks,1,2,3,4,5,6,7,8,9,10,11,12,13
new_genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
"ACTION,DRAMA,SCIENCE FICTION",,3.0,572.0,637.0,569.0,544.0,477.0,1303.0,990.0,659.0,552.0,163.0,5.0
"COMEDY,COMEDY DRAMA,DRAMA",,,1.0,1.0,2.0,1.0,,1.0,,1.0,,,1.0


In [142]:
streams = pd.read_excel('./data_csv/streams_bygenre.xlsx')
pv_stream_df = streams.pivot(index='new_genre', columns='new_weeks', values='streams')
pv_stream_df.sample(2)

new_weeks,1,2,3,4,5,6,7,8,9,10,11,12,13
new_genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
"COMEDY,HORROR,MYSTERY",,,429.0,704.0,678.0,625.0,710.0,1724.0,1132.0,493.0,721.0,728.0,1062.0
"COMEDY,SITCOM",,,272.0,505.0,560.0,651.0,347.0,950.0,880.0,167.0,561.0,184.0,289.0


In [333]:
genre_index_df = pd.read_excel('./data_csv/genre_indexing.xlsx')
# genre_index_df.sample(2)


genre_index_df.video_type.replace(['(VOD) - VOD MOVIES', '(VOD) - LIVE EPISODES'], '(VOD) CONTENTS', inplace=True)
genre_index_df = genre_index_df[['new_genre', 'video_type']].drop_duplicates().set_index('new_genre')
# genre_index_df

In [334]:
# Turn population into bubble sizes. Use min_size and factor to tweak.
scale_factor = 10;
min_bubble_size = 0.5; 
strmr_population_size = np.sqrt(streamer_pop / np.pi) / scale_factor
strmr_population_size_df = strmr_population_size.where(strmr_population_size >= min_bubble_size).fillna(min_bubble_size)

In [324]:
# Create list of app lifetime in weeks
sources = {}
genre_video_type = genre_index_df.video_type
lifetime_wks = avg_uniq_dev_per_user_df.columns
lifetime_wks.name = 'weeks'

for wk in lifetime_wks:
    avg_uniq_dev_per_user_wk = avg_uniq_dev_per_user_df[wk]
    avg_uniq_dev_per_user_wk.name = 'Avg_Unique_Devices_per_User'
    streams_wk = pv_stream_df[wk]
    streams_wk.name = 'Stream_Counts'
    streamer_population_wk = strmr_population_size_df[wk]
    streamer_population_wk.name = 'Unique_Streamers_Population'
    
    new_df = pd.concat([avg_uniq_dev_per_user_wk, streams_wk, streamer_population_wk, genre_index_df], axis=1)
    sources['_' + str(wk)] = ColumnDataSource(new_df)
#     sources[wk] = ColumnDataSource(new_df)

In [321]:
new_df.head()

Unnamed: 0,Avg_Unique_Devices_per_User,Stream_Counts,Unique_Streamers_Population,video_type
,1.14,7920.0,3.35,(VOD) CONTENTS
(LIVE) - GENERAL,1.94,18300000.0,28.4,(LIVE) - GENERAL
(VOD) - NON-GENRED,1.17,29700.0,6.14,(VOD) CONTENTS
ACTION,1.13,2570.0,1.88,(VOD) CONTENTS
"ACTION SPORTS,ADVENTURE,OUTDOORS,REALITY,TRAVEL",1.04,199.0,0.5,(VOD) CONTENTS


In [325]:
sources

{'_1': ColumnDataSource(id='0ead0d1d-6451-4b0e-8c0c-be8d8dbdc00a', ...),
 '_10': ColumnDataSource(id='a32e2a85-c956-47f4-8e05-0f768c238560', ...),
 '_11': ColumnDataSource(id='236ffbbe-23cf-4f80-8761-d6ca4d223a1d', ...),
 '_12': ColumnDataSource(id='57e65690-891f-49ce-be4d-541e78f7e7ed', ...),
 '_13': ColumnDataSource(id='26c68d83-933e-413e-bf9b-71b7453923aa', ...),
 '_2': ColumnDataSource(id='338d0915-d0f8-4514-891f-d41669f8258a', ...),
 '_3': ColumnDataSource(id='95e8ada4-e8dc-44e8-8b2b-5088cacbb7e3', ...),
 '_4': ColumnDataSource(id='02fb4d75-244d-4da4-9519-2b22899a7477', ...),
 '_5': ColumnDataSource(id='5519ba75-0d99-4f31-a2b3-cc2f7e972b8a', ...),
 '_6': ColumnDataSource(id='147ff62c-d74b-4a3c-bb37-1ba663dd9afc', ...),
 '_7': ColumnDataSource(id='02744535-e638-4038-b81f-ff4f7f29e158', ...),
 '_8': ColumnDataSource(id='e6f0d102-9eca-4928-8514-c7856e6df714', ...),
 '_9': ColumnDataSource(id='980cc512-17ec-4cfa-b056-7362146c0cdc', ...)}

#  Build the Plot

In [330]:
dictionary_of_sources = dict(zip([wk for wk in lifetime_wks], ['_%s' % wk for wk in lifetime_wks]))
js_source_array = str(dictionary_of_sources).replace("'", "")

In [343]:
# Set up the plot
xdr = Range1d(1, 5)
ydr = Range1d(pv_stream_df.min().min(), pv_stream_df.max().max())

plot = Plot(
    x_range=xdr,
    y_range=ydr,
    plot_width=1000,
    plot_height=400,
    outline_line_color=None,
    toolbar_location=None, 
    min_border=20,
)

# Build the Axies

In [344]:
AXIS_FORMATS = dict(
    minor_tick_in=None,
    minor_tick_out=None,
    major_tick_in=None,
    major_label_text_font_size="10pt",
    major_label_text_font_style="normal",
    axis_label_text_font_size="10pt",

    axis_line_color='#AAAAAA',
    major_tick_line_color='#AAAAAA',
    major_label_text_color='#666666',

    major_tick_line_cap="round",
    axis_line_cap="round",
    axis_line_width=1,
    major_tick_line_width=1,
)

xaxis = LinearAxis(ticker=SingleIntervalTicker(interval=1), axis_label="Children per woman (total fertility)", **AXIS_FORMATS)
yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=20), axis_label="Life expectancy at birth (years)", **AXIS_FORMATS)   
plot.add_layout(xaxis, 'below')
plot.add_layout(yaxis, 'left')

#  Add the backgroun Week Text

In [345]:
# Add the year in background (add before circle)
text_source = ColumnDataSource({'week': ['%s' % lifetime_wks[0]]})
text = Text(x=2, y=35, text='week', text_font_size='150pt', text_color='#EEEEEE')
plot.add_glyph(text_source, text)

#  Add Bubble and  Hover

In [356]:
# Make a ColorMapper
color_mapper = CategoricalColorMapper(palette=Spectral6, factors=genre_index_df.video_type.unique().tolist())

# Add the circle
renderer_source = sources['_%s' % lifetime_wks[0]]
circle_glyph = Circle(
    x='avg_uniq_dev_per_user_wk', y='streams_wk', size='streamer_population_wk',
    fill_color={'field': 'region', 'transform': color_mapper}, 
    fill_alpha=0.8, 
    line_color='#7c7e71', line_width=0.5, line_alpha=0.5)
circle_renderer = plot.add_glyph(renderer_source, circle_glyph)

# Add the hover (only against the circle and not other plot elements)
tooltips = "@index"
plot.add_tools(HoverTool(tooltips=tooltips, renderers=[circle_renderer]))

# We want a legend for the circles. The legend will be populated based on the label='region' 
# which is a column of the data source - it will take only the unique values.
plot.add_layout(Legend(items=[LegendItem(label='Lifetime', renderers=[circle_renderer])]))

# Render together  with a Slider

In [352]:
# Add the slider
code = """
    var lifetime = slider.value,
        sources = %s,
        new_source_data = sources[lifetime].data;
    renderer_source.data = new_source_data;
    text_source.data = {'lifetime': [String(lifetime)]};
""" % js_source_array

callback = CustomJS(args=sources, code=code)
slider = Slider(start=lifetime_wks[0], end=lifetime_wks[-1], value=1, step=1, title="Weeks", callback=callback)
callback.args["renderer_source"] = renderer_source
callback.args["slider"] = slider
callback.args["text_source"] = text_source

In [357]:
# Stick the plot and the slider together
show(layout([[plot], [slider]], sizing_mode='scale_width'))

ERROR:/Users/RichardAfolabi/anaconda/lib/python3.5/site-packages/bokeh/core/validation/check.py:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: Avg Devices, Streams, population, region [renderer: GlyphRenderer(id='b97fd0d1-0367-41a1-84bc-de6a0effa3fa', ...)]
ERROR:/Users/RichardAfolabi/anaconda/lib/python3.5/site-packages/bokeh/core/validation/check.py:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: avg_uniq_dev_per_user_wk, region, streamer_population_wk, streams_wk [renderer: GlyphRenderer(id='5da084ec-0903-4d94-b091-bc5d3aab5f20', ...)]
ERROR:/Users/RichardAfolabi/anaconda/lib/python3.5/site-packages/bokeh/core/validation/check.py:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: Avg Devices, Streams, population, region [renderer: GlyphRenderer(id='b97fd0d1-0367-41a1-84bc-de6a0effa3fa', ...)]
ERROR:/Users/RichardAfolabi/anaconda/lib/python3.5/site-packages/bokeh/core/validation/check.py:E-1001 (BAD_COLUMN_NAME): Glyph refers to 

In [None]:
    avg_uniq_dev_per_user_wk = avg_uniq_dev_per_user_df[wk]
    avg_uniq_dev_per_user_wk.name = 'Avg_Unique_Devices_per_User'
    streams_wk = pv_stream_df[wk]
    streams_wk.name = 'Stream_Counts'
    streamer_population_wk = strmr_population_size_df[wk]
    streamer_population_wk.name = 'Unique_Streamers_Population'