In [1]:
from jupyter_dash import JupyterDash
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import plotly.express as px
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output, State
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import spacy
from rank_bm25 import BM25Okapi
from tqdm import tqdm
import datetime
import time
import plotly.graph_objects as go
import wordcloud

In [2]:
# Load the per-region trending-video CSV files
CA = pd.read_csv('CAvideos.csv') #Canada
GB = pd.read_csv('GBvideos.csv') #Great Britain
US = pd.read_csv('USvideos.csv') #USA

# Tag every row with its region before stacking the three frames
CA['region'] = 'Canada'
GB['region'] = 'Britain'
US['region'] = 'USA'
df = pd.concat([CA,GB,US])

# Title length in characters (computed once; NaN titles stay NaN instead of raising)
df['len_title'] = df['title'].str.len()

# Time features.
# publish_time is parsed first so that every derived column can use the .dt
# accessor instead of re-parsing or splitting strings.
df['publish_time'] = pd.to_datetime(df['publish_time'], format='%Y-%m-%dT%H:%M:%S.%fZ')
df['publishing_day'] = df['publish_time'].dt.strftime('%a')
# trending_date is parsed as two-digit-year.day.month; prefix the century
df['trending_date'] = pd.to_datetime('20' + df['trending_date'], format='%Y.%d.%m')
df['publish_date'] = df['publish_time'].dt.normalize()
df['publish_hours'] = df['publish_time'].dt.time
# Taken straight from the timestamp so fractional seconds cannot break the int cast
df['hour'] = df['publish_time'].dt.hour
df['min'] = df['publish_time'].dt.minute
df['sec'] = df['publish_time'].dt.second
# Whole days between publication and the trending date
df['days_to_trending'] = (df['trending_date'] - df['publish_date']).dt.days
df['publish_year'] = df['publish_date'].dt.year
df['publish_month'] = df['publish_date'].dt.month
df['trending_years'] = df['trending_date'].dt.year
df['trending_month'] = df['trending_date'].dt.month

# Category names: category ids that are missing from the JSON file map to NaN
import json
with open("CA_category_id.json") as f:
    categories = json.load(f)["items"]
cat_dict = {int(cat["id"]): cat["snippet"]["title"] for cat in categories}
df['category_name'] = df['category_id'].map(cat_dict)

# Log-transformed counters (log(x + 1) keeps zero counts finite)
df['likes_log'] = np.log(df['likes'] + 1)
df['views_log'] = np.log(df['views'] + 1)
df['dislikes_log'] = np.log(df['dislikes'] + 1)
df['comment_count_log'] = np.log(df['comment_count'] + 1)

# Option lists for the dashboard controls
region = list(df['region'].unique())

# dropna() instead of list.remove(np.nan): remove() raises ValueError when
# there is no NaN entry to remove
category = list(df['category_name'].dropna().unique())

years={str(year): str(year) for year in df['trending_years'].unique()}


In [3]:
# Build a results table
def generate_table(arr):
    """Render a sequence of titles as a two-column HTML table.

    Args:
        arr: sequence of strings; each entry becomes one table row.

    Returns:
        An ``html.Table`` with a "Number" / "Title" header and one body row
        per entry, numbered from 1. Titles are passed through
        ``str.capitalize``.

    NOTE(review): a later cell in this notebook defines a callback that is
    also named ``generate_table``; once that cell runs, this name is rebound.
    """
    return html.Table([
        html.Thead(
            # Header cells must be Th elements, not bare strings inside the Tr
            html.Tr([html.Th('Number'), html.Th('Title')])
        ),
        html.Tbody([
            # One row per entry so the cells line up with the two header columns
            html.Tr([html.Td(number), html.Td(title.capitalize())])
            for number, title in enumerate(arr, start=1)
        ])
    ])
    

In [4]:
# Build the search-engine model: a BM25 index over the unique video titles
nlp = spacy.load("en_core_web_sm")
df1 = df.drop_duplicates(subset=['title'])
text_list = df1.title.str.lower().values
# Tokenise with spaCy, keeping only the alphabetic tokens of each title
tok_text = [
    [token.text for token in parsed_title if token.is_alpha]
    for parsed_title in tqdm(nlp.pipe(text_list, disable=["tagger", "parser","ner"]))
]
bm25 = BM25Okapi(tok_text)


30626it [00:04, 7468.99it/s]


In [5]:
def _as_selection(value):
    """Normalise a dropdown value to a list that ``Series.isin`` accepts."""
    if value is None:
        # A missing selection matches nothing
        return []
    if pd.api.types.is_list_like(value):
        return list(value)
    # Single (non-multi) selection
    return [value]


def search_df(df,region, tipe,kategori,year_value,filters):
    """Filter the video frame by region, category and trending year.

    Args:
        df: frame with ``region``, ``category_name`` and ``trending_years``
            columns.
        region: selected region(s); a list, a single value or None.
        tipe: unused here; kept so existing callers keep working.
        kategori: selected category name(s); a list, a single value or None.
        year_value: selected trending year(s); a list, a single value or None.
        filters: ``'off'`` returns ``df`` untouched; any other value applies
            the three filters.

    Returns:
        ``df`` itself when filtering is off, otherwise the rows matching all
        three selections. An empty or None selection matches no rows.
    """
    if filters == 'off':
        return df
    keep = (
        df['region'].isin(_as_selection(region))
        & df['category_name'].isin(_as_selection(kategori))
        & df['trending_years'].isin(_as_selection(year_value))
    )
    return df[keep]

# Layout

## Counts

In [6]:
# Card with one list item per counter; the text elements are filled in by
# the callbacks that target their ids.
list_group1 = dbc.Card(
    dbc.ListGroup(
        [
            dbc.ListGroupItem(
                [
                    dbc.ListGroupItemHeading(heading),
                    dbc.ListGroupItemText(id=text_id),
                ],
                color=item_color,
            )
            for heading, text_id, item_color in (
                ("Dislikes Count", 'dislikes_count', 'light'),
                ("Likes Count", 'likes_count', 'secondary'),
                ("Comments Count", 'comments_count', 'dark'),
                ("Views Count", 'views_count', 'dark'),
            )
        ],
        flush=True,
    ),
    color='success',
)

## Row Satu

In [7]:
# First dashboard row: top-20 bar chart on the left; pie chart, counters and
# histogram stacked on the right.
rowsatu = dbc.Row([
    dbc.Col(
        dbc.Card(
            [
                dbc.CardHeader(html.Div(html.H4(id='top20_header'))),
                dbc.CardBody([
                    html.Div([
                        dcc.Graph(
                            id='top10_channel',
                            # Initial hover target used by the hover-driven callbacks
                            hoverData={'points': [{'customdata': 'ChildishGambinoVEVO'}]},
                        )
                    ])
                ]),
            ],
            className="w-100",
            color='success',
            outline=True,
        ),
        width=6,
    ),
    dbc.Col(
        [
            dbc.Row([
                dbc.CardGroup(
                    [
                        dbc.Card(
                            [
                                dbc.CardHeader(html.Div(html.H4(id = 'pie_header'))),
                                dbc.CardBody([
                                    html.Div([dcc.Graph(id='pie_plot')]),
                                ]),
                            ],
                            className='w-100',
                            color='success',
                            outline=True,
                        ),
                        html.Div(list_group1),
                    ],
                    className='w-100',
                ),
            ]),
            dbc.Row([
                dbc.Card(
                    [
                        dbc.CardHeader(html.Div(html.H4(id='dist_header'))),
                        dbc.CardBody([
                            html.Div(dcc.Graph(id='histogram')),
                        ]),
                    ],
                    className='w-100',
                    color='success',
                    outline=True,
                ),
            ]),
        ],
        width=6,
    ),
])

## Row Dua

In [8]:
# Second dashboard row: videos published per weekday (left) and per hour (right).
# There is deliberately no trailing comma after the closing bracket: with one,
# this name would be bound to a one-element tuple instead of the dbc.Row.
rowdua = dbc.Row([
    dbc.Col(
        [
            dbc.Card(
                [
                    dbc.CardHeader(html.Div(html.H4(children='Published Videos by Day'))),
                    dbc.CardBody([
                        html.Div([
                            dcc.Graph(
                                id='publish_date_by_day',
                                # Initial hover target used by the per-hour chart
                                hoverData={'points': [{'x': 'Mon'}]},
                            ),
                        ]),
                    ]),
                ],
                className='w-100', color='success', outline=True,
            ),
        ],
        width=6,
    ),
    dbc.Col(
        [
            dbc.Card(
                [
                    dbc.CardHeader(html.Div(html.H4(id='publish_hour_header'))),
                    dbc.CardBody([
                        dbc.Row([
                            dbc.Col(
                                [
                                    html.Div([
                                        dcc.Graph(id='publish_date_by_hour'),
                                    ]),
                                ],
                                width=9,
                            ),
                            dbc.Col(
                                [
                                    html.Div([
                                        html.Label(children='Sort by Day?'),
                                        dbc.RadioItems(
                                            id = 'sort_by_day',
                                            options=[
                                                {'label': 'on', 'value': 'on'},
                                                {'label': 'off', 'value': 'off'},
                                            ],
                                            value='off',
                                        ),
                                    ]),
                                ],
                                width=3,
                            ),
                        ]),
                    ]),
                ],
                className='w-100', color='success', outline=True,
            ),
        ],
        width=6,
    ),
])

## Row Tiga

In [9]:
# Third dashboard row: time series (left) and days-to-trending chart (right).
# There is deliberately no trailing comma after the closing bracket: with one,
# this name would be bound to a one-element tuple instead of the dbc.Row.
rowtiga = dbc.Row([
    dbc.Col(
        [
            dbc.Card(
                [
                    dbc.CardHeader(html.Div(html.H4(id='statistic_over_time'))),
                    dbc.CardBody([
                        html.Div([
                            dcc.Graph(id='time_series'),
                        ]),
                    ]),
                ],
                className='w-100', color='success', outline=True,
            ),
        ],
        width=6,
    ),
    dbc.Col(
        [
            dbc.Card(
                [
                    dbc.CardHeader(html.Div(html.H4('Many Videos to be Trending'))),
                    dbc.CardBody([
                        html.Div([
                            dcc.Graph(id='days_to_trending'),
                        ]),
                    ]),
                ],
                className='w-100', color='success', outline=True,
            ),
        ],
        width=6,
    ),
])


## Row Empat

In [10]:
# Fourth dashboard row: scatter plot (left); radar chart and correlation
# heatmap stacked on the right.
# There is deliberately no trailing comma after the closing bracket: with one,
# this name would be bound to a one-element tuple instead of the dbc.Row.
rowempat = dbc.Row([
    dbc.Col(
        [
            dbc.Card(
                [
                    dbc.CardHeader(html.Div(html.H4(id = 'scatter_header'))),
                    dbc.CardBody([
                        html.Div([
                            dcc.Graph(id = 'scatter_plot'),
                        ]),
                    ]),
                ],
                className='w-100', color='success', outline=True,
            ),
        ],
        width=6,
    ),
    dbc.Col(
        [
            dbc.Row([
                dbc.Card(
                    [
                        dbc.CardHeader(html.Div(html.H4('Corelation between Features by Day'))),
                        dbc.CardBody([
                            html.Div([
                                dcc.Graph(id='radar_corr'),
                            ]),
                        ]),
                    ],
                    className='w-100', color='success', outline=True,
                ),
            ]),
            dbc.Row([
                dbc.Card(
                    [
                        dbc.CardHeader(html.Div(html.H4('Corelation between Features in General'))),
                        dbc.CardBody([
                            html.Div([
                                dcc.Graph(
                                    id = 'all_feature_corellation',
                                    # Initial hover target used by the scatter plot
                                    hoverData={'points': [{'x': 'views','y':'likes'}]},
                                ),
                            ]),
                        ]),
                    ],
                    className='w-100', color='success', outline=True,
                ),
            ]),
        ],
        width=6,
    ),
])

## Row Lima

In [11]:
# Fifth dashboard row: title word cloud (left) and title-length histogram (right).
# There is deliberately no trailing comma after the closing bracket: with one,
# this name would be bound to a one-element tuple instead of the dbc.Row.
rowlima = dbc.Row([
    dbc.Col(
        [
            dbc.Card(
                [
                    dbc.CardHeader(html.Div(html.H4('Most Occuring Word in Trending Videos Title'))),
                    dbc.CardBody([
                        html.Div([
                            dcc.Graph(id='wordcloud'),
                        ]),
                    ]),
                ],
                className='w-100', color='success', outline=True,
            ),
        ],
        width=6,
    ),
    dbc.Col(
        [
            dbc.Card(
                [
                    dbc.CardHeader(html.Div(html.H4('Distribution of Trending Videos Title Length'))),
                    dbc.CardBody([
                        html.Div([
                            dcc.Graph(id='len_dist'),
                        ]),
                    ]),
                ],
                className='w-100', color='success', outline=True,
            ),
        ],
        width=6,
    ),
])


# App Layout

In [12]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# The app itself is styled with the bootstrap CYBORG theme
app = JupyterDash(external_stylesheets=[dbc.themes.CYBORG])

app.layout = html.Div([
    # --- Header card: title, search box, suggestions and filter controls ---
    dbc.Card(
        [
            dbc.CardHeader([
                html.Div([
                    html.H3("Dashboard Youtube Trending"),
                ]),
            ]),
            dbc.CardBody([
                # Search box, submit button and the five suggestion slots
                dbc.Card([
                    dbc.CardBody([
                        dbc.Row(
                            [
                                dbc.Col(
                                    [
                                        html.Div([
                                            dbc.Input(id='input-1-state', type='text', value='Eminem', placeholder = "Search Videos..."),
                                        ]),
                                    ],
                                    width={'size':7, 'offset':2},
                                ),
                                dbc.Col(
                                    [
                                        html.Div([
                                            dbc.Button(id='submit-button-state', n_clicks=0, children='Submit'),
                                        ]),
                                    ],
                                    width=1,
                                ),
                            ],
                            no_gutters=True,
                            align='center',
                        ),
                        dbc.Row(
                            [
                                dbc.Col(
                                    [
                                        html.Div([
                                            html.H4(''' Here's What You Might Also Like:'''),
                                            # Rows "1. " .. "5. " with cells nama1 .. nama5
                                            html.Table([
                                                html.Tr([html.Td(f'{rank}. '), html.Td(id=f'nama{rank}')])
                                                for rank in range(1, 6)
                                            ]),
                                        ]),
                                    ],
                                    width={'offset':4},
                                ),
                            ],
                            align='center',
                        ),
                    ]),
                ]),
                # Filter controls, two per row
                dbc.Row([
                    dbc.Col(
                        [
                            dbc.Row(
                                [
                                    dbc.Col(
                                        [
                                            html.Div(
                                                [
                                                    dcc.Dropdown(
                                                        id='region',
                                                        options=[{'label': name, 'value': name} for name in region],
                                                        value=['Canada'],
                                                        multi=True,
                                                        searchable=True,
                                                    ),
                                                ],
                                                style={'width':'100%'},
                                            ),
                                        ],
                                        width=6,
                                    ),
                                    dbc.Col(
                                        [
                                            html.Div(
                                                [
                                                    dcc.Dropdown(
                                                        id='kategori',
                                                        options=[{'label': name, 'value': name} for name in category],
                                                        value=['Music'],
                                                        multi=True,
                                                        searchable=True,
                                                    ),
                                                ],
                                                style={'width':'100%'},
                                            ),
                                        ],
                                        width=6,
                                    ),
                                ],
                                no_gutters=True,
                            ),
                            dbc.Row(
                                [
                                    dbc.Col(
                                        [
                                            html.Div(
                                                [
                                                    dcc.Dropdown(
                                                        id='tipe',
                                                        options = [
                                                            {'label': metric, 'value': metric}
                                                            for metric in ['views', 'likes','dislikes','comment_count']
                                                        ],
                                                        value='views',
                                                        searchable=True,
                                                    ),
                                                ],
                                                style={'width':'100%'},
                                            ),
                                        ],
                                        width=6,
                                    ),
                                    dbc.Col(
                                        [
                                            html.Div(
                                                [
                                                    dcc.Dropdown(
                                                        id='sort',
                                                        options=[
                                                            {'label': column, 'value': column}
                                                            for column in ['title', 'channel_title']
                                                        ],
                                                        value='channel_title',
                                                        searchable=True,
                                                    ),
                                                ],
                                                style={'width':'100%'},
                                            ),
                                        ],
                                        width=6,
                                    ),
                                ],
                                no_gutters=True,
                            ),
                            dbc.Row(
                                [
                                    dbc.Col(
                                        [
                                            html.Div(
                                                [
                                                    dcc.Dropdown(
                                                        id='crossfilter-year--slider',
                                                        options = [{'label': year, 'value': year} for year in [2017,2018]],
                                                        value=[2018],
                                                        multi=True,
                                                        searchable=True,
                                                    )
                                                ],
                                                style={'width':'100%'},
                                            ),
                                        ],
                                        width=6,
                                    ),
                                    dbc.Col(
                                        [
                                            html.Div(
                                                [
                                                    dbc.RadioItems(
                                                        id = 'filter',
                                                        options=[
                                                            {'label': 'Filter Dataframe', 'value': 'on'},
                                                            {'label': "Don't Filter Data frame", 'value': 'off'},
                                                        ],
                                                        value='on',
                                                        inline= True,
                                                    ),
                                                ],
                                                style={'width':'100%'},
                                            ),
                                        ],
                                        width={'size':5, 'offset':1},
                                    ),
                                ],
                                no_gutters=True,
                            ),
                        ],
                        width=12,
                    ),
                ]),
            ]),
        ],
        color='light',
    ),

    # --- First chart row in its own card ---
    dbc.Card(
        [
            dbc.CardBody([html.Div(rowsatu)]),
        ],
        style={"width": "18rem"},
        className="w-100", color='success', outline=True,
    ),

    # --- Remaining chart rows share one card ---
    dbc.Card(
        [
            dbc.CardBody([
                html.Div(rowdua),
                html.Div(rowtiga),
                html.Div(rowempat),
                html.Div(rowlima),
            ]),
        ],
        style={"width": "18rem"},
        className="w-100", color='success', outline=True,
    ),
])


# Callbacks

## Search and Filter

In [13]:
#search
@app.callback([Output('nama1', 'children'),
                Output('nama2', 'children'),
                Output('nama3', 'children'),
                Output('nama4', 'children'),
                Output('nama5', 'children')],
                [Input('submit-button-state', 'n_clicks')],
                [State('input-1-state', 'value')])
def generate_table(n_clicks,query):
    """Fill the five suggestion cells with the best BM25 matches for the query.

    Args:
        n_clicks: click counter of the submit button; only used as the trigger.
        query: current text of the search input; may be None or empty.

    Returns:
        A 5-tuple of titles, best match first. Unused slots are empty strings
        (blank query, or fewer than five results).
    """
    n_results = 5
    # A cleared input arrives as None; treat it like an empty query
    tokenized_query = (query or "").lower().split()
    if not tokenized_query:
        return ("",) * n_results
    # Only five titles are displayed, so only five are requested
    results = list(bm25.get_top_n(tokenized_query, df1.title.values, n=n_results))
    # Pad so that all five outputs always receive a value
    results += [""] * (n_results - len(results))
    return tuple(results)

## Row Satu

In [14]:
#top20 header
@app.callback(
    dash.dependencies.Output('top20_header', 'children'),
    [dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('sort', 'value'),
     dash.dependencies.Input('filter', 'value')])
def title_top20(kategori, sort,filters):
    """Build the header text of the top-20 bar chart.

    Args:
        kategori: selected category names (list; may be empty or None).
        sort: name of the grouping column.
        filters: ``'off'`` when the data-frame filter is disabled.

    Returns:
        The header string; the selected categories are listed comma-separated
        rather than shown as a Python list repr.
    """
    sort = sort.capitalize()
    if filters == 'off':
        return f'Top 20 {sort} in All category'
    kat = ', '.join(k.capitalize() for k in (kategori or []))
    return f'Top 20 {sort} in {kat} category'

#pie header
@app.callback(
    Output('pie_header', 'children'),
    [Input('top10_channel', 'hoverData')])
def title_top20(hover):
    """Return the pie-chart header naming the item hovered in the bar chart."""
    hovered_point = hover['points'][0]
    nama = hovered_point['customdata'].capitalize()
    return 'Likes, Dislikes, and Comments Ratio in {}'.format(nama)

#dist header
@app.callback(
    Output('dist_header', 'children'),
    [Input('tipe', 'value'),
     Input('sort', 'value')])
def title_top20(tipe,sort):
    """Return the histogram header built from the grouping column and the metric."""
    return '{} Distribution by {}'.format(sort.capitalize(), tipe.capitalize())

#Top 20 Channels
@app.callback(
    dash.dependencies.Output('top10_channel', 'figure'),
    [dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('sort', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('filter', 'value')])
def update_top10(region, tipe,kategori, sort,year_value,filters):
    """Bar chart of the 20 groups with the highest total of the chosen metric.

    Args:
        region, kategori, year_value, filters: filter selections forwarded to
            ``search_df``.
        tipe: metric column that is summed per group.
        sort: column to group by.

    Returns:
        A plotly figure; each bar carries its group name as ``customdata`` so
        hover-driven callbacks can read it.
    """
    dff = search_df(df,region,tipe,kategori,year_value,filters)
    # Sum only the requested metric: summing the whole frame is wasteful and
    # fails on datetime columns in pandas versions where numeric_only defaults
    # to False.
    top = dff.groupby(sort)[tipe].sum().sort_values(ascending=False).head(20)
    labels = top.index.tolist()
    fig = px.bar(y=top,x=labels,hover_name=labels,color=top, color_continuous_scale='Aggrnyl')
    fig.update_traces(customdata=labels)
    fig.update_layout(height=640,margin={'l': 40, 'b': 40, 't': 10, 'r': 0}, hovermode='closest', font_color='white', paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)',
                     xaxis_title=sort,yaxis_title=tipe)
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    return fig

#histogram
@app.callback(
    dash.dependencies.Output('histogram', 'figure'),
    [dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('filter', 'value')])
def update_histogram(region, tipe,kategori,year_value,filters):
    """Histogram of the log-transformed metric for the filtered videos.

    Args:
        region, kategori, year_value, filters: filter selections forwarded to
            ``search_df``.
        tipe: metric name; the column ``<tipe>_log`` is plotted.

    Returns:
        A plotly histogram figure with a range slider.
    """
    dff = search_df(df,region,tipe,kategori,year_value,filters)
    pilihan = tipe+'_log'
    # Plot the filtered frame (dff), not the global df, so the filter
    # controls actually affect this chart.
    fig = px.histogram(dff[[pilihan]], x=[pilihan], color_discrete_sequence=px.colors.sequential.Aggrnyl)
    fig.update_yaxes(showgrid=False)
    fig.update_layout(height=245,xaxis=dict(rangeslider=dict(visible=True)), margin={'l': 20, 'b': 30, 'r': 10, 't': 10}, paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', font_color='white')
    return fig

#Pie plot
@app.callback(
    dash.dependencies.Output('pie_plot', 'figure'),
    [dash.dependencies.Input('top10_channel', 'hoverData'),
     dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('sort', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('filter', 'value')])
def update_pie_plot(hoverData, region,tipe,kategori,sort,year_value,filters):
    """Pie chart of total likes, dislikes and comments for the hovered bar.

    Args:
        hoverData: hover payload of the top-20 bar chart; the hovered group
            name is read from ``points[0]['customdata']``.
        region, kategori, year_value, filters: filter selections forwarded to
            ``search_df``.
        tipe: selected metric (only forwarded to ``search_df``).
        sort: column the bar chart is grouped by.

    Returns:
        A plotly pie figure. When the hovered name is missing from the
        filtered data, all three slices are zero instead of raising.
    """
    dff = search_df(df,region,tipe,kategori,year_value,filters)
    # Columns are addressed by name; positional lookups silently depend on the
    # column order of the aggregated frame.
    metrics = ['likes', 'dislikes', 'comment_count']
    totals = dff.groupby(sort)[metrics].sum()
    points = (hoverData or {}).get('points') or [{}]
    hovered = points[0].get('customdata')
    if hovered in totals.index:
        values = totals.loc[hovered, metrics].tolist()
    else:
        # e.g. the filters changed and the hovered item is no longer present
        values = [0, 0, 0]
    fig = px.pie(values=values, names=['likes','dislikes','comments'], color_discrete_sequence=px.colors.sequential.Aggrnyl)
    fig.update_layout(height=243, margin={'l': 20, 'b': 30, 'r': 10, 't': 10}, paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', font_color='white')
    return fig

#likes_count
def _hovered_total_in_millions(column, name, region, tipe, kategori, sort, year_value, filters):
    """Total of ``column`` for the hovered group, formatted in millions.

    Args:
        column: metric column to total.
        name: hover payload of the top-20 bar chart; the group name is read
            from ``points[0]['customdata']``.
        region, tipe, kategori, year_value, filters: forwarded to ``search_df``.
        sort: column the bar chart is grouped by.

    Returns:
        A string such as ``"1.234 M"``, or ``"N/A"`` when there is no hover
        data or the hovered group is absent from the filtered frame.
    """
    points = (name or {}).get('points') or [{}]
    hovered = points[0].get('customdata')
    dff = search_df(df,region,tipe,kategori,year_value,filters)
    # Summing the matching rows equals the per-group sum for that key, without
    # aggregating every column of every group.
    rows = dff.loc[dff[sort] == hovered, column]
    if rows.empty:
        return "N/A"
    val = round(rows.sum()/1000000,3)
    return (str(val)+" M")


# NOTE: the four callbacks below intentionally keep their original (shared)
# function name; Dash registers each one through its decorator.

#likes_count
@app.callback(
    dash.dependencies.Output('likes_count', 'children'),
    [dash.dependencies.Input('top10_channel', 'hoverData'),
     dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('sort', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('filter', 'value')])
def likes_count(name,region,tipe,kategori,sort,year_value,filters):
    """Total likes (in millions) of the hovered group."""
    return _hovered_total_in_millions('likes', name, region, tipe, kategori, sort, year_value, filters)

#dislikes_count
@app.callback(
    dash.dependencies.Output('dislikes_count', 'children'),
    [dash.dependencies.Input('top10_channel', 'hoverData'),
     dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('sort', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('filter', 'value')])
def likes_count(name,region,tipe,kategori,sort,year_value,filters):
    """Total dislikes (in millions) of the hovered group."""
    return _hovered_total_in_millions('dislikes', name, region, tipe, kategori, sort, year_value, filters)

#comments_count
@app.callback(
    dash.dependencies.Output('comments_count', 'children'),
    [dash.dependencies.Input('top10_channel', 'hoverData'),
     dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('sort', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('filter', 'value')])
def likes_count(name,region,tipe,kategori,sort,year_value,filters):
    """Total comments (in millions) of the hovered group."""
    return _hovered_total_in_millions('comment_count', name, region, tipe, kategori, sort, year_value, filters)

#views_count
@app.callback(
    dash.dependencies.Output('views_count', 'children'),
    [dash.dependencies.Input('top10_channel', 'hoverData'),
     dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('sort', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('filter', 'value')])
def likes_count(name,region,tipe,kategori,sort,year_value,filters):
    """Total views (in millions) of the hovered group."""
    return _hovered_total_in_millions('views', name, region, tipe, kategori, sort, year_value, filters)


## Row Dua

In [15]:
#publish_date_by_day
@app.callback(
    Output('publish_date_by_day', 'figure'),
    [Input('region', 'value'),
     Input('tipe', 'value'),
     Input('kategori', 'value'),
     Input('crossfilter-year--slider', 'value'),
     Input('filter', 'value')])
def update_publish_date_by_day(region, tipe,kategori,year_value,filters):
    """Bar chart of how many filtered videos were published on each weekday."""
    weekday_order = ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']
    filtered = search_df(df,region,tipe,kategori,year_value,filters)
    # reindex puts the days in calendar order; days without videos become NaN
    per_day = filtered.groupby('publishing_day').size().reindex(weekday_order)
    fig = px.bar(
        x=per_day.index.tolist(),
        y=list(per_day.values),
        color=list(per_day.values),
        color_continuous_scale='Aggrnyl',
    )
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.update_layout(
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        font_color='white',
        xaxis_title="Day",
        yaxis_title="Jumlah",
    )
    return fig

#publish_hour_header
@app.callback(
    dash.dependencies.Output('publish_hour_header', 'children'),
    [dash.dependencies.Input('sort_by_day', 'value'),
     dash.dependencies.Input('publish_date_by_day', 'hoverData')])
def title_publish_hour(sort,hover):
    """Build the header of the per-hour chart.

    Args:
        sort: ``'on'`` when the chart is restricted to the hovered weekday.
        hover: hover payload of the per-weekday chart; the abbreviated day
            name is read from ``points[0]['x']``.

    Returns:
        The header string, including the full weekday name when ``sort`` is
        ``'on'`` and a day is hovered.
    """
    # Appending 'day' to the abbreviation only works for Mon, Fri and Sun
    # ("Tueday", "Wedday", ...), so the full names are looked up instead.
    full_names = {
        'Mon': 'Monday', 'Tue': 'Tuesday', 'Wed': 'Wednesday', 'Thu': 'Thursday',
        'Fri': 'Friday', 'Sat': 'Saturday', 'Sun': 'Sunday',
    }
    points = (hover or {}).get('points') or [{}]
    abbreviation = points[0].get('x')
    if sort == 'on' and abbreviation is not None:
        # Fall back to the raw value for anything that is not a known abbreviation
        day = full_names.get(abbreviation, abbreviation)
        return f'Published Videos by Hours on {day}'
    return 'Published Videos by Hours'
    
#publish_date_by_hour
@app.callback(
    dash.dependencies.Output('publish_date_by_hour', 'figure'),
    [dash.dependencies.Input('publish_date_by_day', 'hoverData'),
     dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('sort_by_day', 'value'),
     dash.dependencies.Input('filter', 'value')])
def update_publish_date_by_hour(hoverData,region, tipe,kategori,year_value,sort_by_day,filters):
    """Bar chart of how many filtered videos were published in each hour.

    Args:
        hoverData: hover payload of the per-weekday chart; the day is read
            from ``points[0]['x']``.
        region, tipe, kategori, year_value, filters: forwarded to ``search_df``.
        sort_by_day: ``'off'`` counts all days; any other value restricts the
            count to the hovered weekday.

    Returns:
        A plotly bar figure with the publish hour on the x axis.
    """
    dff = search_df(df,region,tipe,kategori,year_value,filters)

    if sort_by_day != 'off':
        points = (hoverData or {}).get('points') or [{}]
        hovered_day = points[0].get('x')
        # Without hover data there is no day to restrict to; show all days
        if hovered_day is not None:
            dff = dff[dff['publishing_day']==hovered_day]
    per_hour = dff.groupby('hour').size()
    fig = px.bar(x=per_hour.index.tolist(),y=list(per_hour.values),hover_name=per_hour.index.tolist(),color=list(per_hour.values), color_continuous_scale='Aggrnyl')
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    # The x axis shows publish hours, so it is labelled "Hour"
    fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', font_color='white',xaxis_title="Hour",yaxis_title="Jumlah")
    return fig


## Row Tiga

In [16]:
#Time Series
@app.callback(
    dash.dependencies.Output('time_series', 'figure'),
    [dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('filter', 'value')])
def update_time_series(region, tipe,kategori,year_value,filters):
    """Line chart of the daily total of the chosen metric over trending dates.

    Args:
        region, kategori, year_value, filters: filter selections forwarded to
            ``search_df``.
        tipe: metric column that is summed per trending date.

    Returns:
        A plotly line figure with 1m / 6m / all range buttons and a range
        slider.
    """
    dff = search_df(df,region,tipe,kategori,year_value,filters)
    # Sum only the plotted metric: summing the whole frame fails on datetime
    # columns in pandas versions where numeric_only defaults to False.
    totals = dff.groupby('trending_date')[tipe].sum()
    fig = px.line(x=totals.index.tolist() , y=totals,hover_name=totals, color_discrete_sequence=px.colors.sequential.Aggrnyl)
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )
    fig.update_traces(mode='lines+markers')
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', font_color='white',xaxis_title="Time",yaxis_title="Jumlah")
    return fig

#Days to trending
@app.callback(
    dash.dependencies.Output('days_to_trending', 'figure'),
    [dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('filter', 'value')])
def update_days_to_trending(region, tipe,
                 kategori,
                 year_value,filters):
    """Bar chart of video counts per number of days between publish and trending.

    Args:
        region, tipe, kategori, year_value, filters: filter selections
            forwarded to ``search_df``.

    Returns:
        A plotly bar figure showing the first 11 ``days_to_trending`` groups
        (in ascending key order) of the filtered data.
    """
    dff = search_df(df,region,tipe,kategori,year_value,filters)
    # Group the filtered frame (dff), not the global df, so the filter
    # controls actually affect this chart.
    counts = dff.groupby('days_to_trending').size()[:11]
    fig = px.bar(x=counts.index.tolist(),y=list(counts.values),hover_name=counts.index.tolist(),color=list(counts.values), color_continuous_scale='Aggrnyl')
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', font_color='white',xaxis_title="Days",yaxis_title="Jumlah")
    return fig

## Row Empat

In [17]:
#all_feature_corellation
@app.callback(
    Output('all_feature_corellation', 'figure'),
    [Input('region', 'value'),
     Input('tipe', 'value'),
     Input('kategori', 'value'),
     Input('crossfilter-year--slider', 'value'),
     Input('filter', 'value')])
def update_all_corr(region, tipe,kategori,year_value,filters):
    """Heatmap of the pairwise correlations between the listed feature columns."""
    feature_columns = [
        'likes', 'dislikes', 'views', 'comment_count',
        'ratings_disabled', 'comments_disabled', 'video_error_or_removed',
        'len_title', 'days_to_trending',
    ]
    filtered = search_df(df,region,tipe,kategori,year_value,filters)
    correlation = filtered[feature_columns].corr()
    fig = px.imshow(correlation, color_continuous_scale='Aggrnyl')
    fig.update_xaxes(showgrid=False)
    fig.update_layout(
        height = 245,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        font_color='white',
    )
    return fig

#scatter header
@app.callback(
    Output('scatter_header', 'children'),
    [Input('all_feature_corellation','hoverData')])
def title_scatter(hover):
    """Return the scatter-plot header naming the two hovered heatmap features."""
    hovered_cell = hover['points'][0]
    col_x = hovered_cell['x'].capitalize()
    col_y = hovered_cell['y'].capitalize()
    return 'Scatterplot {} vs {} by Publish Month'.format(col_x, col_y)

#statistic_over_time
@app.callback(
    Output('statistic_over_time', 'children'),
    [Input('tipe','value')])
def title_over_time(tipe):
    """Return the time-series header for the selected metric."""
    return '{} Over Time'.format(tipe.capitalize())

#scatter_plot
@app.callback(
    Output('scatter_plot', 'figure'),
    [Input('all_feature_corellation','hoverData'),
     Input('region', 'value'),
     Input('tipe', 'value'),
     Input('kategori', 'value'),
     Input('crossfilter-year--slider', 'value'),
     Input('sort', 'value'),
     Input('filter', 'value')])
def update_scatter_plot(hover,region, tipe,kategori,year_value,sort,filters):
    """Animated scatter plot of the two features hovered in the heatmap.

    One animation frame per publish month; points are coloured by region.
    """
    filtered = search_df(df,region,tipe,kategori,year_value,filters)
    # Sorted by month so the animation frames appear in calendar order
    by_month = filtered.sort_values('publish_month')
    hovered_cell = hover['points'][0]
    col_x = hovered_cell['x']
    col_y = hovered_cell['y']
    fig = px.scatter(
        by_month,
        x=col_x,
        y=col_y,
        animation_frame="publish_month",
        animation_group="title",
        color="region",
        hover_name=sort,
        color_discrete_sequence=px.colors.sequential.Aggrnyl,
    )
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.update_layout(
        height=600,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        font_color='white',
        xaxis_title=col_x,
        yaxis_title=col_y,
    )
    return fig

#radar_corr
@app.callback(
    dash.dependencies.Output('radar_corr', 'figure'),
    [dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('filter', 'value')])
def update_radar_corr(region, tipe,kategori,year_value,filters):
    """Radar chart of the correlation between the chosen metric and other features.

    Args:
        region, kategori, year_value, filters: filter selections forwarded to
            ``search_df``.
        tipe: metric whose correlation column is plotted; must be one of the
            feature columns listed in the body.

    Returns:
        A plotly figure with one polar trace per publishing weekday.
    """
    dff = search_df(df,region,tipe,kategori,year_value,filters)
    day = ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']
    features = ['views','likes','dislikes','comment_count','days_to_trending']
    fig = go.Figure()
    for d in day:
        # Compute the correlation matrix once per day and reuse its column
        corr_with_tipe = dff.loc[dff['publishing_day']==d, features].corr()[tipe]
        fig.add_trace(go.Scatterpolar(r=list(corr_with_tipe.values),theta=list(corr_with_tipe.keys()),fill='toself',name=d))
    fig.update_layout(height=245,polar=dict(radialaxis=dict(visible=True,range=[0, 1])),showlegend=True, margin={'l': 50, 'b': 30, 'r': 10, 't': 10}, paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', font_color='white')
    return fig

## Row Lima

In [18]:
#wordcloud
@app.callback(
    Output('wordcloud', 'figure'),
    [Input('region', 'value'),
     Input('tipe', 'value'),
     Input('kategori', 'value'),
     Input('crossfilter-year--slider', 'value'),
     Input('filter', 'value')])
def update_word_cloud(region, tipe,kategori,year_value,filters):
    """Word cloud image built from the titles of the filtered videos."""
    filtered = search_df(df,region,tipe,kategori,year_value,filters)
    # Collect every whitespace-separated word of every title into one flat list
    title_words = []
    for words in filtered["title"].apply(lambda x: x.split()):
        title_words.extend(words)

    cloud = wordcloud.WordCloud(
        width=1200,
        height=500,
        collocations=False,
        background_color="white",
        colormap="tab20b",
    )
    wc = cloud.generate(" ".join(title_words))
    fig = px.imshow(wc)
    fig.update_xaxes(showgrid=False)
    fig.update_layout(
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        font_color='white',
    )
    return fig

#len_dist
@app.callback(
    dash.dependencies.Output('len_dist', 'figure'),
    [dash.dependencies.Input('region', 'value'),
     dash.dependencies.Input('tipe', 'value'),
     dash.dependencies.Input('kategori', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('filter', 'value')])
def update_len_dist(region, tipe,kategori,year_value,filters):
    """Histogram of title lengths for the filtered videos.

    Args:
        region, tipe, kategori, year_value, filters: filter selections
            forwarded to ``search_df``.

    Returns:
        A plotly histogram figure of the ``len_title`` column.
    """
    dff = search_df(df,region,tipe,kategori,year_value,filters)
    # Plot the filtered frame (dff), not the global df, so the filter
    # controls actually affect this chart.
    fig = px.histogram(dff[['len_title']], x=['len_title'], color_discrete_sequence=px.colors.sequential.Aggrnyl)
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', font_color='white',xaxis_title="Jumlah Huruf Pada Judul",yaxis_title="Jumlah Video")
    return fig

# Run Programs

In [19]:
# Start the Dash server (in debug mode) only when this code runs as the main module.
if __name__ == '__main__':
    app.run_server(debug=True)

Dash app running on http://127.0.0.1:8050/
