In [216]:
# !pip install Dash
# !pip install dash-bootstrap-components
# !pip install WordCloud

In [217]:
from dash import Dash, dcc, Output, Input, html  # pip install dash
import dash_bootstrap_components as dbc    # pip install dash-bootstrap-components
import plotly.express as px
import pandas as pd                        # pip install pandas
import webbrowser
from threading import Timer
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import base64
from io import BytesIO
from matplotlib.colors import LinearSegmentedColormap

In [218]:
df = pd.read_csv("data/cleaned_global_youtube_statistics.csv")
df_country = pd.read_csv("data/cleaned_country_data.csv")

In [219]:
df

Unnamed: 0,rank,Youtuber,subscribers,video views,category,uploads,Country,channel_type,video_views_rank,country_rank,highest_monthly_earnings,created_year,Gross tertiary education enrollment (%),Unemployment rate
0,1,T-Series,245000000,2.280000e+11,Music,20082,India,Music,1.0,1.0,9000000.00,2006.0,28.1,5.36
1,2,YouTube Movies,170000000,0.000000e+00,Film & Animation,1,United States,Games,4055159.0,7670.0,0.05,2006.0,88.2,14.70
2,3,MrBeast,166000000,2.836884e+10,Entertainment,741,United States,Entertainment,48.0,1.0,5400000.00,2012.0,88.2,14.70
3,4,Cocomelon - Nursery Rhymes,162000000,1.640000e+11,Education,966,United States,Education,2.0,2.0,7900000.00,2006.0,88.2,14.70
4,5,SET India,159000000,1.480000e+11,Shows,116536,India,Entertainment,3.0,2.0,7300000.00,2006.0,28.1,5.36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
825,990,Migos ATL,12400000,6.993406e+09,Music,99,United States,Entertainment,833.0,175.0,197600.00,2012.0,88.2,14.70
826,991,Natan por Aï¿,12300000,9.029610e+09,Sports,1200,Brazil,Entertainment,525.0,55.0,2200000.00,2017.0,51.3,12.08
827,992,Free Fire India Official,12300000,1.674410e+09,People & Blogs,1500,India,Games,6141.0,125.0,258900.00,2018.0,28.1,5.36
828,994,RobTopGames,12300000,3.741235e+08,Gaming,39,Sweden,Games,35112.0,4.0,15500.00,2012.0,67.0,6.48


In [220]:
font_dark="#0b090a"
font_light="#e4e3e4"

color_grey="#232a2f"
color_bloodred="#660708"
color_cornellred="#ba181b"
color_imperialred="#E5383B"
color_silver="#c2bdbd"
color_timbergrey="#d3d3d3"
color_platinum="#e4e3e4"


In [221]:
# Define colors in the colormap
colors = [color_grey, font_dark, color_cornellred, color_imperialred]

# Create a custom colormap
custom_colormap = LinearSegmentedColormap.from_list("custom_colormap", colors)


In [222]:
year_counts = df['created_year'].value_counts().to_dict()

In [223]:
app = Dash(__name__, external_stylesheets=[dbc.themes.SANDSTONE])

In [224]:
# Generate word cloud
def generate_wordcloud(text):
    wordcloud = WordCloud(width=4000, height=3200, background_color=color_silver, margin=0, colormap=custom_colormap, min_font_size=20).generate(text)
    return wordcloud

def plot_wordcloud(wordcloud):
    plt.figure(figsize=(10, 8), facecolor=None)  # Remove figure background
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)  # Remove padding
    buf = BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0, transparent=True)  # Save without padding and with transparency
    plt.close()
    buf.seek(0)
    image = base64.b64encode(buf.read()).decode('utf-8')
    return f"data:image/png;base64,{image}"

text = ' '.join(df['category'].dropna().astype(str))
wordcloud = generate_wordcloud(text)
wordcloud_image = plot_wordcloud(wordcloud)


In [225]:
top_list = html.Ul(
    [html.Li(f"{index + 1}. {row['Youtuber']}") for index, row in df.head(25).iterrows()],
    style={
        'maxHeight': '100%',
        'padding': '10px',
        'listStyleType': 'none',
        'color': font_light,
        'lineHeight': '2'  # Adjust this value to increase/decrease space between lines
    }
)

In [226]:
cols = [col for col in df_country.columns if col != 'Country']

In [227]:
histogram = px.histogram(df, x='Country')
scatter_fig = px.scatter(df, x='Country', y='created_year')
scatter_fig2 = px.scatter(df, x='Country', y='created_year')
financial_scatter = px.scatter(df, x='Country', y='created_year')

In [228]:
string="Here, we analyze different categories of YouTube channels using various metrics. It is important to note that you can select the channel types for the graphs to visualize. The word cloud illustrates the types of videos uploaded on these channels."

In [229]:
app.layout = dbc.Container([
    dbc.Row([
        dbc.Col([
            html.H2("Best of The Best", style={'color': font_light, 'text-align': 'left'}),
            top_list
        ], width=2, style={
            'background-color': color_grey, 'margin': '0px', 'padding': '20px', 'height': '100vh', 'color': font_light,
            'box-shadow': '2px 2px 5px rgba(0, 0, 0, 0.1)', 'overflow': 'hidden', 'position': 'fixed', 'top': '0', 'left': '0', 'z-index': '1000'
        }),
        
        dbc.Col([   
            dbc.Row([
                dbc.Col(html.H1("TOP YOUTUBERS OF 2023", style={'color': font_light, 'text-align': 'left', 'fontSize': '54px'}), width=10)
            ], style={
                'background-color': color_bloodred, 'padding': '10px', 'margin': '0px',
                'box-shadow': '2px 2px 5px rgba(0, 0, 0, 0.1)'
            }),
    
            dbc.Row([
                dbc.Col(html.H2("Some Data Comparison Between Channel Types", style={'color': font_dark, 'text-align': 'left'}), width=10)
            ], style={
                'background-color': color_timbergrey, 'margin': '10px', 'padding': '20px',
                'box-shadow': '2px 2px 5px rgba(0, 0, 0, 0.1)'
            }),

            dbc.Row([
                dbc.Col(html.P(string, style={'color': font_dark, 'text-align': 'left'}), width=10)
            ], style={
                'background-color': color_timbergrey, 'margin': '10px', 'padding': '20px',
                'box-shadow': '2px 2px 5px rgba(0, 0, 0, 0.1)'
            }),
    
            dbc.Row([
                dbc.Col(html.H4("Choose Channel Type!", style={'color': font_dark, 'text-align': 'left'}), width=9, style={'margin_right':'10px'}),
                dbc.Col(
                    dcc.Dropdown(
                        id='heatmap-dropdown',
                        options=[{'label': col, 'value': col} for col in cols],  # Updated to df.columns for demonstration
                        value='Total',
                        style={"background-color": font_light, "color": font_dark}
                    ),
                    width=3
                )
            ], style={
                'background-color': color_timbergrey, 'margin': '10px', 'padding': '10px',
                'box-shadow': '2px 2px 5px rgba(0, 0, 0, 0.1)'
            }),
        
            dbc.Row([
                dbc.Col(html.Img(id='wordcloud-image', src=wordcloud_image, 
                                 style={'width': '100%', 'display': 'block', 'margin': '0 auto', 'height': 'auto'})
                        , width=5),
                dbc.Col(
                    dcc.Graph(id='heatmap', style={"background-color": color_timbergrey, 'width': '100%', 'height': '100%'})  # Placeholder for heatmap
                , width=7, style={"background-color": color_timbergrey,'height': '100%', 'display': 'flex', 'alignItems': 'center', 'justifyContent': 'center', 'box-shadow': '2px 2px 5px rgba(0, 0, 0, 0.1)'})
            ], style={
                'margin': '20px', 'display': 'flex', 'alignItems': 'stretch'
            }),

            dbc.Row(dbc.Col(html.H2("Channel Creation", style={'color': font_dark, 'text-align': 'left'}), width=10), 
                    style={
                        'background-color': color_timbergrey, 'margin': '10px',
                        'box-shadow': '2px 2px 5px rgba(0, 0, 0, 0.1)'
                    }),
        
            dbc.Row([
                dbc.Col([
                    dcc.Graph(id= 'histogram', figure=histogram),  # Histogram on top right
                ], width=12)
            ]),  # <- Added closing bracket here

            dbc.Row([
                dbc.Col(html.H2("Let's Talk About Subscribers!", style={'color': font_dark, 'text-align': 'left'}), width=10)
            ], style={
                'background-color': color_timbergrey, 'margin': '10px',
                'box-shadow': '2px 2px 5px rgba(0, 0, 0, 0.1)'
            }),
        
            dbc.Row([
                dbc.Col([
                    dcc.Graph(id='scatter_fig', figure=scatter_fig),  # Histogram on top right
                ], width=6),
                dbc.Col([
                    dcc.Graph(id='scatter_fig2', figure=scatter_fig2),  # Histogram on top right
                ], width=6)
            ], style={'margin': '0px'}),

            dbc.Row(dbc.Col(html.H2("Some Financial Insight", style={'color': font_dark, 'text-align': 'left'}), width=10), 
                    style={
                        'background-color': color_timbergrey, 'margin': '10px',
                        'box-shadow': '2px 2px 5px rgba(0, 0, 0, 0.1)'
                    }),
        
            dbc.Row([
                dbc.Col([
                    dcc.Graph(id= 'financial_scatter', figure=financial_scatter),
                ], width=12)
            ])
        ], width=10, style={
            'background-color': color_silver, 'margin-left': '16.666667%', 'box-shadow': '2px 2px 5px rgba(0, 0, 0, 0.1)', 'overflow-y': 'scroll', 'padding-left': '10px', 'padding-right': '10px'
        })
    ])
], fluid=True, style={'background-color': color_silver, 'margin': '0px'})



In [230]:
custom_color_scale = [
    (0.0, color_bloodred),
    (1.0, font_light)
]

In [231]:
@app.callback(
    Output('heatmap', 'figure'),
    Input('heatmap-dropdown', 'value')
)
def update_heatmap(column_name):
    heatmap_fig = px.choropleth(
        data_frame=df_country,
        locations='Country',
        locationmode="country names",
        color=column_name,
        color_continuous_scale=custom_color_scale
    )
    heatmap_fig.update_layout(
        paper_bgcolor=color_timbergrey,
        plot_bgcolor=color_timbergrey,
        margin=dict(l=20, r=20, t=40, b=20)
    )

    heatmap_fig.update_geos(
        bgcolor=color_timbergrey,  # Background color of the map
        landcolor=color_silver,  # Color for the continents/land
        lakecolor=color_timbergrey,
        showland=True
    )
    
    return heatmap_fig

@app.callback(
    Output('scatter_fig2', 'figure'),
    Input('heatmap-dropdown', 'value')
)
def update_scatter_fig2(value):
    df2=df

    if (value!='Total'): df2=df[df['channel_type']==value]
    
    scatter_fig2 = px.scatter(
        df2,
        x='uploads',
        y='subscribers',
        labels={'uploads': 'Total Video Uploads', 'subscribers': 'Subscribers'},
        title='Subscribers vs Total Video Uploads',
        template='plotly_white',
        hover_name='Youtuber'
    )
    scatter_fig2.update_layout(
        paper_bgcolor=color_timbergrey,
        plot_bgcolor=color_timbergrey,
        margin=dict(l=20, r=20, t=40, b=20)
    )
    
    scatter_fig2.update_traces(marker=dict(color=color_cornellred))
    
    return scatter_fig2

@app.callback(
    Output('scatter_fig', 'figure'),
    Input('heatmap-dropdown', 'value')
)
def update_scatter_fig(value):
    df2=df

    if (value!='Total'): df2=df[df['channel_type']==value]
    
    scatter_fig = px.scatter(
        df2,
        x='video views',
        y='subscribers',
        labels={'video views': 'Total Video Views', 'subscribers': 'Subscribers'},
        title='Subscribers vs Total Video Views',
        template='plotly_white',
        hover_name='Youtuber'
    )
    scatter_fig.update_layout(
        paper_bgcolor=color_timbergrey,
        plot_bgcolor=color_timbergrey,
        margin=dict(l=20, r=20, t=40, b=20)
    )
    
    scatter_fig.update_traces(marker=dict(color=color_cornellred))
    
    return scatter_fig

@app.callback(
    Output('financial_scatter', 'figure'),
    Input('heatmap-dropdown', 'value')
)
def update_financial_scatter(value):
    df2=df

    if (value!='Total'): df2=df[df['channel_type']==value]
    
    financial_scatter = px.scatter(
        df2,
        x='Unemployment rate',
        y='highest_monthly_earnings',
        labels={'highest_monthly_earnings': 'Highest Monthly Earnings', 'Unemployment rate': 'Unemployment rate'},
        title='Highest Monthly Earnings vs Country Unemployment Rate',
        template='plotly_white',
        hover_name='Youtuber'
    )
    financial_scatter.update_layout(
        paper_bgcolor=color_timbergrey,
        plot_bgcolor=color_timbergrey,
        margin=dict(l=20, r=20, t=40, b=20)
    )
    
    financial_scatter.update_traces(marker=dict(color=color_cornellred))
    
    return financial_scatter

@app.callback(
    Output('histogram', 'figure'),
    Input('heatmap-dropdown', 'value')
)
def update_histogram(value):
    df2=df

    if (value!='Total'): df2=df[df['channel_type']==value]
    
    histogram = px.histogram(
        df2,
        y='created_year',
        labels={'created_year': 'Year Created'},
        title='Histogram of Created Year',
        template='plotly_white'
    )

    histogram.update_layout(
        paper_bgcolor=color_timbergrey,
        plot_bgcolor=color_timbergrey,
        margin=dict(l=20, r=20, t=40, b=20)
    )

    histogram.update_traces(marker=dict(color=color_cornellred))
    histogram.update_yaxes(range=[2005, df['created_year'].max()])
    
    return histogram
    
@app.callback(
    Output('wordcloud-image', 'src'),
    Input('heatmap-dropdown', 'value')
)
def update_wordcloud(value):
    if (value=='Total'):
        text = ' '.join(df['category'].dropna().astype(str))  # Modify as needed based on `value`
        wordcloud = generate_wordcloud(text)
        
    else:
            df2=df[df['channel_type']==value]
            text = ' '.join(df2['category'].dropna().astype(str))  # Modify as needed based on `value`
            wordcloud = generate_wordcloud(text)
    
    return plot_wordcloud(wordcloud)

def open_browser():
    webbrowser.open_new("http://127.0.0.1:8054/")

if __name__ == '__main__':
    #Timer(1, open_browser).start()
    app.run_server(debug=True, port=8054)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[231], line 154, in update_wordcloud(value='Total')
    151         text = ' '.join(df2['category'].dropna().astype(str))  # Modify as needed based on `value`
    152         wordcloud = generate_wordcloud(text)
--> 154 return plot_wordcloud(wordcloud)
        wordcloud = <wordcloud.wordcloud.WordCloud object at 0x0000027B2EDDD5D0>

Cell In[224], line 8, in plot_wordcloud(
    wordcloud=<wordcloud.wordcloud.WordCloud object>
)
      6 def plot_wordcloud(wordcloud):
      7     plt.figure(figsize=(10, 8), facecolor=None)  # Remove figure background
----> 8     plt.imshow(wordcloud, interpolation='bilinear')
        wordcloud = <wordcloud.wordcloud.WordCloud object at 0x0000027B2EDDD5D0>
        plt = <module 'matplotlib.pyplot' from 'C:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\pypl