In [24]:
from dash import Dash, dcc, Output, Input, html  # pip install dash
import dash_bootstrap_components as dbc    # pip install dash-bootstrap-components
import plotly.express as px
import pandas as pd                        # pip install pandas
import webbrowser
from threading import Timer
from wordcloud import WordCloud #pip install WordCloud
import matplotlib.pyplot as plt
import base64
from io import BytesIO

In [25]:
# Load the dataset
file_path = 'data/cleaned_global_youtube_statistics.csv'
df = pd.read_csv(file_path)



In [26]:
def generate_wordcloud(data):
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(data)
    return wordcloud
def wordcloud_to_base64(wordcloud):
    buffer = BytesIO()
    wordcloud.to_image().save(buffer, format="PNG")
    img_str = base64.b64encode(buffer.getvalue()).decode()
    return f"data:image/png;base64,{img_str}"



In [27]:
# Build your components
app = Dash(__name__, external_stylesheets=[dbc.themes.LUX])
heatmap = dcc.Graph(figure={})
histogram = dcc.Graph(figure={})
scatterplot = dcc.Graph(figure={})
wordcloud = html.Img(id='wordcloud-img')  # Updated to use 'id' for the Img component
barchart = dcc.Graph(figure={})
dropdown = dcc.Dropdown(options=[{'label': col, 'value': col} for col in df.columns],
                        value='subscribers',  # initial value displayed when page first loads
                        clearable=False)

In [28]:
app.layout = dbc.Container([
    dbc.Row([
        dbc.Col(html.H1("YouTube Statistics Data Visualization", className="text-center"), width=12)  # Title
    ]),
    dbc.Row([
        dbc.Col(
            dbc.Row([
                dbc.Col(dropdown, width=12, style={'margin-bottom': '20px'}),
                dbc.Col(heatmap, width=12)
            ], className='h-100'), width=6, className='d-flex flex-column h-100'),
        dbc.Col(
            dbc.Row([
                dbc.Col(histogram, width=12),
                dbc.Col(scatterplot, width=12)
            ], className='h-100'), width=6, className='d-flex flex-column h-100')
    ], style={'height': '600px'}),
    dbc.Row([
        dbc.Col(wordcloud, width=6),  # Wordcloud on the left
        dbc.Col(barchart, width=6)  # Bar chart on the right
    ])
], fluid=True)

In [29]:
@app.callback(
    Output(heatmap, 'figure'),
    Output(histogram, 'figure'),
    Output(scatterplot, 'figure'),
    Output('wordcloud-img', 'src'),  # Update to use 'src' of html.Img
    Output(barchart, 'figure'),
    Input(dropdown, 'value')
)
def update_graph(column_name):  # Function arguments come from the component property of the Input

    print(column_name)
    print(type(column_name))

    # Ensure the selected column is numeric
    if df[column_name].dtype == 'object':
        df[column_name] = pd.to_numeric(df[column_name], errors='coerce')

    # Color scheme
    color_discrete_sequence = px.colors.qualitative.Set3

    # Heatmap
    heatmap_fig = px.choropleth(data_frame=df,
                                locations='Country',
                                locationmode="country names",
                                scope="world",
                                height=600,
                                color=column_name,
                                color_continuous_scale='Viridis',
                                labels={column_name: column_name})  # Ensure the color legend updates correctly

    # Histogram
    histogram_fig = px.histogram(df, x=column_name, title=f'Histogram of {column_name}',
                                 color_discrete_sequence=color_discrete_sequence)

    # Scatterplot
    scatterplot_fig = px.scatter(df, x='created_year', y=column_name, title=f'Scatterplot of {column_name} over Years',
                                 color_discrete_sequence=color_discrete_sequence)

    # Generate word cloud
    wordcloud_data = ' '.join(df['category'].dropna().astype(str))
    wc = generate_wordcloud(wordcloud_data)
    wc_img = wordcloud_to_base64(wc)

    # Bar chart
    barchart_fig = px.bar(df, x='Country', y=column_name, title=f'Bar Chart of {column_name} by Country',
                          color_discrete_sequence=color_discrete_sequence)

    return heatmap_fig, histogram_fig, scatterplot_fig, wc_img, barchart_fig


In [30]:
def open_browser():
    webbrowser.open_new("http://127.0.0.1:8054/")


In [31]:
if __name__ == '__main__':
    Timer(1, open_browser).start()
    app.run_server(debug=True, port=8054)


subscribers
<class 'str'>
subscribers
<class 'str'>
