# Visualizations with dash

### Dynamic multi-input etc
* https://dash.plotly.com/advanced-callbacks
* https://community.plotly.com/t/updating-a-dropdown-menus-contents-dynamically/4920/4
* https://dash-example-index.herokuapp.com/?code=dcc.Dropdown
* https://dash.plotly.com/dash-core-components/dropdown
* https://dash-bootstrap-components.opensource.faculty.ai/examples/iris/

## Imports

In [None]:
from dash import Dash, html, dcc, callback, Output, Input
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd

from wordcloud import WordCloud
from plotly.subplots import make_subplots
import plotly.graph_objects as graph_objects

## Dataframes etc

### counts_df contains topic counts, topic percentages, POS counts, emotions, sentiment, and data such as producer count, artist metadata, etc.

In [None]:
# Load the combined counts table; the first CSV column becomes the index.
counts_df = pd.read_csv('dataframes/with_counts/combined_count.csv', index_col=0)

In [None]:
# Preview the first rows of counts_df (shown as the cell output).
counts_df.head()

In [None]:
# List every column name available in counts_df.
list(counts_df.columns)

#### extra counts_df setup

In [None]:
# Distinct artist names found in the 'Artist' column of counts_df.
artists = list(counts_df['Artist'].unique())

In [None]:
# Column names of the manual word-topic counts. They all share the
# 'manual_<topic>_count' pattern, so only the topic part is spelled out.
topics = [
    f'manual_{topic_name}_count'
    for topic_name in (
        'love',
        'money',
        'violence',
        'drugs',
        'gendered',
        'sadness',
        'joy',
        'yes',
        'no',
    )
]

### top_20_filtered_words_genre/artist_df - contain top 20 most popular words for each artist / genre with extra filtered words removed:
the extra filtered words are: ['dont', 'im', 'know', 'yeah']

In [None]:
# Load the top-20 filtered words tables, one per genre and one per artist.
# No index_col is passed here, so pandas assigns a default integer index.
top_20_filtered_words_genre_df = pd.read_csv('dataframes/top_20_filtered_words_by_genre.csv')
top_20_filtered_words_artist_df = pd.read_csv('dataframes/top_20_filtered_words_by_artist.csv')

### ngram dfs - top 20 ngrams and their frequencies for 3 ngram lengths (2, 3, 4)

In [None]:
# Load the n-gram tables for artists and for genres; the first CSV column
# becomes the index in both.
artist_ngrams_df = pd.read_csv('dataframes/artist_ngrams.csv', index_col=0)
genre_ngrams_df = pd.read_csv('dataframes/genre_ngrams.csv', index_col=0)

### genre groupby stats dfs

In [None]:
# Load the per-genre group statistics (mean and sum); the first CSV column
# becomes the index in both.
genre_mean_df = pd.read_csv('dataframes/group_stats/genre_mean_df.csv', index_col = 0)
genre_sum_df = pd.read_csv('dataframes/group_stats/genre_sum_df.csv', index_col = 0)

### artist groupby stats dfs

In [None]:
# Load the per-artist group statistics (mean and sum); the first CSV column
# becomes the index in both.
artist_mean_df = pd.read_csv('dataframes/group_stats/mean/combined_artists.csv', index_col = 0)
artist_sum_df = pd.read_csv('dataframes/group_stats/sum/combined_artists.csv', index_col = 0)

## App

In [None]:
# Create the Dash application, styled with the Bootstrap theme shipped by
# dash-bootstrap-components.
app = Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])

## App graphs

### Topic counts from selected artists by year GRAPH
#### [Topic counts from selected artists by year] controls

In [None]:
# Control card for the "topic counts by year" graph: genre, artist and topic
# pickers. The artist picker is declared without options here.
topic_controls = dbc.Card(
    [
        html.Div(
            [
                dbc.Label("Genre"),
                dcc.Dropdown(
                    id='topic-genre-selection',
                    # A multi=True dropdown exchanges its value as a list, so
                    # the initial selection must be a list too. A bare string
                    # would be passed as-is to the callbacks, which iterate
                    # over it and hand it to `isin`.
                    value=['soul'],
                    options=list(counts_df.genre.unique()),
                    multi=True,
                ),
            ]
        ),
        html.Div(
            [
                dbc.Label("Artist"),
                dcc.Dropdown(
                    id='topic-artist-selection-dynamic',
                    # Same as above: list-valued because of multi=True.
                    value=['Al Green'],
                    multi=True,
                ),
            ]
        ),
        html.Div(
            [
                dbc.Label("Manual word topic"),
                dcc.Dropdown(topics, 'manual_love_count', id='topic-selection'),
            ]
        ),
        # TODO: optional year picker, not wired up yet.
        # html.Div(
        #     [
        #     dbc.Label("Year"),
        #     dcc.Dropdown(id='year-selection-dynamic')
        #     ])
    ],
    body=True,
)

#### [Topic counts from selected artists by year] layout

In [None]:
# Page section for the per-year topic graph: heading, two debug read-outs
# filled by the callback, then the control card (md=4) beside the graph (md=8).
_topic_row = dbc.Row(
    [
        dbc.Col(topic_controls, md=4),
        dbc.Col(dcc.Graph(id='topic-graph-content'), md=8),
    ],
    align="center",
)
topic_container = dbc.Container(
    [
        html.H1(
            'Genre and Artist manual topic counts by year',
            style={'textAlign': 'center'},
        ),
        html.P(id="multi-artist-dynamic-test"),
        html.P(id="topic-df-shape"),
        _topic_row,
    ],
    fluid=True,
)

#### [Topic counts from selected artists by year] controls callbacks

In [None]:
def _as_selection_list(selection):
    """Return a dropdown selection as a plain list.

    A multi-select value normally arrives as a list, but it can also be
    ``None`` (nothing selected), a bare string (a single value) or a dict
    (when an options mapping was forwarded as the value; its keys are used).
    """
    if selection is None:
        return []
    if isinstance(selection, str):
        # list('soul') would give ['s', 'o', 'u', 'l'], so wrap instead.
        return [selection]
    return list(selection)


# ========= topic graph by year with genre, artist, topic selection ===========
@app.callback(
    [
        Output('topic-graph-content', 'figure'),
        Output('topic-df-shape', 'children'),
        Output('multi-artist-dynamic-test', 'children')
    ],
    [
        Input('topic-genre-selection', 'value'),
        Input('topic-artist-selection-dynamic', 'value'),
        Input('topic-selection', 'value'),
    ]
)
def update_topic_graph(genres, artists, topic):
    """Draw the selected topic per year for the chosen genres and artists.

    Args:
        genres: Selected genre(s) from the genre dropdown.
        artists: Selected artist(s) from the artist dropdown.
        topic: Name of the counts_df column plotted on the y axis.

    Returns:
        A tuple of (bar figure, text with the filtered frame's shape,
        text echoing the raw artist selection).
    """
    # `Series.isin` rejects a bare string and None, so normalise both
    # selections to lists before filtering.
    genre_list = _as_selection_list(genres)
    artists_list = _as_selection_list(artists)

    genre_df = counts_df[counts_df['genre'].isin(genre_list)]
    artists_df = genre_df[genre_df['Artist'].isin(artists_list)]

    fig = px.bar(artists_df, x='Year', y=topic, color='genre',
                 hover_data=['Artist'])
    df_shape = artists_df.shape

    return fig, f'df shape: {df_shape}', f'artists list: {artists}'

# ========= set available artist options based on chosen genre =========
@app.callback(
    Output('topic-artist-selection-dynamic', 'options'),
    Input('topic-genre-selection', 'value')
)
def set_dynamic_artist_options(genres):
    """Return the artists that belong to any of the selected genres.

    Args:
        genres: Selected genre(s); may be a list, a single string or None.

    Returns:
        A list of distinct artist names, used as the dropdown options.
    """
    # Normalise the selection: iterating a bare string would walk over its
    # characters, and None cannot be iterated at all.
    if genres is None:
        selected_genres = []
    elif isinstance(genres, str):
        selected_genres = [genres]
    else:
        selected_genres = list(genres)

    # One vectorised filter instead of one DataFrame filter per genre.
    in_selected_genres = counts_df['genre'].isin(selected_genres)
    return list(counts_df.loc[in_selected_genres, 'Artist'].unique())

# ======= choose from available dynamic options =========
# The artist options change with the genre, so the selection is reset here:
# every artist currently offered is selected (this dropdown is multi-select).
@app.callback(
    Output('topic-artist-selection-dynamic', 'value'),
    Input('topic-artist-selection-dynamic', 'options')
)
def set_dynamic_artist_value(options):
    """Select every artist that the artist dropdown currently offers.

    Args:
        options: The dropdown options, as a mapping, a list of plain values,
            or a list of ``{'label': ..., 'value': ...}`` dicts; may be empty
            or None.

    Returns:
        A list with the value of every option (empty when there are none).
    """
    if not options:
        return []
    if isinstance(options, dict):
        # The keys of an options mapping are the selectable artist names.
        # The value of a multi-select must be a list, not the mapping itself.
        return list(options)
    return [
        option['value'] if isinstance(option, dict) else option
        for option in options
    ]

### Topic counts / percentages comparison by genre bars

#### controls

In [None]:
# Control card for the genre comparison bars: a single multi-select topic picker.
_bar_topic_picker = dcc.Dropdown(
    id='bar-topic-selection',
    options=topics,
    value=['manual_love_count'],
    multi=True,
)
bar_topic_controls = dbc.Card(
    [html.Div([dbc.Label("Manual word topic"), _bar_topic_picker])],
    body=True,
)

#### layout

In [None]:
# Page section for the genre comparison bars: heading, a debug read-out
# filled by the callback, then the control card beside the graph.
_bar_topic_row = dbc.Row(
    [
        dbc.Col(bar_topic_controls, md=4),
        dbc.Col(dcc.Graph(id='bar-topic-graph-content'), md=8),
    ],
    align="center",
)
bar_topic_container = dbc.Container(
    [
        html.H1('Genre topic bars comparison', style={'textAlign': 'center'}),
        html.P(id="bar-topic-df-shape"),
        _bar_topic_row,
    ],
    fluid=True,
)

#### callbacks

In [None]:
# ========= genre comparison bars for the selected topics ===========
@app.callback(
    [
        Output('bar-topic-graph-content', 'figure'),
        Output('bar-topic-df-shape', 'children')
    ],
    [
        Input('bar-topic-selection', 'value'),
    ]
)
def update_bar_topic_graph(topics):
    """Draw one bar trace per selected topic, with genres on the x axis.

    Args:
        topics: Selected topic column name(s) of genre_sum_df; may be a list,
            a single string or None when nothing is selected.

    Returns:
        A tuple of (bar figure, text echoing the raw and normalised selection).
    """
    # Normalise the selection: a bare string must be wrapped (iterating it
    # would yield characters) and None means "no topics".
    if topics is None:
        topics_list = []
    elif isinstance(topics, str):
        topics_list = [topics]
    else:
        topics_list = list(topics)

    bar_fig = graph_objects.Figure()
    for topic in topics_list:
        bar_fig.add_trace(
            graph_objects.Bar(
                x=genre_sum_df.index,
                y=genre_sum_df[topic].values,
                name=topic,
            )
        )

    return bar_fig, f'topics: {topics} topics_list: {topics_list}'


In [None]:
# side_fig = graph_objects.Figure()
# side_fig.add_trace(graph_objects.Bar(y=loaded_sum_df['love'].values, x=loaded_sum_df.index, name='love'))
# side_fig.add_trace(graph_objects.Bar(y=loaded_sum_df['money'].values, x=loaded_sum_df.index, name='money'))
# side_fig.add_trace(graph_objects.Bar(y=loaded_sum_df['violence'].values, x=loaded_sum_df.index, name='violence'))
# side_fig.add_trace(graph_objects.Bar(y=loaded_sum_df['drugs'].values, x=loaded_sum_df.index, name='drugs'))
# side_fig.show()

### Topic counts / percentages comparison by artist bars (within genre)

#### controls

In [None]:
# Distinct genres present in artist_sum_df; used as the genre dropdown options.
# NOTE: `genres` is assigned again in the static wordcloud setup further down,
# so the value depends on which cell ran last.
genres = artist_sum_df['genre'].unique()

In [None]:
# Control card for the within-genre artist bars: a multi-select topic picker
# and a single-select genre picker.
artist_bar_topic_controls = dbc.Card(
    [
        html.Div(
            [
                dbc.Label("Manual word topic"),
                dcc.Dropdown(
                    options=topics,
                    value=['manual_love_count'],
                    id='artist-bar-topic-selection',
                    multi=True,
                ),
            ]
        ),
        html.Div(
            [
                dbc.Label("Genre"),
                dcc.Dropdown(
                    options=list(genres),
                    # Single-select dropdown: the value is one genre, not a
                    # list. The callback compares it to the genre column with
                    # `==`, which needs a scalar.
                    value='pop',
                    id='artist-bar-genre-selection',
                ),
            ]
        ),
    ],
    body=True,
)

In [None]:
# Page section for the within-genre artist bars: heading, a debug read-out
# filled by the callback, then the control card beside the graph.
_artist_bar_topic_row = dbc.Row(
    [
        dbc.Col(artist_bar_topic_controls, md=4),
        dbc.Col(dcc.Graph(id='artist-bar-topic-graph-content'), md=8),
    ],
    align="center",
)
artist_bar_topic_container = dbc.Container(
    [
        html.H1(
            'Within genre artists topic bars comparison',
            style={'textAlign': 'center'},
        ),
        html.P(id="artist-bar-topic-df-shape"),
        _artist_bar_topic_row,
    ],
    fluid=True,
)

In [None]:
# ========= within-genre artist bars for the selected topics ===========
@app.callback(
    [
        Output('artist-bar-topic-graph-content', 'figure'),
        Output('artist-bar-topic-df-shape', 'children')
    ],
    [
        Input('artist-bar-topic-selection', 'value'),
        Input('artist-bar-genre-selection', 'value'),
    ]
)
def update_artist_bar_topic_graph(topics, genre):
    """Draw one bar trace per selected topic for the artists of one genre.

    Args:
        topics: Selected topic column name(s) of artist_sum_df; may be a
            list, a single string or None.
        genre: Selected genre; a one-element list or None is tolerated too.

    Returns:
        A tuple of (bar figure, text echoing the raw and normalised topics).
    """
    # Normalise both selections so a cleared dropdown (None) or a
    # list-valued genre does not raise.
    if topics is None:
        topics_list = []
    elif isinstance(topics, str):
        topics_list = [topics]
    else:
        topics_list = list(topics)

    if genre is None:
        genre_list = []
    elif isinstance(genre, str):
        genre_list = [genre]
    else:
        genre_list = list(genre)

    genre_df = artist_sum_df[artist_sum_df['genre'].isin(genre_list)]
    # Take x from the same rows as y so both always have the same length and
    # order, even if an artist appears on more than one row.
    genre_artists = genre_df['Artist'].values

    bar_fig = graph_objects.Figure()
    for topic in topics_list:
        bar_fig.add_trace(
            graph_objects.Bar(
                x=genre_artists,
                y=genre_df[topic].values,
                name=topic,
            )
        )

    return bar_fig, f'topics: {topics} topics_list: {topics_list}'

### scatter for artist / genre where x = one topic, y = second topic
- colored by genre - colored by gender

In [None]:
# Control card for the topic scatter: the Y topic, the X topic and the column
# used to colour the points. All three pickers are single-select.
_scatter_y_picker = dcc.Dropdown(
    id='scatter-topic-Y-selection',
    options=topics,
    value='manual_joy_count',
)
_scatter_x_picker = dcc.Dropdown(
    id='scatter-topic-X-selection',
    options=topics,
    value='manual_sadness_count',
)
_scatter_color_picker = dcc.Dropdown(
    id='topic-scatter-color-selection',
    options=['genre', 'gender'],
    value='genre',
)
topic_scatter_controls = dbc.Card(
    [
        html.Div([dbc.Label("Manual word topic (Y)"), _scatter_y_picker]),
        html.Div([dbc.Label("Manual word topic (X)"), _scatter_x_picker]),
        html.Div([dbc.Label("color by"), _scatter_color_picker]),
    ],
    body=True,
)

In [None]:
# Page section for the topic scatter: heading, a debug read-out filled by the
# callback, then the control card beside the graph.
_topic_scatter_row = dbc.Row(
    [
        dbc.Col(topic_scatter_controls, md=4),
        dbc.Col(dcc.Graph(id='topic-scatter-graph-content'), md=8),
    ],
    align="center",
)
topic_scatter_container = dbc.Container(
    [
        html.H1(
            'scatter for all artists where x = one topic, y = second topic',
            style={'textAlign': 'center'},
        ),
        html.P(id="topic-scatter-df-shape"),
        _topic_scatter_row,
    ],
    fluid=True,
)

In [None]:
# ========= scatter of one topic against another, coloured by a chosen column ===========
# NOTE(review): this function shares its name with the artist bar callback
# defined earlier in the file. Dash registers callbacks through the decorator,
# so both remain registered, but the module-level name ends up bound to this
# function. Consider renaming it.
@app.callback(
    [
        Output('topic-scatter-graph-content', 'figure'),
        Output('topic-scatter-df-shape', 'children')
    ],
    [
        # Inputs are passed positionally, so they are listed in the same order
        # as the parameters: the X dropdown feeds topic_X and the Y dropdown
        # feeds topic_Y.
        Input('scatter-topic-X-selection', 'value'),
        Input('scatter-topic-Y-selection', 'value'),
        Input('topic-scatter-color-selection', 'value'),
    ]
)
def update_artist_bar_topic_graph(topic_X, topic_Y, colorby):
    """Scatter every row of artist_sum_df with one topic per axis.

    Args:
        topic_X: Column name plotted on the x axis.
        topic_Y: Column name plotted on the y axis.
        colorby: Column name used to colour the points.

    Returns:
        A tuple of (scatter figure, text echoing the three selections).
    """
    scatter_fig = px.scatter(artist_sum_df, x=topic_X, y=topic_Y, color=colorby)
    return scatter_fig, f'topic_X: {topic_X}, topic_Y: {topic_Y}, colorby: {colorby}'

### static genre wordclouds graph creation
#### layout

create the non-dynamic wordclouds

In [None]:
# setup: the CSV's unnamed first column holds the genre, so give it a proper
# name, then collect the distinct genres.
top_20_filtered_words_genre_df = top_20_filtered_words_genre_df.rename(
    columns={'Unnamed: 0': 'genre'}
)
genres = top_20_filtered_words_genre_df['genre'].unique()

# column access names: 'word0'..'word19' hold the words and
# 'word0_count'..'word19_count' hold the matching counts.
n = 20
word_cols = [f'word{ind}' for ind in range(n)]
count_cols = [f'word{ind}_count' for ind in range(n)]

In [None]:
# Build, for every genre, a wordcloud and the matching bar data
# (words and their counts) from the top-20 words table.
genre_wordclouds = []
genre_bars = []

for genre in genres:
    is_genre = top_20_filtered_words_genre_df['genre'] == genre
    genre_df = top_20_filtered_words_genre_df[is_genre]

    # The first matching row is read for each word / count column.
    genre_words = [genre_df[col].values[0] for col in word_cols]
    genre_counts = [genre_df[col].values[0] for col in count_cols]

    genre_bars.append(
        {
            'genre': genre,
            'words': genre_words,
            'counts': genre_counts,
        }
    )

    # word -> count mapping consumed by the wordcloud generator
    frequencies = dict(zip(genre_words, genre_counts))

    wordcloud = WordCloud(background_color="white", width=800, height=400)
    wordcloud.generate_from_frequencies(frequencies=frequencies)
    genre_wordclouds.append(wordcloud)

In [None]:
# Subplot titles in row-major order: each genre fills one row of two cells
# (wordcloud, bar chart), so every genre name appears twice. Deriving the
# titles from `genres` keeps them in step with the order the traces are drawn.
genre_titles = [str(genre) for genre in genres for _ in range(2)]

In [None]:
# One subplot row per genre: wordcloud image on the left, word counts on the
# right. The row count follows the data instead of being fixed at 4.
_genre_rows = len(genre_wordclouds)
genre_wordclouds_fig = make_subplots(rows=_genre_rows, cols=2, subplot_titles=genre_titles)

for row, (genre_cloud, genre_bar) in enumerate(zip(genre_wordclouds, genre_bars), start=1):
    genre_wordclouds_fig.add_trace(graph_objects.Image(z=genre_cloud), row=row, col=1)
    genre_wordclouds_fig.add_trace(
        graph_objects.Bar(x=genre_bar['words'], y=genre_bar['counts'], showlegend=False),
        row=row,
        col=2,
    )
# 400 px per row; left as the last expression so the figure is displayed.
genre_wordclouds_fig.update_layout(height=_genre_rows * 400)

#### [static genre wordclouds] layout container

In [None]:
# Page section showing the pre-built (static) genre wordcloud figure.
_genre_wordcloud_graph = dcc.Graph(
    id='genre-wordcloud-graph-content',
    figure=genre_wordclouds_fig,
)
genre_wordcloud_container = dbc.Container(
    [
        html.H1(
            'Top 20 (filtered) words by genre wordclouds',
            style={'textAlign': 'center'},
        ),
        _genre_wordcloud_graph,
    ],
    fluid=True,
)

### Dynamic Artist wordcloud graphs

#### [Dynamic Artist wordcloud graphs] controls

In [None]:
# Control card for the artist wordcloud: single-select genre and artist
# pickers. The artist picker is declared without options here.
_wordcloud_genre_picker = dcc.Dropdown(
    id='wordcloud-genre-selection',
    options=list(top_20_filtered_words_artist_df.genre.unique()),
    value='soul',
)
_wordcloud_artist_picker = dcc.Dropdown(
    id='wordcloud-artist-selection-dynamic',
    value='Al Green',
)
artist_wordcloud_controls = dbc.Card(
    [
        html.Div([dbc.Label("Genre"), _wordcloud_genre_picker]),
        html.Div([dbc.Label("Artist"), _wordcloud_artist_picker]),
    ],
    body=True,
)

#### [Dynamic Artist wordcloud graphs] layout

In [None]:
# Page section for the dynamic artist wordcloud: heading, a debug read-out
# filled by the callback, then the control card beside the graph.
_artist_wordcloud_row = dbc.Row(
    [
        dbc.Col(artist_wordcloud_controls, md=4),
        dbc.Col(dcc.Graph(id='artist-wordcloud-graph-content'), md=8),
    ],
    align="center",
)
artist_wordcloud_container = dbc.Container(
    [
        html.H1(
            'Genre and Artist top words wordcloud dynamic',
            style={'textAlign': 'center'},
        ),
        html.P(id='wordcloud-df-shape'),
        _artist_wordcloud_row,
    ],
    fluid=True,
)

#### [Dynamic Artist wordcloud graphs] controls callbacks

In [None]:
# ========= artist wordcloud ===========
@app.callback(
    [
        Output('artist-wordcloud-graph-content', 'figure'),
        Output('wordcloud-df-shape', 'children')
    ],
    [
        Input('wordcloud-genre-selection', 'value'),
        Input('wordcloud-artist-selection-dynamic', 'value'),
    ]
)
def update_artist_wordcloud_graph(genre, artist):
    """Draw the wordcloud and the word-count bars of one artist.

    Args:
        genre: Selected genre. Not read here; it is only listed as an input
            so a genre change also triggers this callback.
        artist: Selected artist name.

    Returns:
        A tuple of (figure with wordcloud and bar chart, text with the shape
        of the artist's rows).

    Raises:
        PreventUpdate: When no row matches the artist (for example the
            dropdown was cleared), leaving the current figure untouched.
    """
    from dash.exceptions import PreventUpdate

    artist_df = top_20_filtered_words_artist_df[top_20_filtered_words_artist_df['Artist'] == artist]
    if artist_df.empty:
        # `.values[0]` below would raise IndexError on an empty selection.
        raise PreventUpdate

    # The first matching row is read for each word / count column.
    artist_words = [artist_df[word].values[0] for word in word_cols]
    artist_counts = [artist_df[count].values[0] for count in count_cols]

    wordcloud = WordCloud(background_color="white", width=800, height=400)
    wordcloud.generate_from_frequencies(frequencies=dict(zip(artist_words, artist_counts)))

    artist_fig = make_subplots(rows=1, cols=2, subplot_titles=[f'{artist} wordcloud', f'{artist} bar'])
    artist_fig.add_trace(graph_objects.Image(z=wordcloud), row=1, col=1)
    artist_fig.add_trace(graph_objects.Bar(x=artist_words, y=artist_counts, showlegend=False), row=1, col=2)
    artist_fig.update_layout(height=1 * 400)

    df_shape = artist_df.shape
    return artist_fig, f'df shape: {df_shape}'

# ========= set available artist options based on chosen genre =========
@app.callback(
    Output('wordcloud-artist-selection-dynamic', 'options'),
    Input('wordcloud-genre-selection', 'value')
)
def set_dynamic_artist_options(genre):
    """Return label/value options for the artists of the selected genre."""
    in_genre = top_20_filtered_words_artist_df['genre'] == genre
    artist_names = top_20_filtered_words_artist_df[in_genre]['Artist'].unique()
    return [{"label": name, "value": name} for name in list(artist_names)]

# ======= choose from available dynamic options =========
# The artist options change with the genre, so the selection is reset here.
# This dropdown is single-select: the first offered artist is chosen.
@app.callback(
    Output('wordcloud-artist-selection-dynamic', 'value'),
    Input('wordcloud-artist-selection-dynamic', 'options')
)
def set_dynamic_artist_value(options):
    """Select the first artist that the artist dropdown currently offers.

    Args:
        options: List of ``{'label': ..., 'value': ...}`` dicts; may be
            empty or None when the chosen genre has no artists.

    Returns:
        The value of the first option, or None when there are no options.
    """
    if not options:
        # Nothing to choose from: clear the selection instead of raising
        # IndexError / TypeError on options[0].
        return None
    return options[0]['value']

### Dynamic artist ngram graphs

#### [Dynamic artist ngram graphs] controls

In [None]:
# Control card for the artist n-gram wordclouds: single-select genre and
# artist pickers. The artist picker is declared without options here.
_ngram_genre_picker = dcc.Dropdown(
    id='ngram-wordcloud-genre-selection',
    options=list(artist_ngrams_df.genre.unique()),
    value='soul',
)
_ngram_artist_picker = dcc.Dropdown(
    id='ngram-wordcloud-artist-selection-dynamic',
    value='Al Green',
)
ngram_artist_wordcloud_controls = dbc.Card(
    [
        html.Div([dbc.Label("Genre"), _ngram_genre_picker]),
        html.Div([dbc.Label("Artist"), _ngram_artist_picker]),
    ],
    body=True,
)

#### [Dynamic artist ngram graphs] layout

In [None]:
# Page section for the artist n-gram wordclouds: heading, a debug read-out
# filled by the callback, then the control card beside the graph.
_ngram_wordcloud_row = dbc.Row(
    [
        dbc.Col(ngram_artist_wordcloud_controls, md=4),
        dbc.Col(dcc.Graph(id='ngram-artist-wordcloud-graph-content'), md=8),
    ],
    align="center",
)
ngram_artist_wordcloud_container = dbc.Container(
    [
        html.H1(
            'Genre and Artist top ngram wordclouds dynamic',
            style={'textAlign': 'center'},
        ),
        html.P(id='ngram-wordcloud-df-shape'),
        _ngram_wordcloud_row,
    ],
    fluid=True,
)

#### [Dynamic artist ngram graphs] controls callbacks

In [None]:
# ========= artist n-gram wordclouds ===========
@app.callback(
    [
        Output('ngram-artist-wordcloud-graph-content', 'figure'),
        Output('ngram-wordcloud-df-shape', 'children')
    ],
    [
        Input('ngram-wordcloud-genre-selection', 'value'),
        Input('ngram-wordcloud-artist-selection-dynamic', 'value'),
    ]
)
def update_ngram_artist_wordcloud_graph(genre, artist):
    """Draw a wordcloud and a count bar chart per n-gram length for one artist.

    Args:
        genre: Selected genre. Not read here; it is only listed as an input
            so a genre change also triggers this callback.
        artist: Selected artist name.

    Returns:
        A tuple of (figure with one row per n-gram length, text with the
        shape of the artist's rows).

    Raises:
        PreventUpdate: When no row matches the artist (for example the
            dropdown was cleared), leaving the current figure untouched.
    """
    from dash.exceptions import PreventUpdate

    artist_df = artist_ngrams_df[artist_ngrams_df['Artist'] == artist]
    if artist_df.empty:
        # `.values[0]` below would raise IndexError on an empty selection.
        raise PreventUpdate

    ngram_lens = [2, 3, 4]
    ngrams_n = 20

    # Titles are assigned to the grid cells in row-major order, so every row
    # needs two of them: one for the wordcloud and one for the bar chart.
    subplot_titles = []
    for gram_len in ngram_lens:
        subplot_titles.append(f'{gram_len}-gram wordcloud')
        subplot_titles.append(f'{gram_len}-gram counts')

    gram_wordclouds_fig = make_subplots(rows=len(ngram_lens), cols=2, subplot_titles=subplot_titles)

    for row, gram_len in enumerate(ngram_lens, start=1):
        gram_cols = [f'ngram_{gram_len}_{ind}' for ind in range(ngrams_n)]
        gram_count_cols = [f'count_{gram_len}_{ind}' for ind in range(ngrams_n)]

        # The first matching row is read for each n-gram / count column.
        gram_words = [artist_df[col].values[0] for col in gram_cols]
        gram_counts = [artist_df[col].values[0] for col in gram_count_cols]

        wordcloud = WordCloud(background_color="white", width=800, height=400)
        wordcloud.generate_from_frequencies(frequencies=dict(zip(gram_words, gram_counts)))

        gram_wordclouds_fig.add_trace(graph_objects.Image(z=wordcloud), row=row, col=1)
        gram_wordclouds_fig.add_trace(
            graph_objects.Bar(x=gram_words, y=gram_counts, showlegend=False),
            row=row,
            col=2,
        )

    # 400 px per row
    gram_wordclouds_fig.update_layout(height=len(ngram_lens) * 400)
    df_shape = artist_df.shape

    return gram_wordclouds_fig, f'df shape: {df_shape}'

# ========= set available artist options based on chosen genre =========
@app.callback(
    Output('ngram-wordcloud-artist-selection-dynamic', 'options'),
    Input('ngram-wordcloud-genre-selection', 'value')
)
def set_ngram_dynamic_artist_options(genre):
    """Return label/value options for the artists of the selected genre."""
    in_genre = artist_ngrams_df['genre'] == genre
    artist_names = artist_ngrams_df[in_genre]['Artist'].unique()
    return [{"label": name, "value": name} for name in list(artist_names)]

# ======= choose from available dynamic options =========
@app.callback(
    Output('ngram-wordcloud-artist-selection-dynamic', 'value'),
    Input('ngram-wordcloud-artist-selection-dynamic', 'options')
)
def set_ngram_dynamic_artist_value(options):
    """Select the first artist that the n-gram artist dropdown offers.

    Args:
        options: List of ``{'label': ..., 'value': ...}`` dicts; may be
            empty or None when the chosen genre has no artists.

    Returns:
        The value of the first option, or None when there are no options.
    """
    if not options:
        # Nothing to choose from: clear the selection instead of raising
        # IndexError / TypeError on options[0].
        return None
    return options[0]['value']

## Whole App layout

In [None]:
# Assemble the page: every section is followed by a horizontal rule.
# genre_wordcloud_container (the static genre wordclouds) is left out.
_page_sections = [
    topic_container,
    bar_topic_container,
    artist_wordcloud_container,
    ngram_artist_wordcloud_container,
    artist_bar_topic_container,
    topic_scatter_container,
]
_page_children = []
for _section in _page_sections:
    _page_children.append(_section)
    _page_children.append(html.Hr())

app.layout = dbc.Container(_page_children, fluid=True)

In [None]:
# Start the Dash server with debug mode switched on.
# NOTE(review): debug mode is meant for local development; confirm before deploying.
app.run(debug=True)

### ========== test ==========

In [None]:
# Scratch check: rows of counts_df whose Artist is one of the two listed names.
counts_df[counts_df['Artist'].isin(['A Tribe Called Quest', 'MF DOOM'])]