In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np

In [ ]:
# Load the preprocessed BBC news articles (with their UMAP coordinates).
df = pd.read_csv("../../Datasets/bbc_news_preprocessed_UMAP.csv")
# Plain Python list of the `docs` column values.
docs = df['docs'].tolist()

In [53]:
# Sanity-check the loaded frame: column names, dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22144 entries, 0 to 22143
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   title         22144 non-null  object 
 1   pubDate       22144 non-null  object 
 2   guid          22144 non-null  object 
 3   link          22144 non-null  object 
 4   description   22144 non-null  object 
 5   shortPubDate  22144 non-null  object 
 6   topics        22144 non-null  int64  
 7   topicName     22144 non-null  object 
 8   probs         22144 non-null  float64
 9   docs          22144 non-null  object 
 10  x             22144 non-null  float64
 11  y             22144 non-null  float64
dtypes: float64(3), int64(1), object(8)
memory usage: 2.0+ MB


In [82]:
def createUMAP(dataFrame):
    """Build the document scatter plot, one WebGL trace per topic.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Must provide the columns ``x``, ``y``, ``topics``, ``topicName``
        and ``title``.

    Returns
    -------
    plotly.graph_objects.Figure
        Scatter figure with one ``Scattergl`` trace per distinct ``topics``
        value (ascending order) and one text annotation per ``topicName``
        placed at the mean ``x``/``y`` of that topic's rows.
    """
    fig = go.Figure()

    # groupby(sort=True) yields the topics in ascending id order; the running
    # index only feeds the colour formula so each trace gets its own colour.
    for i, (_, topicData) in enumerate(dataFrame.groupby('topics', sort=True)):
        color = f"rgba({i * 30 % 255}, {(i * 60 + 100) % 255}, {(i * 90 + 150) % 255}, 0.6)"
        fig.add_trace(go.Scattergl(
            x=topicData['x'],
            y=topicData['y'],
            mode='markers',
            hovertext=topicData['title'],
            hoverinfo="text",
            name=topicData['topicName'].iloc[0],
            showlegend=True,
            marker=dict(color=color)
        ))

    # Label positions must come from the frame that was passed in, not from a
    # module-level frame, otherwise a filtered input gets labels in the wrong place.
    centroids = dataFrame[['x', 'y', 'topicName']].groupby('topicName').mean()
    for row in centroids.itertuples():
        fig.add_annotation(
            x=row.x, y=row.y,
            text=row.Index,  # the group key, i.e. the topic name
            showarrow=False,
            font=dict(size=10, color='black')
        )

    fig.update_layout(
        template="simple_white",
        title={
            'text': "Documents and Topics",
            'x': 0.5,
            'font': {'size': 15}
        },
    )

    # The UMAP axes carry no interpretable units, so hide them.
    fig.update_xaxes(visible=False)
    fig.update_yaxes(visible=False)

    return fig

In [83]:
def createHistogram(data, title):
    """Return a histogram of documents per ``shortPubDate`` value.

    Parameters
    ----------
    data
        Tabular input handed straight to ``px.histogram``; it needs a
        ``shortPubDate`` column.
    title : str
        Figure title.

    Returns
    -------
    The figure produced by ``px.histogram`` after layout and hover styling.
    """
    axisLabels = {
        "shortPubDate": "Publication Date (Month)",
        "amount": "Document Count"
    }
    hoverLines = [
        "Publication Date: %{x}",
        "Document Count: %{y}",
    ]

    histogram = px.histogram(data, x="shortPubDate", title=title, labels=axisLabels)

    # Centre the title and set explicit axis titles.
    histogram.update_layout(
        template="simple_white",
        xaxis_title="Publication Date (Month)",
        yaxis_title="Document Count",
        font=dict(size=10, color='black'),
        title_x=0.5
    )
    # One hover line per entry, separated by an HTML line break.
    histogram.update_traces(hovertemplate="<br>".join(hoverLines))

    return histogram

In [84]:
# Build the scatter figure once up front; the histogram is filled in by its callback.
umap_fig = createUMAP(df)
app = dash.Dash(__name__)
# Two graphs side by side in a full-viewport flex row (55% scatter / 45% histogram).
# Style keys are camelCased (flexBasis, flexDirection), as Dash expects.
app.layout = html.Div([
    html.Div([
        dcc.Graph(id='umap-graph', figure=umap_fig, style={'flexBasis': '55%'}),
        dcc.Graph(id='histogram', style={'flexBasis': '45%'})
    ], style={'display': 'flex', 'flexDirection': 'row', 'width': '100vw', 'height': '100vh'})
])

@app.callback(
    Output('histogram', 'figure'),
    [Input('umap-graph', 'clickData')]
)
def displayHistogram(clickData):
    """Redraw the histogram for the topic of the clicked scatter point.

    Parameters
    ----------
    clickData : dict or None
        ``clickData`` property of the ``umap-graph`` component; ``None``
        until a point has been clicked.

    Returns
    -------
    Figure from ``createHistogram``: restricted to the clicked document's
    topic, or covering all documents when nothing was clicked or the click
    cannot be matched to a row of ``df``.
    """
    allTopicsTitle = "Document Count by Month - All Topics"

    points = (clickData or {}).get('points') or []
    if not points:
        return createHistogram(df, allTopicsTitle)

    point = points[0]
    clickTitle = point.get('hovertext')
    matches = df[df['title'] == clickTitle]

    # Titles are not guaranteed to be unique. When several rows share the
    # clicked title, narrow the match down by the clicked coordinates so the
    # topic of the point that was actually clicked is used.
    clickX, clickY = point.get('x'), point.get('y')
    if len(matches) > 1 and clickX is not None and clickY is not None:
        sameSpot = matches[
            np.isclose(matches['x'], clickX) & np.isclose(matches['y'], clickY)
        ]
        if not sameSpot.empty:
            matches = sameSpot

    # No matching row (e.g. payload without hovertext): fall back to the
    # overview instead of raising IndexError on `.iat[0]`.
    if matches.empty:
        return createHistogram(df, allTopicsTitle)

    topicName = matches['topicName'].iat[0]
    filtered_df = df[df['topicName'] == topicName]

    return createHistogram(
        filtered_df.sort_values(by='shortPubDate'),
        f"Document Count by Month - {topicName}"
    )

if __name__ == '__main__':
    # Newer Dash releases replace `run_server` with `run`; prefer `run` when
    # the installed version has it and fall back to `run_server` otherwise.
    runApp = getattr(app, 'run', None) or app.run_server
    runApp(debug=True)