# Task 3. Implementation of a dashboard

In [1]:
# Importamos las librerías necesarias
import pandas as pd
import numpy as np
import json
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
from sklearn.decomposition import PCA

In [2]:
# Load the artefacts the dashboard needs (paths are relative to the current
# working directory):
#   - df_clean:   per-document table; the code below reads its
#                 "dominant_topic" and "label_name" columns.
#   - results_df: model-comparison results shown in the chart and table.
#   - X_glove:    document vectors, indexed by df_clean row below, so the two
#                 are presumably row-aligned -- TODO confirm against the
#                 notebook that exported them.
df_clean = pd.read_csv("df_clean.csv")
results_df = pd.read_csv("results_df.csv")
X_glove = np.load("X_glove.npy")

# JSON object keys are always strings, so convert them to int to allow lookups
# by the integer topic id coming from the dropdown. The encoding is pinned to
# UTF-8 so non-ASCII words decode identically regardless of the platform's
# default locale encoding.
with open("topics_words.json", encoding="utf-8") as f:
    topics_words = {int(k): v for k, v in json.load(f).items()}


In [None]:
# Initialise the Dash app
app = dash.Dash(__name__)
app.title = "Detection of depressive symptomatology in social networks Project Dashboard"

# Page layout. Component ids are the strings '0'..'10'; the callbacks defined
# further down reference them, so they must stay in sync:
#   '0'  topic dropdown            '1'  documents-per-topic histogram
#   '2'  key words container       '3'  tag distribution histogram
#   '4'  PCA scatter               '5'  metric dropdown
#   '6'  model filter              '7'  vectorizer filter
#   '8'  feature-selection filter  '9'  comparison bar chart
#   '10' filtered results table
app.layout = html.Div([
    html.H1("Detection of depressive symptomatology in social networks Project Dashboard", style={'textAlign': 'center'}),

    # --- Topic exploration section ---
    html.Div([
        html.Label("Select a dominant topic:", style={'textAlign': 'center'}),
        dcc.Dropdown(
            id='0',
            options=[{'label': f'Topic {i}', 'value': i} for i in sorted(df_clean["dominant_topic"].unique())],
            multi=False,
            placeholder="Select the topic to analyse..."
        )
    ]),

    html.Br(),

    html.H2("*️⃣ Distribution of documents by topic *️⃣", style={'textAlign': 'center'}),
    dcc.Graph(id='1'),

    html.H2("*️⃣ Key words for the selected topic *️⃣", style={'textAlign': 'center'}),
    html.Div(id='2', style={'textAlign': 'center'}),

    html.Br(),

    html.H2("*️⃣ Distribution of tags for the selected topic *️⃣", style={'textAlign': 'center'}),
    dcc.Graph(id='3'),

    html.H2("*️⃣ 2D Document Projection (PCA) *️⃣", style={'textAlign': 'center'}),
    dcc.Graph(id='4'),

    # --- Model comparison section ---
    html.H2("*️⃣ Exhaustive Comparison: Model, Vectorizer & Feature Selection", style={'textAlign': 'center'}),

    html.Div([
        html.Label("Select evaluation metric:"),
        dcc.Dropdown(
            id="5",
            options=[{"label": m, "value": m} for m in ["Accuracy", "Precision", "Recall", "F1-Score", "AUC"]],
            value="Accuracy",
            # A metric is always required to draw the comparison chart, so do
            # not let the user clear this dropdown (which would send None to
            # the callback and on to px.bar as the y column).
            clearable=False,
        )
    ]),

    # Optional multi-select filters; leaving one empty means "no filtering"
    # on that column.
    html.Div([
        html.Div([
            html.Label("Filter by Model:"),
            dcc.Dropdown(
                id="6",
                options=[{"label": m, "value": m} for m in sorted(results_df["Model"].unique())],
                multi=True,
                placeholder="All Models"
            )
        ]),

        html.Div([
            html.Label("Filter by Vectorizer:"),
            dcc.Dropdown(
                id="7",
                options=[{"label": v, "value": v} for v in sorted(results_df["Vectorizer"].unique())],
                multi=True,
                placeholder="All Vectorizers"
            )
        ]),

        html.Div([
            html.Label("Filter by Feature Selection:"),
            dcc.Dropdown(
                id="8",
                options=[{"label": f, "value": f} for f in sorted(results_df["Feature_Selection"].unique())],
                multi=True,
                placeholder="All Methods"
            )
        ])
    ]),

    dcc.Graph(id="9"),

    html.H4("Filtered Results"),
    dash.dash_table.DataTable(
        id="10",
        columns=[{"name": i, "id": i} for i in results_df.columns],
        style_table={"overflowX": "auto"},
        style_cell={"textAlign": "center"},
        style_header={"fontWeight": "bold"},
        page_size=10
    ),
], style={'backgroundColor': 'white', 'padding': '20px'})

# Callback that refreshes the topic-level charts and key words
@app.callback(
    Output('1', 'figure'),
    Output('2', 'children'),
    Output('3', 'figure'),
    Output('4', 'figure'),
    Input('0', 'value')
)
def update_main_visuals(selected_topic):
    """Rebuild the topic section of the dashboard for the selected topic.

    Args:
        selected_topic: Value of the topic dropdown (id '0'), or None when no
            topic is selected, in which case all documents are used.

    Returns:
        A 4-tuple ``(fig_topics, word_elements, fig_labels, fig_scatter)``:
        the documents-per-topic histogram (always over all documents), the
        key-word component for the topic, the tag histogram for the selected
        documents, and the 2D PCA scatter of their vectors.
    """
    if selected_topic is None:
        filtered_df = df_clean
        X_selected = X_glove
    else:
        # Select vector rows with a positional boolean mask rather than with
        # index labels, so the lookup does not depend on df_clean's index
        # being 0..n-1.
        topic_mask = df_clean["dominant_topic"] == selected_topic
        filtered_df = df_clean[topic_mask]
        X_selected = X_glove[topic_mask.to_numpy()]

    # This histogram intentionally ignores the selection: it shows every topic.
    fig_topics = px.histogram(df_clean, x="dominant_topic", title="Number of documents by dominant topic", labels={'dominant_topic': 'Topic'})
    fig_topics.update_layout(title_x=0.5)

    if selected_topic is None:
        word_elements = html.P("Select a topic to display the most relevant words.")
    else:
        # .get() so a topic without an entry in topics_words shows a message
        # instead of failing the whole callback with a KeyError.
        words_list = topics_words.get(selected_topic)
        if words_list:
            word_elements = html.Div([
                html.Span(word, style={'backgroundColor': 'cyan', 'color': 'black', 'padding': '8px', 'margin': '5px'}) for word in words_list
            ])
        else:
            word_elements = html.P("No key words available for this topic.")

    fig_labels = px.histogram(
        filtered_df,
        category_orders={"label_name": ["control", "depression"]},
        x="label_name",
        title=f"Tags for Topic {selected_topic}" if selected_topic is not None else "Tags for all documents",
        labels={'label_name': 'Tag'},
        color="label_name"
    )
    fig_labels.update_layout(title_x=0.5)

    if X_selected.shape[0] >= 2:
        # PCA is re-fitted on the selected documents only, so the axes are
        # specific to the current selection.
        pca = PCA(n_components=2, random_state=42)
        coords_pca = pca.fit_transform(X_selected)
        fig_scatter = px.scatter(
            x=coords_pca[:, 0],
            y=coords_pca[:, 1],
            color=filtered_df['label_name'],
            category_orders={"color": ["control", "depression"]},
            labels={'x': 'PC1', 'y': 'PC2'},
            title="2D Projected documents (PCA)"
        )
    else:
        # A 2-component PCA cannot be fitted on fewer than two documents;
        # show an empty placeholder rather than raising and breaking every
        # output of this callback.
        fig_scatter = px.scatter(
            x=[],
            y=[],
            labels={'x': 'PC1', 'y': 'PC2'},
            title="2D Projected documents (PCA) - not enough documents to project"
        )
    fig_scatter.update_layout(title_x=0.5)

    return fig_topics, word_elements, fig_labels, fig_scatter

# Callback that refreshes the exhaustive model comparison (chart + table)
@app.callback(
    Output("9", "figure"),
    Output("10", "data"),
    Input("5", "value"),
    Input("6", "value"),
    Input("7", "value"),
    Input("8", "value")
)
def update_advanced_comparison(metric, selected_models, selected_vectorizers, selected_features):
    """Filter the results table and redraw the metric comparison chart.

    Args:
        metric: Name of the results_df column to plot on the y axis (value of
            dropdown '5'); may be None if the dropdown is cleared.
        selected_models: Values chosen in the model filter, or None/empty for
            no filtering on "Model".
        selected_vectorizers: Same, for the "Vectorizer" column.
        selected_features: Same, for the "Feature_Selection" column.

    Returns:
        A 2-tuple ``(figure, records)``: the grouped bar chart faceted by
        model, and the filtered rows as a list of dicts for the DataTable.
        When no metric is selected the figure is ``dash.no_update`` so the
        previous chart stays on screen while the table still refreshes.
    """
    active_filters = {
        "Model": selected_models,
        "Vectorizer": selected_vectorizers,
        "Feature_Selection": selected_features,
    }

    # Boolean indexing returns a new frame, so results_df itself is never
    # modified and no defensive copy is needed.
    df_filtered = results_df
    for column, chosen in active_filters.items():
        if chosen:  # None or [] both mean "keep everything"
            df_filtered = df_filtered[df_filtered[column].isin(chosen)]

    records = df_filtered.to_dict("records")

    if not metric:
        # Without a metric there is no y column to plot; leave the chart as is
        # instead of handing None to px.bar.
        return dash.no_update, records

    fig = px.bar(
        df_filtered,
        x="Feature_Selection",
        y=metric,
        color="Vectorizer",
        barmode="group",
        facet_col="Model",
        category_orders={"Feature_Selection": ["pca", "pls", "kbest", "rfe", "embedded"]},
        title=f"{metric} by Feature Selection and Vectorizer (Faceted by Model)",
        height=500
    )
    fig.update_layout(title_x=0.5)

    return fig, records

# Start the Dash server (debug mode, port 8051) only when this file is
# executed as the main module.
if __name__ == "__main__":
    app.run(port=8051, debug=True)
