# Task 3: DASHBOARD

### Libraries

In [153]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
import dash_bootstrap_components as dbc
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import io
import base64
from collections import Counter
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import ast
import numpy as np

In [None]:
# Loading the dataset
df = pd.read_excel('/Users/amaiarodriguez-sierra/Desktop/final project/NUEVO_Reviews.xlsx')

df['Country'] = df['Country'].fillna('Unknown')

# Convert stored string vectors into numerical arrays
df['glove_vector'] = df['glove_vector'].apply(lambda x: np.fromstring(x.strip('[]'), sep=' ') if isinstance(x, str) else x)
df['lda_vector'] = df['lda_vector'].apply(lambda x: np.array(ast.literal_eval(x)) if isinstance(x, str) else x)

# Create vector matrices
glove_matrix = np.stack(df['glove_vector'].values)
lda_matrix = np.stack(df['lda_vector'].values)

# Perform PCA for reducing to 3D for LDA visualization
pca = PCA(n_components=3)
lda_3d = pca.fit_transform(lda_matrix)

# Apply KMeans clustering to the LDA vectors (or use another clustering algorithm)
kmeans = KMeans(n_clusters=5, random_state=42)  # Example with 5 clusters
lda_labels = kmeans.fit_predict(lda_matrix)

df['lda_cluster'] = lda_labels

country_data = df.groupby('Country').agg(
    Number_of_Reviews=('Review', 'size'),
    Avg_Rating=('Rating', 'mean')
).reset_index()

# Initialize the Dash app
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Map to highlight countries based on the number of reviews
fig = px.choropleth(country_data,
                    locations="Country", 
                    locationmode="country names",  
                    color="Number_of_Reviews", 
                    hover_name="Country", 
                    color_continuous_scale="Plasma",  
                    title="Hotel Reviews by Country",
                    labels={"Number_of_Reviews": "Number of Reviews"})

# Function to generate a wordCloud from reviews
def generate_wordcloud(country):
    country_reviews = df[df['Country'] == country]
    
    text = " ".join(country_reviews['Review'].values)
    
    # Create the word cloud
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    
    img = io.BytesIO()
    wordcloud.to_image().save(img, format='PNG')
    img.seek(0)
    
    img_b64 = base64.b64encode(img.getvalue()).decode('utf-8')
    return f"data:image/png;base64,{img_b64}"

# 3D scatter plot for LDA clusters using PCA
fig_3d = px.scatter_3d(
    x=lda_3d[:, 0],  # PCA component 1
    y=lda_3d[:, 1],  # PCA component 2
    z=lda_3d[:, 2],  # PCA component 3
    color=lda_labels,  
    title="LDA Clusters in 3D",
    labels={'x': 'PCA 1', 'y': 'PCA 2', 'z': 'PCA 3'},
)


### App layout

In [None]:
# App layout
app.layout = html.Div(
    style={'fontFamily': 'Arial, sans-serif', 'backgroundColor': '#f2f2f2', 'padding': '20px'},
    children=[
        # Title
        html.Div(
            style={'textAlign': 'center', 'marginBottom': '50px'},
            children=[
                html.H1("Hotel Reviews by Country", style={'fontSize': '36px', 'fontWeight': 'bold', 'color': '#333333'}),
                html.P("Click on a country on the map to see detailed reviews and ratings", style={'fontSize': '18px', 'color': '#666666'})
            ]
        ),

        # Main content with map
        html.Div(
            style={'padding': '20px', 'backgroundColor': '#ffffff', 'borderRadius': '10px', 'boxShadow': '0px 4px 6px rgba(0, 0, 0, 0.1)', 'marginBottom': '30px'},
            children=[
                # Map Section
                html.H4("Click on a country on the map to see detailed reviews and ratings", style={'fontSize': '18px', 'color': '#333333'}),
                dcc.Graph(
                    id="hotel-map",
                    figure=fig,
                    style={'height': '70vh', 'width': '100%'}
                ),
            ]
        ),

        # Section for WordCloud and Stats
        html.Div(
            style={'padding': '20px', 'backgroundColor': '#ffffff', 'borderRadius': '10px', 'boxShadow': '0px 4px 6px rgba(0, 0, 0, 0.1)', 'marginTop': '30px'},
            children=[
                # Title displaying the selected country 
                html.H2("Selected country: ", id="country-title", style={'fontSize': '28px', 'fontWeight': 'bold', 'color': '#333333', 'textAlign': 'left'}),

                # WordCloud Image
                html.Div(
                    style={'display': 'inline-block', 'width': '48%', 'textAlign': 'center'},
                    children=[
                        html.H5("Word Cloud", style={'fontSize': '18px', 'color': '#333333'}),
                        html.Img(
                            id="wordcloud-img",  
                            style={'width': '80%', 'borderRadius': '10px'}
                        ),
                    ]
                ),

                # Mean rating and number of reviews for the selected country
                html.Div(
                    style={'display': 'inline-block', 'width': '48%', 'textAlign': 'center'},
                    children=[
                        dbc.Card(
                            dbc.CardBody([
                                html.H6("Mean Rating", className="card-title"),
                                html.P(id="mean-stat", children="7.0"),
                            ]),
                            style={'width': '18rem'}
                        ),
                        dbc.Card(
                            dbc.CardBody([
                                html.H6("Number of Reviews", className="card-title"),
                                html.P(id="num-reviews", children="100"),
                            ]),
                            style={'width': '18rem'}
                        ),
                    ]
                ),
            ]
        ),

        # LDA Topic Modeling Visualization
        html.Div(
            style={'padding': '20px', 'marginTop': '20px', 'backgroundColor': '#ffffff', 'borderRadius': '10px', 'boxShadow': '0px 4px 6px rgba(0, 0, 0, 0.1)'},
            children=[
                html.H3("LDA Topic Modeling Visualization"),
                html.Iframe(
                    src="/assets/lda_model.html",  
                    width="90%",
                    height="600px",
                    style={'border': 'none'}
                )
            ]
        ),

        # 3D scatter plot for LDA clusters
        html.Div(
            style={'padding': '20px', 'marginTop': '20px', 'backgroundColor': '#ffffff', 'borderRadius': '10px', 'boxShadow': '0px 4px 6px rgba(0, 0, 0, 0.1)'},
            children=[
                html.H3("3D LDA Clusters Visualization"),
                dcc.Graph(
                    id="lda-3d-plot",
                    figure=fig_3d,
                    style={'height': '70vh', 'width': '100%'}
                )
            ]
        ),
    ]
)

### App callback
Update the stats, wordcloud and LDA when a country is clicked on the map

In [None]:
@app.callback(
    [Output("country-title", "children"),
     Output("mean-stat", "children"),
     Output("num-reviews", "children"),
     Output("wordcloud-img", "src")],
    [Input("hotel-map", "clickData")]
)
def display_reviews(clickData):
    # When a country is clicked on the map, update the selection
    if clickData:
        selected_country = clickData['points'][0]['location']
    
    # Filter reviews by the selected country
    country_reviews = df[df['Country'] == selected_country]

    # Calculate the mean rating and number of reviews
    mean_rating = country_reviews['Rating'].mean()
    num_reviews = country_reviews.shape[0]

    # Update the wordCloud
    wordcloud_img = generate_wordcloud(selected_country)

    return f"Selected country: {selected_country}", round(mean_rating, 2), num_reviews, wordcloud_img


### Run the app

In [None]:
# Run the app
if __name__ == '__main__':
    app.run(debug=True)

[2025-05-11 22:49:02,742] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "/Users/amaiarodriguez-sierra/.asdf/installs/python/3.11.2/lib/python3.11/site-packages/flask/app.py", line 880, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/amaiarodriguez-sierra/.asdf/installs/python/3.11.2/lib/python3.11/site-packages/flask/app.py", line 865, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/amaiarodriguez-sierra/.asdf/installs/python/3.11.2/lib/python3.11/site-packages/dash/dash.py", line 1414, in dispatch
    ctx.run(
  File "/Users/amaiarodriguez-sierra/.asdf/installs/python/3.11.2/lib/python3.11/site-packages/dash/_callback.py", line 536, in add_context
    raise err
  File "/Users/amaiarodriguez-sierra/.asdf/in