In [None]:
Importing all dependencies

In [None]:
from dash import Dash, html, dcc, Input, Output
import plotly.express as px
import dash_bootstrap_components as dbc
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import base64
from io import BytesIO

In [None]:
!pip install dash
!pip install dash-bootstrap-components
!pip install wordcloud

In [None]:
Loading Data

In [None]:
# Absolute location of the merged, cleaned and topic-labelled reviews CSV
file_path = r'D:\Data Science\AAIT\Cap Stone Project\Data\Merged Data\Fillna\Merged_reviews_cleaned_labeled_topic II.csv'

# Read the reviews from that path into the DataFrame the dashboard works on
df_labeled = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
Exploratory Analysis and Dashboard (with dropdown)

In [4]:
# Function to create word-frequency cloud plots
def create_wordcloud(data):
    """Render a word cloud of the given texts on a matplotlib figure.

    Parameters
    ----------
    data : iterable of str
        Text fragments; they are joined with single spaces and handed to
        ``WordCloud.generate``.

    Returns
    -------
    matplotlib.figure.Figure
        A 10x6 inch figure with its axes hidden. It shows the word cloud, or
        a short "No words to display" message when WordCloud could not build
        one. The figure is already closed in pyplot, but is still returned so
        the caller can save it.
    """
    text = " ".join(data)

    # Build the cloud before creating the figure so that an unexpected error
    # here cannot leave an open figure behind.
    try:
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    except ValueError:
        # WordCloud raises ValueError when it has no words to draw (for
        # example when the text is empty); fall back to a placeholder
        # instead of failing the whole dashboard update.
        wordcloud = None

    fig, ax = plt.subplots(figsize=(10, 6))
    if wordcloud is not None:
        ax.imshow(wordcloud, interpolation='bilinear')
    else:
        ax.text(0.5, 0.5, 'No words to display',
                ha='center', va='center', fontsize=16, transform=ax.transAxes)
    ax.axis('off')
    plt.close(fig)
    return fig

# Function to generate base64 image from matplotlib figure
def fig_to_base64(fig):
    """Encode a matplotlib figure as a PNG data URI and close the figure.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure to serialise; it is saved as PNG into memory and then closed
        through pyplot.

    Returns
    -------
    str
        A ``data:image/png;base64,...`` string holding the PNG bytes.
    """
    png_buffer = BytesIO()
    fig.savefig(png_buffer, format="png")
    # getvalue() returns the whole buffer content regardless of the cursor position
    encoded_png = base64.b64encode(png_buffer.getvalue()).decode("ascii")
    plt.close(fig)
    return "data:image/png;base64," + encoded_png

# Set up the Dash app
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


def _image_row(image_id):
    """Build one centered, full-width row holding an <img> with the given id."""
    picture = html.Img(id=image_id, style={'width': '100%'})
    return dbc.Row([dbc.Col([picture], width=12)], justify="center", className="mb-4")


# Dropdown choices: the aggregate 'All Hotels' entry followed by every
# distinct value of the Hotel_site column
_hotel_site_choices = ['All Hotels'] + df_labeled['Hotel_site'].unique().tolist()
_hotel_site_dropdown = dcc.Dropdown(
    id='hotel_site',
    value='All Hotels',
    clearable=False,
    options=[{'label': site, 'value': site} for site in _hotel_site_choices],
)

app.layout = dbc.Container([
    html.H1("Exploratory Analysis and Dashboard", className='mb-2', style={'textAlign': 'center'}),

    # Dropdown for selecting hotel site
    dbc.Row([dbc.Col([_hotel_site_dropdown], width=12)], justify="center", className="mb-4"),

    # Plots and figures; each image's src is supplied by the dashboard callback
    _image_row('wordcloud-plot'),
    _image_row('topics-plot'),
    _image_row('sentiment-analysis-plot'),
    _image_row('ratings-plot'),

    # Summary statistics
    dbc.Row([dbc.Col([html.Div(id='summary-stats')], width=12)], justify="center", className="mb-4"),
])

def _counts_barplot_base64(counts, xlabel, ylabel, title):
    """Draw a bar chart of a value-counts Series and return it as a PNG data URI.

    Parameters
    ----------
    counts : pandas.Series
        Counts to plot; the index supplies the bar labels, the values the heights.
    xlabel, ylabel, title : str
        Axis labels and title of the chart.

    Returns
    -------
    str or None
        The ``data:image/png;base64,...`` string produced by ``fig_to_base64``,
        or None when ``counts`` is empty so there is nothing to plot.
    """
    if counts.empty:
        return None
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=counts.index, y=counts.values, ax=ax)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    return fig_to_base64(fig)


@app.callback(
    Output('wordcloud-plot', 'src'),
    Output('topics-plot', 'src'),
    Output('sentiment-analysis-plot', 'src'),
    Output('ratings-plot', 'src'),
    Output('summary-stats', 'children'),
    Input('hotel_site', 'value')
)
def update_dashboard(hotel_site):
    """Recompute every dashboard element for the selected hotel site.

    Parameters
    ----------
    hotel_site : str
        Value of the 'hotel_site' dropdown: 'All Hotels' or one value of the
        Hotel_site column.

    Returns
    -------
    tuple
        ``(wordcloud_src, topics_src, sentiment_src, ratings_src, summary)``.
        The first four are PNG data URIs for the four images, or None when
        the selection has no data for that plot. The last is an ``html.Div``
        with the summary statistics.
    """
    # Filter data based on selected hotel site
    if hotel_site == 'All Hotels':
        selected = df_labeled
    else:
        selected = df_labeled[df_labeled['Hotel_site'] == hotel_site]

    # Drop rows with missing Review_comment values; dropna returns a new
    # frame, so df_labeled itself is never modified
    filtered_df = selected.dropna(subset=['Review_comment'])

    # Ensure the comments are strings before joining them into one text
    comments = filtered_df['Review_comment'].astype(str)

    # Word Frequency Cloud Plot (skipped when the selection has no reviews)
    if comments.empty:
        wordcloud_fig_base64 = None
    else:
        wordcloud_fig_base64 = fig_to_base64(create_wordcloud(comments))

    # Topics of Improvement Areas: topic labels of the negative reviews only
    negative_reviews = filtered_df[filtered_df['Sentiment'] == 'Negative']
    topics_fig_base64 = _counts_barplot_base64(
        negative_reviews['Topic_Label'].value_counts(),
        'Topic', 'Number of Negative Reviews', 'Topics of Improvement Areas',
    )

    # Sentiment Analysis Plot
    sentiment_counts = filtered_df['Sentiment'].value_counts()
    sentiment_fig_base64 = _counts_barplot_base64(
        sentiment_counts, 'Sentiment', 'Number of Reviews', 'Sentiment Analysis',
    )

    # Ratings Plot, ordered by rating value rather than by frequency
    rating_counts = filtered_df['Rating'].value_counts().sort_index()
    ratings_fig_base64 = _counts_barplot_base64(
        rating_counts, 'Rating', 'Number of Reviews', 'Ratings',
    )

    # The mean is NaN when there are no ratings to average; show N/A instead of "nan"
    average_rating = filtered_df['Rating'].mean()
    average_rating_text = f"{average_rating:.2f}" if pd.notna(average_rating) else "N/A"

    # Summary Statistics
    summary_stats = html.Div([
        html.H4("Summary Statistics"),
        html.P(f"Total Reviews: {len(filtered_df)}"),
        html.P(f"Average Rating: {average_rating_text}"),
        html.P(f"Number of Positive Reviews: {sentiment_counts.get('Positive', 0)}"),
        html.P(f"Number of Negative Reviews: {sentiment_counts.get('Negative', 0)}"),
        html.P(f"Number of Neutral Reviews: {sentiment_counts.get('Neutral', 0)}")
    ])

    return wordcloud_fig_base64, topics_fig_base64, sentiment_fig_base64, ratings_fig_base64, summary_stats

# Start the development server on port 8003 when this code is run directly.
# app.run is the current entry point; app.run_server is deprecated and no
# longer available in recent Dash releases.
if __name__ == '__main__':
    app.run(debug=False, port=8003)
