# In [None]:
import base64
import io
import os
from datetime import datetime

import pandas as pd
import plotly.express as px
import pymongo
from dash import Dash, dcc, html, Output, Input
from textblob import TextBlob
from wordcloud import WordCloud

# Connect to MongoDB.
# The connection string embeds credentials, so it should not be committed to
# source control: set the MONGODB_URI environment variable to supply the real
# URI. The original hard-coded string (which still carries the "<password>"
# placeholder) is kept only as the fallback when the variable is not set.
_DEFAULT_MONGODB_URI = "mongodb+srv://admin:<password>@cluster0.jwzyj.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
client = pymongo.MongoClient(os.environ.get("MONGODB_URI", _DEFAULT_MONGODB_URI))
db = client["reviews"]
collection = db["reviews_data"]

# Fetch all documents from the collection
data = list(collection.find())

# Convert data into a pandas DataFrame (one row per document, one column per
# field; an empty collection yields a DataFrame with no columns)
df = pd.DataFrame(data)


# 2. Data Cleaning and Transformation
# Ratings: coerce to numbers (values that cannot be parsed become NaN), then
# fill the gaps with the mean of the ratings that did parse.
if 'Rating' in df:
    df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')
    df['Rating'] = df['Rating'].fillna(df['Rating'].mean())

# Review dates: coerce to datetimes (values that cannot be parsed become NaT).
if 'review_date' in df:
    df['review_date'] = pd.to_datetime(df['review_date'], errors='coerce')

# 3. Sentiment Analysis
def get_sentiment(text):
    """Label *text* as 'Positive', 'Neutral' or 'Negative'.

    The label follows the sign of the TextBlob polarity score of the text.
    Any value that is not a ``str`` (for example a missing review) is
    labelled 'Neutral' without being analysed.
    """
    # Guard clause: only real strings are handed to TextBlob.
    if not isinstance(text, str):
        return 'Neutral'

    score = TextBlob(text).sentiment.polarity
    if score == 0:
        return 'Neutral'
    return 'Positive' if score > 0 else 'Negative'

# Label every review; without a 'Review' column no 'sentiment' column is
# created and the problem is only reported on stdout.
if 'Review' not in df:
    print("Error: 'Review' column not found in the DataFrame.")
else:
    df['sentiment'] = df['Review'].apply(get_sentiment)

# 4. Create Dashboards Using Dash and Plotly

# Initialize Dash app
app = Dash(__name__)

# Work out the date picker bounds once; both stay None when the data has no
# 'review_date' column.
if 'review_date' in df.columns:
    _first_review_date = df['review_date'].min()
    _last_review_date = df['review_date'].max()
else:
    _first_review_date = _last_review_date = None

# Date range filter, plus the element that displays the selected range
_date_filter_section = html.Div([
    dcc.DatePickerRange(
        id='date-range-picker',
        min_date_allowed=_first_review_date,
        max_date_allowed=_last_review_date,
        initial_visible_month=_first_review_date,
        start_date=_first_review_date,
        end_date=_last_review_date,
    ),
    html.Div(id='date-range-display'),
])

# Sentiment pie chart
_sentiment_section = html.Div([
    html.H3("Sentiment Analysis"),
    dcc.Graph(id='sentiment-pie-chart'),
])

# Rating distribution histogram
_rating_section = html.Div([
    html.H3("Rating Distribution"),
    dcc.Graph(id='rating-histogram'),
])

# Word cloud visualization (optional)
_word_cloud_section = html.Div([
    html.H3("Customer Feedback Word Cloud"),
    html.Img(id='word-cloud', style={"width": "100%", "height": "auto"}),
])

# Layout for the dashboard: title followed by the four sections, top to bottom
app.layout = html.Div([
    html.H1("Review Analysis Dashboard"),
    _date_filter_section,
    _sentiment_section,
    _rating_section,
    _word_cloud_section,
])

def _parse_picker_date(value):
    """Return the calendar date encoded in a date picker string.

    The string may or may not carry a time part ('2024-01-31' or
    '2024-01-31T00:00:00'); everything from the 'T' onwards is ignored.
    """
    return datetime.strptime(value.split('T')[0], '%Y-%m-%d').date()


def _no_data_figures():
    """Return the placeholder (pie chart, histogram) pair used when there is nothing to plot."""
    return (
        px.pie(title="Sentiment Distribution (No Data)"),
        px.histogram(pd.DataFrame({'empty': []}), title="Rating Distribution (No Data)"),
    )


def _filter_by_review_date(frame, first_day, last_day):
    """Return the rows of *frame* whose 'review_date' day lies in [first_day, last_day].

    The comparison is done on datetime64 values rather than on Python
    ``date`` objects, so rows whose date is NaT are simply excluded instead
    of making the comparison fail.
    """
    review_days = frame['review_date']
    if getattr(review_days.dt, 'tz', None) is not None:
        # Keep the local wall-clock day so the bounds (naive dates) compare cleanly.
        review_days = review_days.dt.tz_localize(None)
    review_days = review_days.dt.normalize()
    in_range = review_days.between(pd.Timestamp(first_day), pd.Timestamp(last_day))
    return frame[in_range]


def _render_wordcloud_src(reviews):
    """Return a base64 PNG data URI with a word cloud of *reviews*, or None on failure.

    Missing reviews are dropped and the remaining values are converted to
    text, so non-string entries cannot break the join.
    """
    try:
        corpus = ' '.join(reviews.dropna().astype(str))
        cloud = WordCloud(width=800, height=400, background_color='white').generate(corpus)
        buffer = io.BytesIO()
        cloud.to_image().save(buffer, format='PNG')
        encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
        return f"data:image/png;base64,{encoded}"
    except ValueError:
        # Best effort: the dashboard still renders without the image.
        print("WordCloud generation failed.")
        return None


# Callback to update date range display and graphs
@app.callback(
    Output('date-range-display', 'children'),
    Output('sentiment-pie-chart', 'figure'),
    Output('rating-histogram', 'figure'),
    Output('word-cloud', 'src'),
    Input('date-range-picker', 'start_date'),
    Input('date-range-picker', 'end_date')
)
def update_display_and_graphs(start_date, end_date):
    """Refresh the range label, both charts and the word cloud for the selected dates.

    Args:
        start_date: First selected day as a 'YYYY-MM-DD' string, optionally
            followed by 'T' and a time part, or None when nothing is selected.
        end_date: Last selected day in the same format, or None.

    Returns:
        A 4-tuple ``(range text, sentiment pie figure, rating histogram
        figure, word cloud image src)``. The image src is None when no word
        cloud could be produced. Placeholder figures are returned when the
        data has no 'review_date' column or no rows fall inside the range.
        When either date is missing, the full data set is shown.
    """
    has_range = bool(start_date and end_date)
    if has_range:
        # Parse each bound once and reuse it for both the label and the filter.
        first_day = _parse_picker_date(start_date)
        last_day = _parse_picker_date(end_date)
        date_range_text = f"{first_day:%Y-%m-%d} → {last_day:%Y-%m-%d}"
    else:
        date_range_text = "Select a date range"

    if 'review_date' not in df.columns:
        return (date_range_text, *_no_data_figures(), None)

    filtered_df = _filter_by_review_date(df, first_day, last_day) if has_range else df
    if filtered_df.empty:
        return (date_range_text, *_no_data_figures(), None)

    # The 'sentiment' column only exists when the data had a 'Review' column.
    if 'sentiment' in filtered_df.columns:
        sentiment_counts = filtered_df['sentiment'].value_counts()
        fig_sentiment = px.pie(
            names=sentiment_counts.index,
            values=sentiment_counts.values,
            title="Sentiment Distribution",
        )
    else:
        fig_sentiment = px.pie(title="Sentiment Distribution (No Sentiment Data)")

    if 'Rating' in filtered_df.columns:
        fig_ratings = px.histogram(filtered_df, x='Rating', title="Rating Distribution", nbins=10)
    else:
        fig_ratings = px.histogram(pd.DataFrame({'empty': []}), title="Rating Distribution (No Rating Data)")

    if 'Review' in filtered_df.columns:
        wordcloud_src = _render_wordcloud_src(filtered_df['Review'])
    else:
        wordcloud_src = None

    return date_range_text, fig_sentiment, fig_ratings, wordcloud_src

# Run the app
if __name__ == '__main__':
    # Newer Dash releases replace ``run_server`` with ``run`` (Dash 3 no longer
    # accepts ``run_server``), while early Dash 2 releases may only offer
    # ``run_server``. Prefer ``run`` and fall back only when it is absent.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)
