In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output
from sklearn.ensemble import IsolationForest
from prophet import Prophet
import datetime
import statsmodels.api as sm

# Load data
# The timestamp column is parsed with an explicit year-month-day-hour format.
data = pd.read_csv('_data.csv').assign(
    YYYYMMDDHH=lambda frame: pd.to_datetime(frame['YYYYMMDDHH'], format='%Y%m%d%H')
)

# Load the search query data
# 'Merged' holds the calendar date of each row in ISO (YYYY-MM-DD) form.
query_data = pd.read_csv('heatmap_data_all_queries.csv').assign(
    Merged=lambda frame: pd.to_datetime(frame['Merged'], format='%Y-%m-%d')
)

# Initialize the Dash app
app = Dash(__name__)

# Define the layout of the app
# Option lists are built once here and reused by every tab that offers them.
_all_column_options = [{'label': col, 'value': col} for col in data.columns]
_metric_options = [{'label': col, 'value': col} for col in data.columns if col != 'YYYYMMDDHH']
_month_values = data['YYYYMMDDHH'].dt.strftime('%Y-%m').unique().tolist()
_query_columns = [col for col in query_data.columns if col not in ['Merged', 'hour']]
_query_options = [{'label': col, 'value': col} for col in _query_columns]
# '' is not a column of query_data, so it cannot serve as a dropdown default:
# single-select query dropdowns start on the first query column and
# multi-select ones start empty.
_default_query = _query_columns[0] if _query_columns else None

app.layout = html.Div([
    dcc.Tabs([
        dcc.Tab(label='Overview', children=[
            dcc.Dropdown(
                id='overview-dropdown',
                options=_all_column_options,
                value=['Event countInt'],
                multi=True
            ),
            dcc.DatePickerRange(
                id='overview-date-picker',
                start_date=data['YYYYMMDDHH'].min(),
                end_date=data['YYYYMMDDHH'].max(),
                display_format='YYYY-MM-DD'
            ),
            html.Div(id='overview-graphs')
        ]),
        dcc.Tab(label='Forecast', children=[
            html.Div([
                html.H3('Forecasting Metrics'),
                dcc.Dropdown(
                    id='forecast-dropdown',
                    options=_metric_options,
                    value=['Event countInt'],
                    multi=True
                ),
                html.Div(id='forecast-graph')
            ])
        ]),
        dcc.Tab(label='Weekly Heatmap Analysis', children=[
            dcc.Dropdown(
                id='weekly-heatmap-metric-dropdown',
                options=_metric_options,
                value='Event countInt',
                multi=False
            ),
            dcc.DatePickerRange(
                id='weekly-heatmap-date-picker',
                start_date=data['YYYYMMDDHH'].min(),
                end_date=data['YYYYMMDDHH'].max(),
                display_format='YYYY-MM-DD'
            ),
            dcc.Graph(id='weekly-heatmap-graph', style={'height': '800px'})
        ]),
        dcc.Tab(label='Heatmap Analysis', children=[
            dcc.Dropdown(
                id='heatmap-metric-dropdown',
                options=_metric_options,
                value='Event countInt',
                multi=False
            ),
            dcc.Dropdown(
                id='heatmap-month-dropdown',
                options=[{'label': month, 'value': month} for month in _month_values],
                # A single-select dropdown takes one value, not the whole
                # array of months; start on the first month present in data.
                value=_month_values[0] if _month_values else None,
                multi=False
            ),
            dcc.Graph(id='monthly-heatmap-graph', style={'height': '800px'})
        ]),
        dcc.Tab(label='Yearly Heatmap', children=[
            dcc.Dropdown(
                id='yearly-heatmap-metric-dropdown',
                options=_metric_options,
                value='Event countInt',
                multi=False
            ),
            dcc.Graph(id='yearly-heatmap-graph', style={'height': '800px', 'width': '1500px'})
        ]),
        dcc.Tab(label='Rolling Average', children=[
            dcc.Dropdown(
                id='rolling-average-dropdown',
                options=_all_column_options,
                value=['Event countInt'],
                multi=True
            ),
            dcc.Input(
                id='rolling-window-input',
                type='number',
                value=7,
                min=1,
                step=1,
                placeholder='Rolling window size'
            ),
            dcc.Graph(id='rolling-average-graph')
        ]),
        dcc.Tab(label='Search Query Analysis', children=[
            dcc.Dropdown(
                id='query-metric-dropdown',
                options=_query_options,
                value=_default_query,
                multi=False
            ),
            dcc.DatePickerRange(
                id='query-date-picker',
                start_date=query_data['Merged'].min(),
                end_date=query_data['Merged'].max(),
                display_format='YYYY-MM-DD'
            ),
            dcc.Graph(id='query-heatmap-graph', style={'height': '800px'}),
            dcc.Graph(id='query-bar-chart', style={'height': '800px'}),
            dcc.Graph(id='query-time-bar-chart', style={'height': '800px'})
        ]),
        dcc.Tab(label='Comparative Search Analysis', children=[
            dcc.Dropdown(
                id='comparative-query-dropdown',
                options=_query_options,
                value=[],
                multi=True
            ),
            dcc.DatePickerRange(
                id='comparative-date-picker',
                start_date=query_data['Merged'].min(),
                end_date=query_data['Merged'].max(),
                display_format='YYYY-MM-DD'
            ),
            dcc.Graph(id='comparative-scatter-plot', style={'height': '800px'})
        ]),
        dcc.Tab(label='Predictive Modeling', children=[
            dcc.Dropdown(
                id='predictive-query-dropdown',
                options=_query_options,
                value=[],
                multi=True
            ),
            dcc.DatePickerRange(
                id='predictive-date-picker',
                start_date=query_data['Merged'].min(),
                end_date=query_data['Merged'].max(),
                display_format='YYYY-MM-DD'
            ),
            dcc.Graph(id='predictive-forecast-graph', style={'height': '800px'})
        ]),
        dcc.Tab(label='Anomaly Detection', children=[
            dcc.Dropdown(
                id='anomaly-query-dropdown',
                options=_query_options,
                value=_default_query,
                multi=False
            ),
            dcc.DatePickerRange(
                id='anomaly-date-picker',
                start_date=query_data['Merged'].min(),
                end_date=query_data['Merged'].max(),
                display_format='YYYY-MM-DD'
            ),
            dcc.Graph(id='anomaly-graph', style={'height': '800px'})
        ])
    ])
])

# Define the callback to update the overview graphs
@app.callback(
    Output('overview-graphs', 'children'),
    [Input('overview-dropdown', 'value'), Input('overview-date-picker', 'start_date'), Input('overview-date-picker', 'end_date')]
)
def update_overview(selected_columns, start_date, end_date):
    """Build one line chart per selected column for the chosen date range.

    Args:
        selected_columns: Column names of ``data`` chosen in the overview
            dropdown; an empty or ``None`` selection yields no graphs.
        start_date: Inclusive lower bound from the date picker, or ``None``
            for no lower bound.
        end_date: Upper bound from the date picker, or ``None`` for no upper
            bound. The whole calendar day of ``end_date`` is included.

    Returns:
        A list of ``dcc.Graph`` components, one per selected column.
    """
    if not selected_columns:
        return []

    timestamps = data['YYYYMMDDHH']
    in_range = pd.Series(True, index=data.index)
    if start_date is not None:
        in_range &= timestamps >= pd.to_datetime(start_date)
    if end_date is not None:
        # A date without a time of day compares as midnight, which would drop
        # the remaining hours of the end day; bound by the next midnight instead.
        end_exclusive = pd.to_datetime(end_date).normalize() + pd.Timedelta(days=1)
        in_range &= timestamps < end_exclusive
    filtered_data = data[in_range]

    graphs = []
    for col in selected_columns:
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=filtered_data['YYYYMMDDHH'], y=filtered_data[col], mode='lines', name=col))
        fig.update_layout(title=f'Overview of {col}', template='plotly_dark')
        graphs.append(dcc.Graph(figure=fig))
    return graphs

# Define the callback to update the forecast graph
@app.callback(
    Output('forecast-graph', 'children'),
    [Input('forecast-dropdown', 'value')]
)
def update_forecast(selected_metrics):
    """Fit a Prophet model per selected metric and chart forecast vs. actuals.

    A fresh model is fitted on the full ``data`` history for every metric on
    every invocation, then extended 14 * 24 hourly periods into the future.

    Args:
        selected_metrics: Column names of ``data`` chosen in the forecast
            dropdown; an empty or ``None`` selection yields no graphs.

    Returns:
        A list of ``dcc.Graph`` components, one per selected metric.
    """
    graphs = []
    # An emptied dropdown must produce an empty tab, not an iteration error.
    for metric in selected_metrics or []:
        # Prophet expects the timestamp in 'ds' and the target in 'y'.
        forecast_data = data[['YYYYMMDDHH', metric]].rename(columns={'YYYYMMDDHH': 'ds', metric: 'y'})

        # Fit the Prophet model
        model = Prophet()
        model.fit(forecast_data)

        # Create a dataframe for future dates (2 weeks ahead)
        future = model.make_future_dataframe(periods=14*24, freq='H')

        # Make predictions
        forecast = model.predict(future)

        # Plot the forecast as a line with the observed values as markers
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=forecast['ds'], y=forecast['yhat'], mode='lines', name=f'{metric} Forecast'))
        fig.add_trace(go.Scatter(x=forecast_data['ds'], y=forecast_data['y'], mode='markers', name=f'{metric} Actual'))
        fig.update_layout(title=f'Forecast of {metric}', template='plotly_dark')
        graphs.append(dcc.Graph(figure=fig))
    return graphs

# Define the callback to update the weekly heatmap graph
@app.callback(
    Output('weekly-heatmap-graph', 'figure'),
    [Input('weekly-heatmap-metric-dropdown', 'value'), Input('weekly-heatmap-date-picker', 'start_date'), Input('weekly-heatmap-date-picker', 'end_date')]
)
def update_weekly_heatmap(selected_metric, start_date, end_date):
    """Render an hour-of-day by day-of-week heatmap of the summed metric.

    Args:
        selected_metric: Column of ``data`` to aggregate; ``None`` (cleared
            dropdown) yields an empty figure.
        start_date: Inclusive lower bound from the date picker, or ``None``.
        end_date: Upper bound from the date picker, or ``None``. The whole
            calendar day of ``end_date`` is included.

    Returns:
        A Plotly figure; empty when no metric is selected or no rows fall in
        the range.
    """
    if selected_metric is None:
        return go.Figure()

    timestamps = data['YYYYMMDDHH']
    in_range = pd.Series(True, index=data.index)
    if start_date is not None:
        in_range &= timestamps >= pd.to_datetime(start_date)
    if end_date is not None:
        # A date without a time of day compares as midnight; bound by the
        # next midnight so the hours of the end day are not dropped.
        end_exclusive = pd.to_datetime(end_date).normalize() + pd.Timedelta(days=1)
        in_range &= timestamps < end_exclusive

    # Copy before adding helper columns so the shared frame is never touched
    # and pandas does not warn about assigning to a slice.
    weekly_data = data[in_range].copy()
    if weekly_data.empty:
        return go.Figure()
    weekly_data['Day'] = weekly_data['YYYYMMDDHH'].dt.dayofweek
    weekly_data['Hour'] = weekly_data['YYYYMMDDHH'].dt.hour

    heatmap_data = weekly_data.pivot_table(index='Hour', columns='Day', values=selected_metric, aggfunc='sum')
    fig = px.imshow(heatmap_data, text_auto=True, title=f'Weekly Heatmap of {selected_metric} from {start_date} to {end_date}', color_continuous_scale='Viridis')
    fig.update_layout(height=800)  # Make the heatmap larger as it is too small for 1080p screen
    return fig

# Define the callback to update the monthly heatmap graph
@app.callback(
    Output('monthly-heatmap-graph', 'figure'),
    [Input('heatmap-metric-dropdown', 'value'), Input('heatmap-month-dropdown', 'value')]
)
def update_monthly_heatmap(selected_metric, selected_month):
    """Render an hour-of-day by day-of-month heatmap for one month.

    Args:
        selected_metric: Column of ``data`` to aggregate; ``None`` yields an
            empty figure.
        selected_month: Month label in ``'%Y-%m'`` form. A list of labels is
            tolerated and its first entry is used.

    Returns:
        A Plotly figure; empty when nothing is selected or the month has no
        rows.
    """
    # Comparing the month labels against a list would be an element-wise
    # comparison (and fail on length mismatch), so reduce it to one label.
    if isinstance(selected_month, (list, tuple)):
        selected_month = selected_month[0] if selected_month else None
    if selected_metric is None or selected_month is None:
        return go.Figure()

    month_labels = data['YYYYMMDDHH'].dt.strftime('%Y-%m')
    # Copy before adding the helper column so pandas does not warn about
    # assigning to a slice of the shared frame.
    monthly_data = data[month_labels == selected_month].copy()
    if monthly_data.empty:
        return go.Figure()
    monthly_data['Hour'] = monthly_data['YYYYMMDDHH'].dt.hour

    heatmap_data = monthly_data.pivot_table(index='Hour', columns=monthly_data['YYYYMMDDHH'].dt.day, values=selected_metric, aggfunc='sum')
    fig = px.imshow(heatmap_data, text_auto=True, title=f'Heatmap of {selected_metric} for {selected_month}', color_continuous_scale='Viridis')
    fig.update_layout(height=800)  # Make the heatmap larger
    return fig

# Define the callback to update the yearly heatmap graph
@app.callback(
    Output('yearly-heatmap-graph', 'figure'),
    [Input('yearly-heatmap-metric-dropdown', 'value')]
)
def update_yearly_heatmap(selected_metric):
    """Render a month by hour-of-day heatmap of the summed metric.

    Args:
        selected_metric: Column of ``data`` to aggregate; ``None`` (cleared
            dropdown) yields an empty figure.

    Returns:
        A Plotly figure with months on the y axis and hours on the x axis.
    """
    if selected_metric is None:
        return go.Figure()

    timestamps = data['YYYYMMDDHH']
    # Derive the helper columns on a new frame: a callback must not add
    # columns to the module-level frame that every other callback reads.
    yearly_data = data.assign(Month=timestamps.dt.month, Hour=timestamps.dt.hour)

    heatmap_data = yearly_data.pivot_table(index='Month', columns='Hour', values=selected_metric, aggfunc='sum')
    fig = px.imshow(heatmap_data, text_auto=True, title=f'Yearly Heatmap of {selected_metric}', color_continuous_scale='Viridis')
    fig.update_layout(height=800, width=1500, xaxis_title='Hour', yaxis_title='Month')  # Make the heatmap larger and landscape
    # Show month abbreviations instead of the numeric month index.
    fig.update_yaxes(tickmode='array', tickvals=list(range(1, 13)), ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
    return fig

# Define the callback to update the rolling average graph
@app.callback(
    Output('rolling-average-graph', 'figure'),
    [Input('rolling-average-dropdown', 'value'), Input('rolling-window-input', 'value')]
)
def update_rolling_average(selected_columns, window_size):
    """Plot the rolling mean of each selected column over the full history.

    Args:
        selected_columns: Column names of ``data`` chosen in the dropdown;
            may be empty or ``None``. Non-numeric columns are skipped.
        window_size: Number of rows per rolling window. ``None`` or a value
            below 1 (e.g. an emptied input box) draws no traces.

    Returns:
        A Plotly figure with one line per plotted column.
    """
    fig = go.Figure()
    # rolling() rejects a missing or non-positive window, so only draw traces
    # once the input holds a usable size.
    if window_size is not None and window_size >= 1:
        window = int(window_size)
        for col in selected_columns or []:
            # A rolling mean is undefined for e.g. the timestamp column.
            if not pd.api.types.is_numeric_dtype(data[col]):
                continue
            rolling_avg = data[col].rolling(window=window).mean()
            fig.add_trace(go.Scatter(x=data['YYYYMMDDHH'], y=rolling_avg, mode='lines', name=f'{col} (Rolling Avg)'))
    fig.update_layout(title=f'Rolling Average (Window Size: {window_size})', template='plotly_dark')
    return fig

# Define the callback to update the search query heatmap graph
@app.callback(
    Output('query-heatmap-graph', 'figure'),
    [Input('query-metric-dropdown', 'value'), Input('query-date-picker', 'start_date'), Input('query-date-picker', 'end_date')]
)
def update_query_heatmap(selected_metric, start_date, end_date):
    """Render an hour by day-of-month heatmap of one search query column.

    Args:
        selected_metric: Column of ``query_data`` to aggregate. A value that
            is not a column (``None``, ``''``) yields an empty figure.
        start_date: Inclusive lower bound on ``Merged``.
        end_date: Inclusive upper bound on ``Merged``.

    Returns:
        A Plotly figure; empty when the metric is unknown or no rows fall in
        the range.
    """
    # Nothing (or a placeholder) selected: there is no column to pivot on.
    if selected_metric not in query_data.columns:
        return go.Figure()

    merged = query_data['Merged']
    # Copy before adding the helper column so pandas does not warn about
    # assigning to a slice of the shared frame.
    query_filtered_data = query_data[(merged >= start_date) & (merged <= end_date)].copy()
    if query_filtered_data.empty:
        return go.Figure()
    query_filtered_data['Hour'] = query_filtered_data['hour']

    # NOTE(review): columns are day-of-month only, so a range spanning several
    # months sums equal day numbers together - confirm this is intended.
    heatmap_data = query_filtered_data.pivot_table(index='Hour', columns=query_filtered_data['Merged'].dt.day, values=selected_metric, aggfunc='sum')
    fig = px.imshow(heatmap_data, text_auto=True, title=f'Heatmap of {selected_metric} from {start_date} to {end_date}', color_continuous_scale='Viridis')
    fig.update_layout(height=800)  # Make the heatmap larger
    return fig

# Define the callback to update the search query bar chart
@app.callback(
    Output('query-bar-chart', 'figure'),
    [Input('query-date-picker', 'start_date'), Input('query-date-picker', 'end_date')]
)
def update_query_bar_chart(start_date, end_date):
    """Chart the summed value of every query column within the date range.

    Args:
        start_date: Inclusive lower bound on ``Merged``.
        end_date: Inclusive upper bound on ``Merged``.

    Returns:
        A dark-themed Plotly bar chart with one bar per query column.
    """
    merged = query_data['Merged']
    within_range = (merged >= start_date) & (merged <= end_date)

    # Sum every column except the date and hour keys, one total per query.
    totals = query_data[within_range].drop(columns=['Merged', 'hour']).sum()
    query_counts = totals.reset_index()
    query_counts.columns = ['Query', 'Count']

    chart = px.bar(
        query_counts,
        x='Query',
        y='Count',
        title='Frequency of Search Queries',
        labels={'Query': 'Search Query', 'Count': 'Frequency'},
    )
    chart.update_layout(template='plotly_dark')
    return chart

# Define the callback to update the search query time bar chart
@app.callback(
    Output('query-time-bar-chart', 'figure'),
    [Input('query-metric-dropdown', 'value'), Input('query-date-picker', 'start_date'), Input('query-date-picker', 'end_date')]
)
def update_query_time_bar_chart(selected_metric, start_date, end_date):
    """Chart the per-hour total of one search query column.

    Args:
        selected_metric: Column of ``query_data`` to sum per hour. A value
            that is not a column (``None``, ``''``) yields an empty figure.
        start_date: Inclusive lower bound on ``Merged``.
        end_date: Inclusive upper bound on ``Merged``.

    Returns:
        A dark-themed Plotly bar chart keyed by the ``hour`` column.
    """
    # Nothing (or a placeholder) selected: render an empty themed chart
    # instead of failing on the column lookup.
    if selected_metric not in query_data.columns:
        empty_fig = go.Figure()
        empty_fig.update_layout(template='plotly_dark')
        return empty_fig

    merged = query_data['Merged']
    query_filtered_data = query_data[(merged >= start_date) & (merged <= end_date)]
    time_counts = query_filtered_data.groupby('hour')[selected_metric].sum().reset_index()
    fig = px.bar(time_counts, x='hour', y=selected_metric, title=f'Most Popular Times for {selected_metric}', labels={'hour': 'Hour of Day', selected_metric: 'Frequency'})
    fig.update_layout(template='plotly_dark')
    return fig

# Define the callback to update the comparative search query scatter plot without smoothing
@app.callback(
    Output('comparative-scatter-plot', 'figure'),
    [Input('comparative-query-dropdown', 'value'), Input('comparative-date-picker', 'start_date'), Input('comparative-date-picker', 'end_date')]
)
def update_comparative_scatter_plot(selected_metrics, start_date, end_date):
    """Overlay the non-zero values of several query columns as scatter traces.

    Args:
        selected_metrics: Columns of ``query_data`` to compare; may be empty
            or ``None``. Entries that are not columns (e.g. ``''``) are
            skipped.
        start_date: Inclusive lower bound on ``Merged``.
        end_date: Inclusive upper bound on ``Merged``.

    Returns:
        A dark-themed Plotly figure with one marker trace per valid metric.
    """
    merged = query_data['Merged']
    query_filtered_data = query_data[(merged >= start_date) & (merged <= end_date)]
    fig = go.Figure()
    for metric in selected_metrics or []:
        # Skip placeholder or stale selections rather than failing the chart.
        if metric not in query_filtered_data.columns:
            continue
        # Zero rows are left out so only actual occurrences are plotted.
        non_zero_data = query_filtered_data[query_filtered_data[metric] != 0]
        fig.add_trace(go.Scatter(x=non_zero_data['Merged'], y=non_zero_data[metric], mode='markers', name=metric))
    fig.update_layout(title='Comparative Search Analysis', template='plotly_dark', xaxis_title='Date', yaxis_title='Frequency')
    return fig

# Define the callback to update the predictive forecast graph
@app.callback(
    Output('predictive-forecast-graph', 'figure'),
    [Input('predictive-query-dropdown', 'value'), Input('predictive-date-picker', 'start_date'), Input('predictive-date-picker', 'end_date')]
)
def update_predictive_forecast(selected_metrics, start_date, end_date):
    """Forecast each selected query column 7 periods ahead with Prophet.

    Args:
        selected_metrics: Columns of ``query_data`` to forecast; may be empty
            or ``None``. Entries that are not columns (e.g. ``''``) are
            skipped, as are metrics with fewer than two usable rows.
        start_date: Inclusive lower bound on ``Merged``.
        end_date: Inclusive upper bound on ``Merged``.

    Returns:
        A dark-themed Plotly figure with a forecast line and actual-value
        markers per forecast metric.
    """
    merged = query_data['Merged']
    query_filtered_data = query_data[(merged >= start_date) & (merged <= end_date)]
    fig = go.Figure()
    for metric in selected_metrics or []:
        # Skip placeholder or stale selections rather than failing the chart.
        if metric not in query_filtered_data.columns:
            continue
        # Prophet expects the timestamp in 'ds' and the target in 'y'.
        forecast_data = query_filtered_data[['Merged', metric]].rename(columns={'Merged': 'ds', metric: 'y'})
        forecast_data = forecast_data[forecast_data['y'] != 0]  # Exclude values of 0
        # Prophet refuses to fit on fewer than two non-NaN observations.
        if forecast_data['y'].notna().sum() < 2:
            continue
        model = Prophet()
        model.fit(forecast_data)
        future = model.make_future_dataframe(periods=7)  # Predict 1 week in advance
        forecast = model.predict(future)
        fig.add_trace(go.Scatter(x=forecast['ds'], y=forecast['yhat'], mode='lines', name=f'{metric} Forecast'))
        fig.add_trace(go.Scatter(x=forecast_data['ds'], y=forecast_data['y'], mode='markers', name=f'{metric} Actual'))
    fig.update_layout(title='Predictive Modeling of Search Queries', template='plotly_dark', xaxis_title='Date', yaxis_title='Frequency')
    return fig

# Define the callback to update the anomaly detection graph
@app.callback(
    Output('anomaly-graph', 'figure'),
    [Input('anomaly-query-dropdown', 'value'), Input('anomaly-date-picker', 'start_date'), Input('anomaly-date-picker', 'end_date')]
)
def update_anomaly_graph(selected_metric, start_date, end_date):
    """Plot one query column and mark the rows an IsolationForest flags.

    Args:
        selected_metric: Column of ``query_data`` to analyse. A value that is
            not a column (``None``, ``''``) yields a figure without traces.
        start_date: Inclusive lower bound on ``Merged``.
        end_date: Inclusive upper bound on ``Merged``.

    Returns:
        A dark-themed Plotly figure with the metric as a line and flagged
        rows as red markers.
    """
    fig = go.Figure()
    fig.update_layout(title='Anomaly Detection in Search Queries', template='plotly_dark', xaxis_title='Date')

    # Nothing (or a placeholder) selected: there is no column to analyse.
    if selected_metric not in query_data.columns:
        return fig

    merged = query_data['Merged']
    # Copy before adding the label column so pandas does not warn about
    # assigning to a slice of the shared frame.
    query_filtered_data = query_data[(merged >= start_date) & (merged <= end_date)].copy()
    # The forest cannot be fitted on zero samples.
    if query_filtered_data.empty:
        return fig

    isolation_forest = IsolationForest(contamination=0.01)
    # fit_predict labels outliers as -1.
    query_filtered_data['anomaly'] = isolation_forest.fit_predict(query_filtered_data[[selected_metric]])
    anomalies = query_filtered_data[query_filtered_data['anomaly'] == -1]

    fig.add_trace(go.Scatter(x=query_filtered_data['Merged'], y=query_filtered_data[selected_metric], mode='lines', name=selected_metric))
    fig.add_trace(go.Scatter(x=anomalies['Merged'], y=anomalies[selected_metric], mode='markers', name='Anomalies', marker=dict(color='red', size=10)))
    return fig

# Run the app
if __name__ == '__main__':
    # Newer Dash releases expose `run` in place of `run_server`; prefer it
    # and fall back to `run_server` only where `run` does not exist.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True, port=8051)