In [1]:
import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output
import pandas as pd
import plotly.graph_objects as go
import numpy as np
import seaborn as sns
from scipy.stats import f_oneway
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import learning_curve
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from plotly_task import *
#from footer import CustomFooter
#from header import CustomHeader

In [2]:
def train_and_evaluate_mlp_regression(df=None):
    """Train an MLP regressor on the clustered data and run the ANOVA test.

    The three per-cluster CSV files are concatenated into one frame,
    ``tag7_resp`` is taken as the regression target and every remaining column
    as a feature. The rows are split 80/20 (``random_state=42``), an
    ``MLPRegressor`` is fitted on the training part and its mean squared error
    is measured on the held-out part. Finally ``perform_anova_test`` is called
    on the full dataset for the columns ``tag1`` and ``tag7_resp`` with 3 groups.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Dataset handed to ``perform_anova_test``. When omitted it is read from
        ``'decision_science_dataset.csv'``, so the function works on a fresh
        kernel instead of relying on a module-level ``df`` that is only
        created by a later cell.

    Returns
    -------
    tuple
        ``(y_pred, y_test, mlp_regressor, X, y, X_test, mse, f_stat, p_val,
        result)`` where ``f_stat``, ``p_val`` and ``result`` are whatever
        ``perform_anova_test`` returns (``result`` is presumably a textual
        conclusion -- verify against ``plotly_task``).
    """
    # Resolve the ANOVA input explicitly rather than through hidden global state
    if df is None:
        df = pd.read_csv('decision_science_dataset.csv')

    # Load the DataFrames of each cluster from CSV files
    cluster_files = ('cluster_0.csv', 'cluster_1.csv', 'cluster_2.csv')
    cluster_frames = [pd.read_csv(path) for path in cluster_files]

    # Concatenate the DataFrames of each cluster into a single DataFrame
    # with columns 'tag1', 'tag7_resp', and 'cluster_labels'
    merged_df = pd.concat(cluster_frames, ignore_index=True)

    # Separate the features (everything except the target) and the target (tag7_resp)
    X = merged_df.drop(['tag7_resp'], axis=1)
    y = merged_df['tag7_resp']

    # Split the dataset into training and testing sets (fixed seed for reproducibility)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize and train the MLPRegressor model
    mlp_regressor = MLPRegressor(hidden_layer_sizes=(256, 128, 64, 32), random_state=42)
    mlp_regressor.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = mlp_regressor.predict(X_test)

    # Evaluate the model's performance (MSE)
    mse = mean_squared_error(y_test, y_pred)

    # Run the ANOVA test on the full dataset
    selected_cols = ['tag1', 'tag7_resp']
    K = 3
    f_stat, p_val, result = perform_anova_test(df, selected_cols, K)

    return y_pred, y_test, mlp_regressor, X, y, X_test, mse, f_stat, p_val, result

# Train the model once at notebook level; the names bound here (predictions,
# test targets, MSE and ANOVA results) are read by the dashboard code further down.
(
    y_pred,
    y_test,
    mlp_regressor,
    X,
    y,
    X_test,
    mse,
    f_stat,
    p_val,
    result,
) = train_and_evaluate_mlp_regression()


In [None]:
# Load the dataset that the dashboard charts are built from
df = pd.read_csv('decision_science_dataset.csv')

#external_stylesheets = ['styles.css']

# Instantiate the Dash application with the Bootstrap theme
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Page title of the application
app.title = "Data Analysis"

# Layout of the app: a page title, a Bootstrap container holding the chart
# rows (each chart row is followed by a row of explanatory captions), and a
# logo plus footer at the bottom.
app.layout = html.Div(id='header-container', children=[
    html.H1('Data Analysis', id='header-text'),

    # Ten line breaks inserted between the title and the main container
    html.Br(),
    html.Br(),
    html.Br(),
    html.Br(),
    html.Br(),
    html.Br(),
    html.Br(),
    html.Br(),
    html.Br(),
    html.Br(),
    
    dbc.Container([
    
        html.Br(),
        html.Br(),
        html.Br(),
    
        # Row: correlation heatmap (figure built once, here) next to a histogram.
        # The histogram graph has no figure in the layout; it is filled in by a
        # callback driven by the 'column-selector' dropdown.
        dbc.Row([
            dbc.Col([
                html.H2("Correlation Heatmap"),
                dcc.Graph(
                    id='heatmap', 
                    figure=create_heatmap(df),
                    style={'height': '536px'}
                )
            ], width=6),

            dbc.Col([
                html.H2("Histogram"),
                dcc.Dropdown(
                    id='column-selector',
                    options=[{'label': col, 'value': col} for col in df.columns],
                    value=df.columns[0],
                    style={'width': '100%'}
                ),
                dcc.Graph(
                    id='histogram',
                    style={'height': '500px'}
                )
            ], width=6)
        ]),
        
        # Captions for the heatmap / histogram row
        dbc.Row([
            dbc.Col([
                html.H5("Analyzing the heatmap, we can see that the variables with the highest correlations are 'tag1' and 'tag7_resp'. Therefore, all the analyses in this dashboard will be based on those variables."),
            ], width=6),

            dbc.Col([
                html.H5("This histogram was primarily created to display the sine and cosine of the data, which can be observed by examining both 'tag5' and 'tag6'."),
            ], width=6)
        ]),

        html.Hr(),

        # Row: box plot and violin plot, both built once from df
        dbc.Row([
            dbc.Col([
                html.H2("Box Plot"),
                dcc.Graph(
                    id='box-plot', 
                    figure=create_box_plot(df),
                    style={'height': '500px'}
                )
            ], width=6),

            dbc.Col([
                html.H2("Violin Plot"),
                dcc.Graph(
                    id='violin-plot',
                    figure=create_violin_plot(df),
                    style={'height': '500px'}
                )
            ], width=6)
        ]),
        
        # Captions for the box / violin row
        dbc.Row([
            dbc.Col([
                html.H5("The box plot helps you visualize how 'tag7_resp' increases with respect to 'tag1'."),
            ], width=6),

            dbc.Col([
                html.H5("Like the box plot, the violin plot shows you the correlation between variables, and the larger the difference on the plot, the stronger the correlation."),
            ], width=6)
        ]),
    
        html.Hr(),

        # Row: two line charts, each with its own column dropdown; both graphs
        # are left without a figure here and are populated by callbacks.
        dbc.Row([
            dbc.Col([
                html.H2("Line Chart 1"),
                dcc.Dropdown(
                    id='line-chart1-selector',
                    options=[{'label': col, 'value': col} for col in df.columns],
                    value=df.columns[0],
                    style={'width': '100%'}
                ),
                dcc.Graph(
                    id='line-chart1',
                    style={'height': '450px'}
                )
            ], width=6),

            dbc.Col([
                html.H2("Line Chart 2"),
                dcc.Dropdown(
                    id='line-chart2-selector',
                    options=[{'label': col, 'value': col} for col in df.columns],
                    value=df.columns[1],  # You can set different initial values if desired
                    style={'width': '100%'}
                ),
                dcc.Graph(
                    id='line-chart2',
                    style={'height': '450px'}
                )
            ], width=6)
        ]),
        
        # Captions for the line-chart row
        dbc.Row([
            dbc.Col([
                html.H5("You can check the similarity between the curves of 'tag1' with 'tag7_resp' and 'tag5' with 'tag6'."),
            ], width=6),

            dbc.Col([
                html.H5("The two line plots were displayed side by side to facilitate visualizing the similarities effectively."),
            ], width=6)
        ]),

        html.Hr(),

        # Row: elbow-method chart and K-means cluster scatter, both built once from df
        dbc.Row([
            dbc.Col([
                html.H2("Elbow Method"),
                dcc.Graph(
                    id='elbow-method', 
                    figure=elbow_method(df),
                    style={'height': '550px'}
                )
            ], width=6),

            dbc.Col([
                html.H2("K-means Clustering"),
                dcc.Graph(
                    id='k-means-clustering', 
                    figure=create_cluster(df),
                    style={'height': '550px'}
                )
            ], width=6)
        ]),
        
        # Captions for the clustering row
        dbc.Row([
            dbc.Col([
                html.H5("The Elbow Method was instrumental in determining the optimal number of clusters to divide the data."),
            ], width=6),

            dbc.Col([
                html.H5("In this scatter plot, you can observe the clusters that were created through the K-means algorithm. The plot displays the cluster labels, their respective volumes, and, finally, the positions of their centroids."),
            ], width=6)
        ]),
    
        html.Hr(),

        # Row: model diagnostics built from the module-level y_test / y_pred
        # (predictions-vs-actual scatter and residuals plot). These graphs get
        # an initial figure here and are also callback outputs.
        dbc.Row([
            dbc.Col([
                html.H2("Predictions vs. Actual Values"),
                dcc.Graph(
                    id='predictions-vs-actual',
                    figure=plot_predictions(y_test, y_pred),
                    style={'height': '500px'}
                )
            ], width=6),
        
            dbc.Col([
                html.H2("Real values vs. Model Predictions"),
                dcc.Graph(
                    id='real-vs-predictions',
                    figure=plot_residuos(y_test, y_pred),
                    style={'height': '500px'}
                )
            ], width=6),
        ]),
        
        # Captions for the diagnostics row
        dbc.Row([
            dbc.Col([
                html.H5("The scatter plot was created to compare model predictions (y_pred) with the actual values (y_test). The points on the graph represent each example in the test set (a pair of real and predicted values). Add a dashed reference line connecting the minimum and maximum points of the actual values (y_test), represented in 'fuchsia' color."),
            ], width=6),

            dbc.Col([
                html.H5("This graph displays the residuals, which are the differences between the actual values (y_test) and the values predicted (y_pred) by the model. Create a scatter plot to visualize the relationship between the model predictions (y_pred) and the residuals. The points on the graph represent each example in the test set, where the x-axis displays the model predictions and the y-axis displays the residuals (differences between the actual and predicted values)."),
            ], width=6)
        ]),
    
        html.Hr(),
    
        # Row: temporal-series plot next to a text summary of the MSE and the
        # ANOVA results. The learning-curve column below is disabled
        # (commented out) and therefore not rendered.
        dbc.Row([
              #dbc.Col([
                #html.H2("Learning Curve"),
                #dcc.Graph(
                    #id='learning-curve',
                    #figure=plot_learning_curve(mlp_regressor, X, y),
                    #style={'height': '500px'}
                #)
            #], width=6),
        
            dbc.Col([
                html.H2("Predicted Values vs. tag7_resp"),
                dcc.Graph(
                    id='temporal-series',
                    figure=plot_temporal_series(X_test, y_test, y_pred),
                    style={'height': '500px'}
                )
            ], width=6),
            
            # Summary column: MSE shown with two decimals, p-value in
            # scientific notation, followed by the ANOVA result value.
            dbc.Col([
                html.H2("Other Observations:"),
                html.H5(f"Mean Squared Error (MSE) of the MLPRegressor Model: {mse:.2f}"),
                html.Br(),
                html.H5(f"ANOVA Test Result:"),
                html.H5(f"F-statistic: {f_stat}"),
                html.H5(f"P-value: {p_val:e}"),
                html.H5(result),
                
            ], width=6),
        ]),
        
        # Caption for the temporal-series plot
        dbc.Row([
            dbc.Col([
                html.H5("This plot is useful for visualizing how the model predictions compare to the real values over time in a time series, allowing you to assess the performance of the regression model across the dates or timestamps of the observations."),
            ], width=6),
        ]),
        
    
    ], fluid=True, style={'maxWidth': '1600px', 'margin': '0 auto', 'padding': '0 200px'}),
    
    #CustomFooter()
    html.Br(),
    html.Br(),
    html.Br(),
    html.Br(),
    # Closing logo image served from the app's /assets folder
    html.Div([
        html.Img(src='/assets/logo.png', id='imagem-final')
    ], id='imagem-container'),

    # Element representing the footer
    html.Footer("© 2023 Giovana", id="rodape")
    
])

# Callbacks to update the histogram and line charts based on user selection
@app.callback(Output('histogram', 'figure'), [Input('column-selector', 'value')])
def update_histogram(selected_column):
    """Build the histogram figure for the column chosen in 'column-selector'."""
    histogram_figure = create_histogram(df, selected_column)
    return histogram_figure

@app.callback(Output('line-chart1', 'figure'), [Input('line-chart1-selector', 'value')])
def update_line_chart1(selected_column):
    """Build the first line chart for the column chosen in 'line-chart1-selector'."""
    line_figure = create_line_chart(df, selected_column)
    return line_figure

@app.callback(Output('line-chart2', 'figure'), [Input('line-chart2-selector', 'value')])
def update_line_chart2(selected_column):
    """Build the second line chart for the column chosen in 'line-chart2-selector'."""
    line_figure = create_line_chart(df, selected_column)
    return line_figure

@app.callback(Output('predictions-vs-actual', 'figure'), [Input('column-selector', 'value')])
def update_predictions_vs_actual(selected_column):
    """Redraw the predictions-vs-actual figure when 'column-selector' changes.

    ``selected_column`` is supplied by the callback wiring but is not used:
    the figure depends only on the module-level ``y_test`` and ``y_pred``.
    """
    predictions_figure = plot_predictions(y_test, y_pred)
    return predictions_figure

@app.callback(
    Output('real-vs-predictions', 'figure'),
    [Input('column-selector', 'value')]
)
def update_real_vs_predictions(selected_column):
    """Redraw the residuals figure shown in the 'real-vs-predictions' graph.

    Named after the graph it updates so that it does not reuse the name of
    another callback in this notebook. Dash registers the function when the
    decorator runs, so the function name itself is not referenced anywhere.

    ``selected_column`` is supplied by the callback wiring but is not used:
    the figure depends only on the module-level ``y_test`` and ``y_pred``.
    """
    return plot_residuos(y_test, y_pred)

#@app.callback(
    #Output('learning-curve', 'figure'),
    #[Input('column-selector', 'value')]
#)
#def update_predictions_vs_actual(selected_column):
    #return plot_learning_curve(estimator, X, y, cv=5, scoring='neg_mean_squared_error')

@app.callback(
    Output('temporal-series', 'figure'),
    [Input('column-selector', 'value')]
)
def update_temporal_series(selected_column):
    """Redraw the figure shown in the 'temporal-series' graph.

    Named after the graph it updates so that it does not reuse the name of
    another callback in this notebook. Dash registers the function when the
    decorator runs, so the function name itself is not referenced anywhere.

    ``selected_column`` is supplied by the callback wiring but is not used:
    the figure depends only on the module-level ``X_test``, ``y_test`` and
    ``y_pred``.
    """
    return plot_temporal_series(X_test, y_test, y_pred)


# Start the server only when this code runs as the main module
if __name__ == '__main__':
    # Debug mode stays off, stated as an explicit boolean rather than a falsy
    # placeholder value.
    app.run_server(debug=False)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

Cluster 0: 2724 instances
Cluster 1: 3309 instances
Cluster 2: 2329 instances
Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [26/Jul/2023 20:47:33] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Jul/2023 20:47:33] "GET /assets/style.css?m=1690415187.9097917 HTTP/1.1" 200 -
127.0.0.1 - - [26/Jul/2023 20:47:33] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [26/Jul/2023 20:47:34] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [26/Jul/2023 20:47:34] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 200 -
127.0.0.1 - - [26/Jul/2023 20:47:34] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 200 -
127.0.0.1 - - [26/Jul/2023 20:47:34] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 200 -
127.0.0.1 - - [26/Jul/2023 20:47:34] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [26/Jul/2023 20:47:34] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [26/Jul/2023 20:47:34] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [26/Jul/2023 20:47:34] "POST /_dash-update-com