In [1]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px

The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html


In [2]:

# Load the corrected data
corrected = pd.read_csv("cleaned_corrected.csv")
corrected=corrected.drop(columns=['Unnamed: 0'])

##extracted
extracted= pd.read_csv("cleaned_extraction.csv")
extracted=extracted.drop(columns=['Unnamed: 0'])

ground_truth= pd.read_excel("GroundTruth_KCD.xlsx")


In [3]:
unit_columns = [f"li{i}_unit" for i in range(1, 9)]

# Filter the DataFrame
corrected_fr = corrected[corrected[unit_columns].isin(["PCE"]).any(axis=1)]
selected_columns = ["document ID"] + unit_columns
corrected_fr = corrected_fr[selected_columns]
corrected_fr["type"]='corrected'

extracted_fr = extracted[extracted[unit_columns].isin(["PCE"]).any(axis=1)]
extracted_fr = extracted_fr[selected_columns]
extracted_fr["type"]='extraction'

ground_fr = ground_truth[ground_truth[unit_columns].isin(["PCE"]).any(axis=1)]
ground_fr = ground_fr[selected_columns]
ground_fr["type"]='ground_truth'


# dataset
combined_df=pd.concat([corrected_fr, extracted_fr, ground_fr], ignore_index=True)


In [4]:
#line item 
corrected_lines=corrected[["num_line_items","document ID"]]
corrected_lines["type"]='corrected'

extracted_lines=extracted[["num_line_items","document ID"]]
extracted_lines["type"]='extracted'

ground_truth_lines=ground_truth[["num_line_items","document ID"]]
ground_truth_lines["type"]='ground_truth'

lines_df=pd.concat([corrected_lines, extracted_lines, ground_truth_lines], ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  corrected_lines["type"]='corrected'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  extracted_lines["type"]='extracted'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ground_truth_lines["type"]='ground_truth'


In [5]:
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Data Visualization"),
    
    html.H2("Bar Chart 1"),
    dcc.Dropdown(
        id='bar-chart-1-dropdown',
        options=[{'label': cat, 'value': cat} for cat in corrected['quote_number_status'].unique()],
        value=['ACCEPTED', 'UNKNOWN'],
        multi=True
    ),
    dcc.Graph(id='bar-chart-1'),
    
    html.H2("Bar Chart 2"),
    dcc.Dropdown(
        id='bar-chart-2-dropdown',
        options=[{'label': cat, 'value': cat} for cat in lines_df['type'].unique()],
        value=['corrected', 'extraction', 'ground_truth'],
        multi=True
    ),
    dcc.Graph(id='bar-chart-2'),
    
    html.H2("Pie Chart"),
    dcc.Dropdown(
        id='pie-chart-dropdown',
        options=[{'label': cat, 'value': cat} for cat in combined_df['type'].unique()],
        value=['corrected', 'extraction', 'ground_truth'],
        multi=True
    ),
    dcc.Graph(id='pie-chart')
])

# Define the callback to update the first bar chart
@app.callback(
    Output('bar-chart-1', 'figure'),
    [Input('bar-chart-1-dropdown', 'value')]
)
def update_bar_chart_1(selected_categories):
    if not selected_categories:
        selected_categories = ['ACCEPTED', 'UNKNOWN']
    
    # Filter the dataframe for the first bar chart
    filtered_bar_df = corrected[corrected['quote_number_status'].isin(selected_categories)]
    
    # Count the number of documents per status category for the bar chart
    bar_count_df = filtered_bar_df['quote_number_status'].value_counts().reset_index()
    bar_count_df.columns = ['quote_number_status', 'document_count']
    
    # Create the bar chart
    bar_fig = px.bar(bar_count_df, x='quote_number_status', y='document_count', 
                     labels={'quote_number_status': 'Document status', 'document_count': 'Number of Documents'},
                     title='Document Count by Status',
                     color_discrete_sequence=px.colors.qualitative.Bold)  # Change color scheme
    
    return bar_fig

# Define the callback to update the second bar chart
@app.callback(
    Output('bar-chart-2', 'figure'),
    [Input('bar-chart-2-dropdown', 'value')]
)
def update_bar_chart_2(selected_categories):
    if not selected_categories:
        selected_categories = ['corrected', 'extraction', 'ground_truth']
    
    # Filter the dataframe for the second bar chart
    filtered_bar_df = lines_df[lines_df['type'].isin(selected_categories)]
    
    # Create the bar chart
    bar_fig = px.bar(filtered_bar_df, y='num_line_items', color='type',
                     labels={'document ID': 'Document ID', 'num_line_items': 'Number of Line Items', 'type': 'Type'},
                     title='Document Count by Type',
                     color_discrete_sequence=px.colors.qualitative.Pastel)  # Change color scheme
    
    # Update layout for better readability
    bar_fig.update_layout(
        xaxis_title='Document ID',
        yaxis_title='Number of Line Items',
        legend_title='Type',
        plot_bgcolor='rgba(0,0,0,0)',  # Remove background color
        bargap=0.2  # Adjust bar spacing
    )
    
    # Add hover template for detailed information on hover
    bar_fig.update_traces(hovertemplate='<b>Document ID:</b> %{x}<br><b>Number of Line Items:</b> %{y}<br><b>Type:</b> %{marker.color}')
    
    return bar_fig

# Define the callback to update the pie chart
@app.callback(
    Output('pie-chart', 'figure'),
    [Input('pie-chart-dropdown', 'value')]
)
def update_pie_chart(selected_types):
    if not selected_types:
        selected_types = ['corrected', 'extraction', 'ground_truth']
    
    # Filter the dataframe for the pie chart
    filtered_pie_df = combined_df[combined_df['type'].isin(selected_types)]
    
    # Count the number of documents per type for the pie chart
    pie_count_df = filtered_pie_df['type'].value_counts().reset_index()
    pie_count_df.columns = ['type', 'document_count']
    
    # Create the pie chart
    pie_fig = px.pie(pie_count_df, names='type', values='document_count',
                     title='PCE In Different Dataset',
                     color_discrete_sequence=px.colors.qualitative.Safe)  # Change color scheme
    
    return pie_fig

if __name__ == '__main__':
    app.run_server(debug=True)