In [1]:
import pandas as pd
import numpy as np
import datetime
import json

from debther_texts import *
#from cluster_functions import *
#from cluster_func_c import *
from cluster_func_np import *

from dash import Dash, html, dcc, Input, Output, callback,State
from dash import dash_table
import plotly.express as px
import plotly.graph_objects as go

In [42]:
cluster_df = pd.DataFrame()
comparison_df = pd.DataFrame()


edition_comparison = Dash(__name__)

edition_comparison.layout = html.Div([
    html.H1(children='Edition comparison - cluster finder', style={'textAlign': 'center'}),
    dcc.Store(id='store_df'),dcc.Store(id='clickdata_remove'),
    html.Button('example_Data',id='load_example',n_clicks=0),

    html.Div([
        # Wrapper Div für die beiden Textbereiche
        html.Div([
            dcc.Markdown(children=f'Text A', id='md_a', style={'width': '100%'}),
            dcc.Textarea(id='text_a_input', style={'width': '90%', 'height': 100}),
            dcc.Input(id='seperators',placeholder="all seperators (z. B.: ',', '|', ';')", 
                style={'width': '90%','height':20}),
            
            html.Div('minimal cluster size', id='slider_head', style={'margin-right': '10px'}),  # Text mit Abstand
               
            dcc.Slider(
                    5, 50, 1, value=10, id='cluster_slider',
                    tooltip={"placement": "bottom", "always_visible": True}, marks=None 
                    ),
            dcc.RadioItems(['Bubble','Line'],value='Bubble',id='graph_type')

            ], style={'width': '50%'}),

            html.Div([
                dcc.Markdown(children=f'Text B', id='md_b', style={'width': '100%'}),
                dcc.Textarea(id='text_b_input', style={'width': '90%', 'height': 100}),
                html.Button('Analyze',id='start_button', style={'width': '20%', 'height': 40},n_clicks=0)
                ], style={'width': '50%'}),
            html.Div([],id='loading')
        
        ], style={'display': 'flex'}),
        html.Div([
                
                html.Div([
                    dcc.Loading([
                    dcc.Graph(id='graph')],id='loading1')
                ], style={'width': '50%'}),

                html.Div([
                    dcc.Loading([
                    dcc.Graph(id='graph_diff')],id='loading2')
                ], style={'width': '50%'})
                
                ], style={'display': 'flex', 'justify-content': 'space-between', 'align-items': 'left'}
                ),
                # Buttons for removing and plotting
        html.Div([
                html.Button('Remove Datapoints',id='remove_button', 
                            style={'width': '20%', 'height': 40},n_clicks=0),
                html.Button('Plot',id='plot_button', 
                            style={'width': '20%', 'height': 40},n_clicks=0),
                html.Button('Update Table',id='update_table_button', 
                            style={'width': '20%', 'height': 40},n_clicks=0)
                ], style={'width': '50%'}
                ),
        html.Div([
            dash_table.DataTable(
                id='table',
                columns=[],  # Spaltennamen setzen
                data=[],  # DataFrame als Liste von Dicts übergeben
                style_table={'overflowX': 'auto'},  # Scrollbar aktivieren falls nötig
                style_header={'backgroundColor': 'lightgrey', 'fontWeight': 'bold'},
                style_cell={'textAlign': 'left'},  # Ausrichtung
                page_size=20  # Falls die Tabelle viele Zeilen hat
            )
        ])
       
])

@callback(
        Output('clickdata_remove','data'),
        Input('graph','selectedData'),
        prevent_initial_call=True
)  
def get_click(click):
    return click['points']

@callback(
        Output('store_df','data', allow_duplicate=True),
        Input('remove_button','n_clicks'),
        State('clickdata_remove','data'),
        State('store_df','data'),
        prevent_initial_call=True
)
def remove_data(_,data_remove,data_df):
    df = pd.DataFrame(data_df)
    for i in data_remove:
        start_a,start_b = i['x'],i['y']
        
        data_to_drop = df[(df['start_a']==start_a) & 
                   (df['start_b']==start_b)]
        #print(data_to_drop)
        #print(f'shape: {data_to_drop.shape}')
        if data_to_drop.shape[0]>0:
            print('test')
            df.drop(data_to_drop.index,inplace=True)
        
    data_df = df.to_dict()
    return data_df

@callback(
    Output('text_a_input','value'),
    Output('text_b_input','value'),
    Output('seperators','value'),
    Input('load_example','n_clicks'),
    prevent_initial_call=True
)
def load_example(click):
    if click >=1:
        text_a = debther_gangtok()#[:30000]
        text_b = debther_peking()#[:30000]
        seperators_1,_,_,_ = debther_parameters()
        print(seperators_1)
        seps = r""
        for i in seperators_1:
            seps += ""
            seps += i+","
        print(seps.rstrip(','))
        return text_a,text_b,seps
    else:
        return '','',''

# Analyse and create graph
@callback(
    Output('graph','figure', allow_duplicate=True),
    Output('graph_diff','figure', allow_duplicate=True),
    Output('store_df','data'),
    State('text_a_input','value'),
    State('text_b_input','value'),
    State('seperators','value'),
    State('cluster_slider','value'),
    State('graph_type','value'),
    Input('start_button','n_clicks'),
    prevent_initial_call=True 
)
def update_graph(text_a,text_b,seps,min_clus,type,clicks):
    if clicks >= 1:
        print('######')
        print(seps)

        seps = seps.split(',')
        print('#################')
        print(seps)
        print('#################')
        text_a = clean(text_a,seps) 
        text_b = clean(text_b,seps)    

        cluster_df = find_cluster(text_a,text_b,int(min_clus),'a','b')

        print(cluster_df)
        if type == 'Bubble':
            fig1 = px.scatter(cluster_df, x='start_a', y='start_b', size='length')
            fig1.update_layout(clickmode='event+select')
            fig2 = px.scatter(cluster_df, x='start_a', y='differenz', size='length')
        else:
            fig1 = go.Figure()
            for _, row in cluster_df.iterrows():
                fig1.add_trace(go.Scatter(
                x=[row['start_a'], row['end_a']],  # X-Koordinaten
                y=[row['start_b'], row['end_b']],  # Y-Koordinaten
                mode='lines+markers',  # Zeigt Linien und Marker
                name=f"Line {row.name}",  # Optionale Beschriftung
                line=dict(color='blue',width=2),  # Linienbreite
                marker=dict(color='blue',size=5)  # Markierungsgröße
                ))
                fig1.update_layout(
                    title="Linien-Visualisierung basierend auf DataFrame",
                    xaxis_title="Text_a",
                    yaxis_title="Text_b",
                    showlegend=False,  # Legende anzeigen
                    )
                fig1.add_trace(go.Scatter(
                    x=[0, max(cluster_df['end_a'].max(), cluster_df['end_b'].max())],  # Bereich für die Diagonale
                    y=[0, max(cluster_df['end_a'].max(), cluster_df['end_b'].max())],  # y=x
                    mode='lines',  # Nur Linie
                    line=dict(color='grey', width=1),  # Rote gestrichelte Linie
                    name='y = x',  # Beschriftung der Linie
                    showlegend=False  # Keine Legende
                ))
            fig2 = go.Figure()
            for _, row in cluster_df.iterrows():
                fig2.add_trace(go.Scatter(
                x=[row['start_a'], row['end_a']],  # X-Koordinaten
                y=[row['differenz'], row['differenz']],  # Y-Koordinaten
                mode='lines+markers',  # Zeigt Linien und Marker
                name=f"Line {row.name}",  # Optionale Beschriftung
                line=dict(color='blue',width=2),  # Linienbreite
                marker=dict(color='blue',size=5)  # Markierungsgröße
                ))
                fig1.update_layout(
                    title="Linien-Visualisierung basierend auf DataFrame",
                    xaxis_title="Text_a",
                    yaxis_title="Text_b",
                    showlegend=False,  # Legende anzeigen
                    )
                fig1.add_trace(go.Scatter(
                    x=[0, max(cluster_df['end_a'].max(), cluster_df['end_b'].max())],  # Bereich für die Diagonale
                    y=[0, 0],  # y=x
                    mode='lines',  # Nur Linie
                    line=dict(color='grey', width=1),  # Rote gestrichelte Linie
                    name='y = x',  # Beschriftung der Linie
                    showlegend=False  # Keine Legende
                ))
        
        cluster_df = cluster_df.to_dict()
        return fig1, fig2, cluster_df
    
    else:
        return px.scatter(),px.scatter(),cluster_df

# only change type of graph
@callback(
    Output('graph','figure', allow_duplicate=True),
    Output('graph_diff','figure', allow_duplicate=True),
    Input('graph_type','value'),
    Input('plot_button','n_clicks'),
    State('store_df','data'),
    prevent_initial_call=True
)
def update_graph(type,plot_button,cluster_df):
    cluster_df = pd.DataFrame(cluster_df)
    #print(cluster_df)
    if type == 'Bubble':
        fig1 = px.scatter(cluster_df, x='start_a', y='start_b', size='length')
        fig1.update_layout(clickmode='event+select')
        fig2 = px.scatter(cluster_df, x='start_a', y='differenz', size='length')
    else:
        fig1 = go.Figure()
        for _, row in cluster_df.iterrows():
            fig1.add_trace(go.Scatter(
            x=[row['start_a'], row['end_a']],  # X-Koordinaten
            y=[row['start_b'], row['end_b']],  # Y-Koordinaten
            mode='lines+markers',  # Zeigt Linien und Marker
            name=f"Line {row.name}",  # Optionale Beschriftung
            line=dict(color='blue',width=2),  # Linienbreite
            marker=dict(color='blue',size=5)  # Markierungsgröße
            ))
            fig1.update_layout(
                title="Linien-Visualisierung basierend auf DataFrame",
                xaxis_title="Text_a",
                yaxis_title="Text_b",
                showlegend=False,  # Legende anzeigen
                )
            fig1.add_trace(go.Scatter(
                x=[0, max(cluster_df['end_a'].max(), cluster_df['end_b'].max())],  # Bereich für die Diagonale
                y=[0, max(cluster_df['end_a'].max(), cluster_df['end_b'].max())],  # y=x
                    mode='lines',  # Nur Linie
                    line=dict(color='grey', width=1),  # Rote gestrichelte Linie
                    name='y = x',  # Beschriftung der Linie
                    showlegend=False  # Keine Legende
                ))
        fig2 = go.Figure()
        for _, row in cluster_df.iterrows():
            fig2.add_trace(go.Scatter(
                x=[row['start_a'], row['end_a']],  # X-Koordinaten
                y=[row['differenz'], row['differenz']],  # Y-Koordinaten
                mode='lines+markers',  # Zeigt Linien und Marker
                name=f"Line {row.name}",  # Optionale Beschriftung
                line=dict(color='blue',width=2),  # Linienbreite
                marker=dict(color='blue',size=5)  # Markierungsgröße
                ))
            fig2.update_layout(
                    title="Linien-Visualisierung basierend auf DataFrame",
                    xaxis_title="Text_a",
                    yaxis_title="Text_b",
                    showlegend=False,  # Legende anzeigen
                    )
            fig2.add_trace(go.Scatter(
                    x=[0, max(cluster_df['end_a'].max(), cluster_df['end_b'].max())],  # Bereich für die Diagonale
                    y=[0, 0],  # y=x
                    mode='lines',  # Nur Linie
                    line=dict(color='grey', width=1),  # Rote gestrichelte Linie
                    name='y = x',  # Beschriftung der Linie
                    showlegend=False  # Keine Legende
                ))
        

    return fig1, fig2


@callback(
    Output('table','columns'),
    Output('table','data'),
    Input('update_table_button','n_clicks'),
    State('store_df','data'),
    State('text_a_input','value'),
    State('text_b_input','value'),
    prevent_initial_call=True
)
def update_table(_,data,text_a,text_b):
    df = pd.DataFrame(data)
    compare_df = compare_defter(text_a,text_b,df,text_a_name='a',text_b_name='b')
    cols = [{"name": col, "id": col} for col in compare_df.columns]
    table_data = compare_df.to_dict('records')
    return cols, table_data


edition_comparison.run(debug=True, jupyter_mode="external", port=9093)

Dash app running on http://127.0.0.1:9093/


['་', ' ', '\\n']
་, ,\n
######
་, ,\n,
#################
['་', ' ', '\\n', '']
#################
no characters to replace given.
['་', ' ', '\\n', '']
no characters to replace given.
['་', ' ', '\\n', '']
     start_a    end_a  start_b    end_b  length  differenz
0       19.0     57.0      4.0     42.0    38.0      -15.0
1       58.0     88.0     43.0     73.0    30.0      -15.0
2       91.0    102.0     76.0     87.0    11.0      -15.0
3      107.0    131.0    100.0    124.0    24.0       -7.0
4      132.0    155.0    126.0    149.0    23.0       -6.0
..       ...      ...      ...      ...     ...        ...
816  27941.0  27951.0  51298.0  51308.0    10.0    23357.0
817  27960.0  27970.0  51318.0  51328.0    10.0    23358.0
818  27978.0  28006.0  51336.0  51364.0    28.0    23358.0
819  28007.0  28020.0  51365.0  51378.0    13.0    23358.0
820  28034.0  28056.0  51394.0  51416.0    22.0    23360.0

[821 rows x 6 columns]
--------------------------------------------------------------

In [37]:
help(dash_table)

Help on package dash.dash_table in dash:

NAME
    dash.dash_table

PACKAGE CONTENTS
    DataTable
    Format
    FormatTemplate
    _imports_

CLASSES
    dash.development.base_component.Component(builtins.object)
        dash.dash_table.DataTable.DataTable
    
    class DataTable(dash.development.base_component.Component)
     |  DataTable(data=undefined, columns=undefined, editable=undefined, fixed_columns=undefined, fixed_rows=undefined, column_selectable=undefined, cell_selectable=undefined, row_selectable=undefined, row_deletable=undefined, active_cell=undefined, selected_cells=undefined, selected_rows=undefined, selected_columns=undefined, selected_row_ids=undefined, start_cell=undefined, end_cell=undefined, data_previous=undefined, hidden_columns=undefined, is_focused=undefined, merge_duplicate_headers=undefined, data_timestamp=undefined, include_headers_on_copy_paste=undefined, export_columns=undefined, export_format=undefined, export_headers=undefined, page_action=undefined,