In [1]:
import pandas as pd
import numpy as np
import datetime
import json

from debther_texts import *
from cluster_functions_nb import *

from dash import Dash, html, dcc, Input, Output, callback,State
from dash import dash_table
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Module-level placeholder frames.
# NOTE(review): neither frame appears to be read by the callbacks visible in
# this file (they work on local data / the 'store_df' store) - confirm before removing.
cluster_df = pd.DataFrame()
comparison_df = pd.DataFrame()


edition_comparison = Dash(__name__)

# Page layout: headline, a client-side store for the cluster table, an
# "example data" button, one row with the two text inputs and their controls,
# and one row with the two result graphs.
# Style dictionaries use camelCased keys throughout, as Dash expects.
edition_comparison.layout = html.Div([
    html.H1(children='Edition comparison - cluster finder', style={'textAlign': 'center'}),
    dcc.Store(id='store_df'),
    html.Button('example_Data', id='load_example', n_clicks=0),

    # Wrapper div for the two text areas (laid out side by side via flex).
    html.Div(
        [
            # Left column: text A, separator list, minimal cluster size, graph type.
            html.Div(
                [
                    dcc.Markdown(children='Text A', id='md_a', style={'width': '100%'}),
                    dcc.Textarea(id='text_a_input', style={'width': '90%', 'height': 100}),
                    dcc.Input(
                        id='seperators',
                        placeholder="all seperators (z. B.: ',', '|', ';')",
                        style={'width': '90%', 'height': 20},
                    ),
                    # Slider caption, with a small right margin.
                    html.Div('minimal cluster size', id='slider_head', style={'marginRight': '10px'}),
                    dcc.Slider(
                        0, 50, 1,
                        value=10,
                        id='cluster_slider',
                        tooltip={"placement": "bottom", "always_visible": True},
                        marks=None,
                    ),
                    dcc.RadioItems(['Bubble', 'Line'], value='Bubble', id='graph_type'),
                ],
                style={'width': '45%'},
            ),

            # Right column: text B and the button that starts the analysis.
            html.Div(
                [
                    dcc.Markdown(children='Text B', id='md_b', style={'width': '100%'}),
                    dcc.Textarea(id='text_b_input', style={'width': '90%', 'height': 100}),
                    html.Button('Analyze', id='start_button', style={'width': '20%', 'height': 40}, n_clicks=0),
                ],
                style={'width': '45%'},
            ),

            html.Div([], id='loading'),
        ],
        style={'display': 'flex'},
    ),

    # Result row: cluster positions on the left, position differences on the right.
    html.Div(
        [
            html.Div(
                [dcc.Loading([dcc.Graph(id='graph')], id='loading1')],
                style={'width': '45%'},
            ),
            html.Div(
                [dcc.Loading([dcc.Graph(id='graph_diff')], id='loading2')],
                style={'width': '45%'},
            ),
        ],
        style={'display': 'flex', 'justifyContent': 'space-between', 'alignItems': 'center'},
    ),
])

@callback(Input('graph', 'clickData'))
def get_click(click):
    """Debug hook: print the ``clickData`` payload of the 'graph' component.

    The callback declares no Output; it only writes the received value to
    stdout whenever the 'graph' component reports new click data.
    """
    print(click)


@callback(
    Output('text_a_input', 'value'),
    Output('text_b_input', 'value'),
    Output('seperators', 'value'),
    Input('load_example', 'n_clicks'),
    prevent_initial_call=True,
)
def load_example(click):
    """Fill both text areas and the separator field with the example data.

    Args:
        click: ``n_clicks`` of the 'load_example' button.

    Returns:
        A ``(text_a, text_b, separators)`` tuple. The texts come from
        ``debther_gangtok()`` and ``debther_peking()``; the separators are the
        first value returned by ``debther_parameters()``, joined with commas.
        Three empty strings are returned when the button has not been clicked.
    """
    if not click:
        return '', '', ''

    text_a = debther_gangtok()
    text_b = debther_peking()
    # debther_parameters() returns four values; only the separators are needed here.
    seperators_1, _, _, _ = debther_parameters()
    print(seperators_1)

    # Join instead of appending "sep," in a loop: the old code left a trailing
    # comma in the returned value, which turned into an empty separator once
    # the field was split on ',' again.
    # NOTE(review): a separator that itself contains ',' cannot survive this
    # comma-delimited round trip - confirm none of the example separators do.
    seps = ",".join(seperators_1)
    print(seps)
    return text_a, text_b, seps

# Figure construction shared by both graph callbacks below.
def _segment_figure(cluster_df, y_start_col, y_end_col, *, diagonal):
    """Build a line figure with one blue segment per row of ``cluster_df``.

    Each row is drawn from ``(start_a, <y_start_col>)`` to
    ``(end_a, <y_end_col>)``. A single grey reference line is added afterwards:
    the diagonal ``y = x`` when ``diagonal`` is true, otherwise the horizontal
    line ``y = 0``. Its length is the larger of the maxima of the ``end_a`` and
    ``end_b`` columns.

    The layout and the reference line are applied once per figure, not once
    per row, so no duplicate reference traces are created.
    """
    fig = go.Figure(data=[
        go.Scatter(
            x=[row['start_a'], row['end_a']],
            y=[row[y_start_col], row[y_end_col]],
            mode='lines+markers',
            name=f"Line {row.name}",
            line=dict(color='blue', width=2),
            marker=dict(color='blue', size=5),
        )
        for _, row in cluster_df.iterrows()
    ])
    fig.update_layout(
        title="Linien-Visualisierung basierend auf DataFrame",
        xaxis_title="Text_a",
        yaxis_title="Text_b",
        showlegend=False,
    )
    # Without rows there is no extent to span (max() would be NaN), so the
    # reference line is skipped.
    if not cluster_df.empty:
        extent = max(cluster_df['end_a'].max(), cluster_df['end_b'].max())
        fig.add_trace(go.Scatter(
            x=[0, extent],
            y=[0, extent if diagonal else 0],
            mode='lines',
            line=dict(color='grey', width=1),
            name='y = x' if diagonal else 'y = 0',
            showlegend=False,
        ))
    return fig


def _build_figures(cluster_df, graph_type):
    """Return ``(fig1, fig2)`` for the cluster table in the requested style.

    'Bubble' yields two scatter plots sized by the ``length`` column
    (``start_a`` vs. ``start_b`` and ``start_a`` vs. ``differenz``); any other
    value yields the line representation of the same two views.
    """
    if graph_type == 'Bubble':
        fig1 = px.scatter(cluster_df, x='start_a', y='start_b', size='length')
        fig1.update_layout(clickmode='event+select')
        fig2 = px.scatter(cluster_df, x='start_a', y='differenz', size='length')
        return fig1, fig2

    fig1 = _segment_figure(cluster_df, 'start_b', 'end_b', diagonal=True)
    fig2 = _segment_figure(cluster_df, 'differenz', 'differenz', diagonal=False)
    return fig1, fig2


# Analyse and create graph
@callback(
    Output('graph', 'figure', allow_duplicate=True),
    Output('graph_diff', 'figure', allow_duplicate=True),
    Output('store_df', 'data'),
    State('text_a_input', 'value'),
    State('text_b_input', 'value'),
    State('seperators', 'value'),
    State('cluster_slider', 'value'),
    State('graph_type', 'value'),
    Input('start_button', 'n_clicks'),
    prevent_initial_call=True,
)
def update_graph(text_a, text_b, seps, min_clus, type, clicks):
    """Run the cluster analysis on both texts and draw the result.

    Args:
        text_a, text_b: contents of the two text areas.
        seps: comma-separated separator list from the 'seperators' input.
        min_clus: minimal cluster size from the slider.
        type: selected graph style ('Bubble' or 'Line').
        clicks: ``n_clicks`` of the 'Analyze' button.

    Returns:
        ``(fig1, fig2, cluster_dict)``, where ``cluster_dict`` is the result of
        ``find_cluster`` converted with ``DataFrame.to_dict()`` for the
        'store_df' store. When the button was not clicked or a text is
        missing, all three outputs are left untouched.
    """
    from dash import no_update

    # Nothing to analyse: keep the current figures and store. The previous
    # fallback branch read a local variable before assignment and raised.
    if not clicks or not text_a or not text_b:
        return no_update, no_update, no_update

    # An untouched input field delivers None; empty fragments (e.g. from a
    # trailing comma) are dropped so no empty separator is handed to clean().
    separators = [sep for sep in (seps or '').split(',') if sep]

    text_a = clean(text_a, separators)
    text_b = clean(text_b, separators)

    cluster_df = find_cluster(text_a, text_b, int(min_clus), 'a', 'b')
    fig1, fig2 = _build_figures(cluster_df, type)
    return fig1, fig2, cluster_df.to_dict()


# only change type of graph
# NOTE(review): this definition reuses the name ``update_graph`` and so rebinds
# the module-level name. Both functions stay registered through their
# decorators, but a distinct name would be clearer.
@callback(
    Output('graph', 'figure', allow_duplicate=True),
    Output('graph_diff', 'figure', allow_duplicate=True),
    Input('graph_type', 'value'),
    Input('store_df', 'data'),
    prevent_initial_call=True,
)
def update_graph(type, cluster_df):
    """Redraw both graphs from the stored cluster table.

    Args:
        type: selected graph style ('Bubble' or 'Line').
        cluster_df: content of the 'store_df' store (the dict written by the
            analysis callback), or None/empty before the first analysis.

    Returns:
        ``(fig1, fig2)``; both outputs are left untouched while the store is
        still empty.
    """
    from dash import no_update

    # Switching the graph type before any analysis has run: there is no data
    # to plot, and px.scatter would fail on the missing columns.
    if not cluster_df:
        return no_update, no_update

    return _build_figures(pd.DataFrame(cluster_df), type)
    
    


# Start the Dash server for this app in debug mode on port 9092, with
# jupyter_mode="external"; the recorded cell output shows it being served at
# http://127.0.0.1:9092/.
edition_comparison.run(debug=True, jupyter_mode="external", port=9092)

Dash app running on http://127.0.0.1:9092/
