# In [1]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.palettes import Category20_20
from bokeh.layouts import column
from bokeh.models.widgets import Div

# Render Bokeh plots inline in the Jupyter notebook.
output_notebook()

# 1. Load the input table from a CSV file (point this at your own file).
file_path = "data.csv"  # change to the path of your CSV file
df = pd.read_csv(filepath_or_buffer=file_path)

# 2. Grouping threshold (microseconds) - tune it to the data at hand.
THRESHOLD = 5

# 3. Grouping algorithm
def group_by_clock_diff(df, threshold):
    """Group consecutive rows whose clock gap is below ``threshold``.

    Rows are processed in their current order.  A row joins the previous
    row's group when its ``clock`` minus the previous row's ``clock`` is
    strictly less than ``threshold``; otherwise it starts a new group.
    Group ids start at 0 and grow by one for every new group.

    The grouping is positional: it does not depend on the index labels, so
    frames that were filtered or re-sorted before the call are handled the
    same way as a frame with a default ``RangeIndex``.

    Args:
        df: DataFrame with at least a ``clock`` column and an ``id`` column.
            It is modified in place.
        threshold: Gap limit; a gap of exactly ``threshold`` starts a new
            group.

    Returns:
        The same ``df`` object with these columns added:
        ``clock_diff`` (gap to the previous row; the first row gets
        ``threshold + 1``), ``group`` (group id), ``group_size`` (count of
        non-null ``id`` values in the row's group) and ``group_duration``
        (max ``clock`` minus min ``clock`` within the row's group).
    """
    # Gap to the previous row.  The first row has no predecessor, so it is
    # filled with a value that can never count as "within threshold".
    df['clock_diff'] = df['clock'].diff().fillna(threshold + 1)

    # A row opens a new group unless its gap is strictly below the threshold.
    starts_new_group = ~(df['clock_diff'] < threshold)
    # The first row always belongs to group 0 (slice form is safe when empty).
    starts_new_group.iloc[:1] = False

    # Running count of group starts == group id.  Assign the raw array so the
    # result is placed by position rather than aligned on index labels.
    df['group'] = starts_new_group.cumsum().to_numpy(dtype='int64')

    # Per-group statistics broadcast back onto every row.
    df['group_size'] = df.groupby('group')['id'].transform('count')
    clock_by_group = df.groupby('group')['clock']
    df['group_duration'] = (
        clock_by_group.transform('max') - clock_by_group.transform('min')
    )

    return df

# Run the grouping over the loaded table.
df = group_by_clock_diff(df, THRESHOLD)

# 4. Prepare the data for plotting.
# Every distinct group id gets a palette colour; the palette wraps around
# once there are more groups than palette entries.
groups = df['group'].unique()
palette_size = len(Category20_20)
colors = dict(
    (group_label, Category20_20[position % palette_size])
    for position, group_label in enumerate(groups)
)
df['color'] = df['group'].map(colors)
source = ColumnDataSource(df)

# 5. Build the main chart: one marker per row, coloured by its group.
# Axis labels are set here the same way the duration chart sets its own.
p1 = figure(title=f"Clock Group Analysis (Threshold: {THRESHOLD}μs)",
            width=800, height=400,
            x_axis_label='Clock (μs)', y_axis_label='Value',
            tools="pan,wheel_zoom,box_zoom,reset")
# Keep the marker renderer so the hover tool below can be limited to it.
scatter_renderer = p1.scatter('clock', 'value', source=source, size=8,
                              color='color', legend_field='group')

# Connect the points of every group that has more than one row.
for group_id, group_df in df.groupby('group'):
    if len(group_df) > 1:
        p1.line(x=group_df['clock'], y=group_df['value'], color=colors[group_id], alpha=0.5)

# Hover tooltips.  They reference columns of `source`, which only the marker
# renderer is bound to; the connecting lines carry just their own x/y data,
# so the tool is restricted to the markers to avoid "???" tooltips on lines.
hover1 = HoverTool(renderers=[scatter_renderer], tooltips=[
    ("Clock", "@clock μs"),
    ("Value", "@value"),
    ("Group", "@group"),
    ("Size", "@group_size points"),
    ("Duration", "@group_duration μs"),
    ("Diff", "@clock_diff μs")
])
p1.add_tools(hover1)

# 6. Build the group-duration chart.
# One summary row per group: clock span and number of non-null ids.
group_stats = (
    df.groupby('group')
    .agg(
        duration=pd.NamedAgg(
            column='clock',
            aggfunc=lambda clocks: clocks.max() - clocks.min(),
        ),
        size=pd.NamedAgg(column='id', aggfunc='count'),
    )
    .reset_index()
)
group_stats['color'] = group_stats['group'].map(colors)
group_stats_source = ColumnDataSource(group_stats)

p2 = figure(
    title="Group Duration",
    width=800,
    height=300,
    x_axis_label='Group ID',
    y_axis_label='Duration (μs)',
    tools="pan,wheel_zoom,box_zoom,reset",
)
# One bar per group, filled with the same colour the group has in the main chart.
p2.vbar(
    x='group',
    top='duration',
    width=0.8,
    source=group_stats_source,
    fill_color='color',
    line_color="black",
)

# Hover tooltips for the bars.
duration_tooltips = [
    ("Group", "@group"),
    ("Duration", "@duration μs"),
    ("Size", "@size points"),
]
hover2 = HoverTool(tooltips=duration_tooltips)
p2.add_tools(hover2)

# 7. Build the HTML header with summary figures.
total_points = len(df)
group_count = len(groups)
min_duration = group_stats['duration'].min()
max_duration = group_stats['duration'].max()

# The header starts and ends with a newline, hence the empty first/last items.
title_text = "\n".join([
    "",
    "<h2>Clock Group Analysis</h2>",
    f"<p><b>File:</b> {file_path} | <b>Total Points:</b> {total_points} | <b>Groups:</b> {group_count}</p>",
    f"<p><b>Threshold:</b> {THRESHOLD}μs | <b>Min Duration:</b> {min_duration}μs | <b>Max Duration:</b> {max_duration}μs</p>",
    "",
])
title = Div(text=title_text, width=800)

# 8. Show header and both charts stacked vertically in the notebook.
layout = column(title, p1, p2)
show(layout)