In [1]:
import pandas as pd

# Load the two per-model gaze tables.
df1 = pd.read_csv('yolo_gazed.csv')
df2 = pd.read_csv('dino_sam_gazed.csv')

# Stack them into one table: every row of df1 followed by every row of df2,
# with a fresh 0..n-1 index.
merged_df = pd.concat([df1, df2], ignore_index=True)

# Force the timestamp column to a numeric dtype so the sort below compares
# numbers rather than strings.
merged_df['timestamp'] = pd.to_numeric(merged_df['timestamp'])

# Order the rows chronologically. A stable sort (mergesort) is requested so
# that rows sharing the same timestamp keep their concatenation order
# (df1 rows before df2 rows); the default quicksort leaves the order of ties
# unspecified, which would make any later row-to-row time difference depend
# on an arbitrary ordering.
merged_df = (
    merged_df
    .sort_values(by='timestamp', kind='mergesort')
    .reset_index(drop=True)
)

# Persist the merged, sorted table without writing the DataFrame index.
merged_df.to_csv('sam1_merged_gazed.csv', index=False)

# Show the result (optional).
print(merged_df)


       index   timestamp  frame                name    x    y  cls  bid
0          0    1.115658     21   pavement sidewalk  574  648  NaN  NaN
1          1    1.120259     22   pavement sidewalk  574  649  NaN  NaN
2          2    1.127507     22   pavement sidewalk  574  647  NaN  NaN
3          3    1.131638     22   pavement sidewalk  574  646  NaN  NaN
4          4    1.135549     22   pavement sidewalk  575  648  NaN  NaN
...      ...         ...    ...                 ...  ...  ...  ...  ...
42808  86584  434.635723  13032           cyclepath  581  977  NaN  NaN
42809  86588  434.655653  13033  pavement cyclepath  579  964  NaN  NaN
42810  86590  434.667725  13033  pavement cyclepath  578  959  NaN  NaN
42811  86591  434.671808  13033  pavement cyclepath  578  958  NaN  NaN
42812  86594  434.687758  13034  pavement cyclepath  573  950  NaN  NaN

[42813 rows x 8 columns]


In [2]:
import pandas as pd

# 1. Load the merged gaze table.
df = pd.read_csv('sam1_merged_gazed.csv')

# 2. Per-row duration: the time from this row's timestamp to the next row's.
#    The last row has no successor, so its duration is filled with 0.
#    NOTE(review): this presumes the rows are already in timestamp order - confirm.
df['duration'] = (df['timestamp'].shift(-1) - df['timestamp']).fillna(0)

# 3. Overall time span covered by the table (latest minus earliest timestamp).
total_time = df['timestamp'].max() - df['timestamp'].min()

# 4. Per-target totals: summed duration and number of rows carrying that name.
summary = (
    df.groupby('name')
      .agg(
          total_duration=('duration', 'sum'),
          gaze_count=('name', 'count'),
      )
      .reset_index()
)

# 5. Derived metrics.
#    saliency    = target's summed duration / overall time span
#    frequency   = target's row count       / total number of rows in the table
#    persistency = target's summed duration / target's row count
summary['saliency'] = summary['total_duration'] / total_time
summary['frequency'] = summary['gaze_count'] / len(df)
summary['persistency'] = summary['total_duration'] / summary['gaze_count']

# 6. Report the three metrics per target.
print(summary[['name', 'saliency', 'frequency', 'persistency']])


                           name  saliency  frequency  persistency
0                     buildings  0.159686   0.201855     0.008011
1                           bus  0.003922   0.006236     0.006369
2                           car  0.044524   0.030855     0.014614
3                     cyclepath  0.051088   0.082662     0.006259
4                         fence  0.005015   0.003013     0.016857
5                 fence grasses  0.000351   0.000327     0.010867
6                       grasses  0.003172   0.002780     0.011558
7                    motorcycle  0.014818   0.003130     0.047947
8                      pavement  0.041746   0.048490     0.008719
9                pavement cycle  0.002864   0.002079     0.013953
10           pavement cyclepath  0.369430   0.403849     0.009264
11  pavement cyclepath sidewalk  0.003029   0.002242     0.013682
12            pavement sidewalk  0.017548   0.014552     0.012213
13                 pavementpath  0.022376   0.015299     0.014811
14        

In [2]:
import pandas as pd

# 1. Load the gaze table to analyse.
df = pd.read_csv('merged_gazed.csv')

# 2. Duration of each row = next row's timestamp minus this row's timestamp;
#    the final row has nothing after it and gets 0.
#    NOTE(review): relies on the CSV already being ordered by timestamp - confirm.
df['duration'] = (df['timestamp'].shift(-1) - df['timestamp']).fillna(0)

# 3. Aggregate per target name: total duration and how many rows it appears in.
summary = df.groupby('name').agg(**{
    'total_duration': ('duration', 'sum'),
    'gaze_count': ('name', 'count'),
}).reset_index()

# 4. Length of the whole recording as seen in this table (max - min timestamp).
total_time = df['timestamp'].max() - df['timestamp'].min()

# 5. Metrics derived from the aggregates:
#    - saliency:    share of the overall time span attributed to the target
#    - frequency:   share of all rows that belong to the target
#    - persistency: average duration per row of the target
summary['saliency'] = summary['total_duration'] / total_time
summary['frequency'] = summary['gaze_count'] / len(df)
summary['persistency'] = summary['total_duration'] / summary['gaze_count']

# 6. Print the metric columns only.
print(summary[['name', 'saliency', 'frequency', 'persistency']])


                           name  saliency  frequency  persistency
0                     buildings  0.162754   0.197747     0.008202
1                           bus  0.003922   0.006138     0.006369
2                           car  0.042863   0.030368     0.014066
3                     cyclepath  0.054682   0.082667     0.006592
4                         fence  0.005802   0.003218     0.017967
5                 fence grasses  0.000342   0.000322     0.010583
6                       grasses  0.004353   0.002759     0.015726
7                    motorcycle  0.009405   0.003080     0.030425
8                      pavement  0.046861   0.045609     0.010239
9                pavement cycle  0.002741   0.001977     0.013815
10           pavement cyclepath  0.345128   0.413241     0.008323
11  pavement cyclepath sidewalk  0.003094   0.002322     0.013282
12            pavement sidewalk  0.026262   0.012759     0.020513
13                 pavementpath  0.018983   0.014506     0.013042
14        

In [None]:
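# Produce a video with the gaze points drawn on top of the yolo-dino-sam2 segmentation masks (NOTE(review): the file paths in this cell say sam1 - confirm which model is meant)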
import cv2
import pandas as pd
from tqdm import tqdm

# Load the gaze samples. The loop below reads three columns from this table:
# 'world_index' (compared against the video frame number), 'norm_pos_x' and
# 'norm_pos_y' (scaled by the frame width/height to get pixel coordinates).
gaze_positions_df = pd.read_csv('/home/lnt/PycharmProjects/analyze_gaze/gaze_positions.csv')

# Open the input video and fail loudly if it cannot be read; otherwise the
# cell would silently produce an empty output file.
cap = cv2.VideoCapture('/home/lnt/PycharmProjects/analyze_gaze/video_dino-sam1_yolo_masks.mp4')
if not cap.isOpened():
    raise RuntimeError('Could not open the input video')

# Keep the frame rate as a float: truncating e.g. 29.97 to 29 would make the
# output play at a different speed than the input.
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('/home/lnt/PycharmProjects/analyze_gaze/result/sam1_video_with_masks_gaze.mp4', fourcc, fps, (width, height))

# Frame count reported by the container; used as the loop bound.
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Cursor into the gaze table. It only moves forward, so the table is walked
# once over the whole video.
# NOTE(review): this assumes the rows are ordered by world_index - confirm.
gaze_idx = 0
num_gaze_points = len(gaze_positions_df)

try:
    # Process the video frame by frame.
    for frame_index in tqdm(range(total_frames)):
        ret, frame = cap.read()
        if not ret:
            print(f"Failed to read frame at index {frame_index}")
            break

        # Skip gaze rows that belong to frames already passed. Without this,
        # a single such row would block the cursor and no later gaze point
        # would ever be drawn.
        while gaze_idx < num_gaze_points and gaze_positions_df.loc[gaze_idx, 'world_index'] < frame_index:
            gaze_idx += 1

        # Draw every gaze sample assigned to the current frame.
        while gaze_idx < num_gaze_points and gaze_positions_df.loc[gaze_idx, 'world_index'] == frame_index:
            gaze = gaze_positions_df.loc[gaze_idx]

            # Convert normalised coordinates to pixels. The y value is
            # inverted (1 - norm_pos_y) before scaling - presumably the gaze
            # export uses a bottom-left origin; confirm against the exporter.
            x = int(gaze['norm_pos_x'] * width)
            y = int((1 - gaze['norm_pos_y']) * height)

            # Filled red dot. OpenCV colour tuples are in BGR order, so red
            # is (0, 0, 255); (255, 0, 0) would be blue.
            cv2.circle(frame, (x, y), 5, (0, 0, 255), -1)

            # Advance to the next gaze sample.
            gaze_idx += 1

        # Stamp the frame number in green in the top-left corner.
        cv2.putText(frame, f'Frame: {frame_index}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

        # Append the annotated frame to the output video.
        out.write(frame)
finally:
    # Always release the reader and writer, even if the loop raised, so the
    # output file is finalised and the input handle is closed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()
