## Train/Test Split

In [5]:
import os
import pandas as pd
from utils.processing import unique_video_train_test

# Directory holding the extracted frames and the base name (no extension) of
# the pre-processed log pickle stored inside it.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_frames"
in_pkl = "pre_processed_5th_frame_log"

# Input and output pickles live side by side; the output only gets a suffix.
log_path = os.path.join(prefix, in_pkl + ".pkl")
split_path = os.path.join(prefix, in_pkl + "_test_train.pkl")

# unique_video_train_test is a project helper from utils.processing.
# NOTE(review): presumably it assigns whole videos to either train or test --
# confirm against its implementation.
df = unique_video_train_test(pd.read_pickle(log_path))
df.to_pickle(split_path)

## Labels Dataframe

In [3]:
import os
import numpy as np
import pandas as pd
from utils.io import recursive_scan2df

# Build one label dataframe from the per-video phase and tool annotation text
# files and store it as a pickle next to the frame log.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_frames"
out_pkl = "pre_processed_log_test_train_labels.pkl"

# NOTE(review): `in_pkl` is not assigned in this cell; it is picked up from the
# "Train/Test Split" cell, so that cell has to be executed first.
df = pd.read_pickle(os.path.join(prefix, in_pkl + ".pkl"))

phase_prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80/phase_annotations"
tool_annotation_prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80/tool_annotations"

# Sort both listings by file name and renumber them from 0 so that row i of
# the phase listing is paired with row i of the tool annotation listing, and
# so that the loop index can be compared against `df.vid`.
phase_df = recursive_scan2df(phase_prefix, ".txt").sort_values("file").reset_index(drop=True)
tool_annotation_df = recursive_scan2df(tool_annotation_prefix, ".txt").sort_values("file").reset_index(drop=True)

# Collect the per-video frames in a list and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies all previous rows on every iteration.
per_video_labels = []

for (idx, phase_txt_row), (_, tool_annotation_txt_row) in zip(phase_df.iterrows(), tool_annotation_df.iterrows()):
    phase_txt = pd.read_csv(os.path.join(phase_prefix, phase_txt_row.folder, phase_txt_row.file), sep="\t")
    tool_annotation_txt = pd.read_csv(os.path.join(tool_annotation_prefix, tool_annotation_txt_row.folder, tool_annotation_txt_row.file), sep="\t")

    # Number of rows the frame log holds for this video.
    n_logged_frames = len(df[df.vid == idx])
    if n_logged_frames != len(phase_txt):
        # The mismatch is only reported, not raised; the last phase row is
        # dropped and processing continues.
        # NOTE(review): this assumes the phase file is exactly one row longer
        # than the frame log -- confirm, other differences stay uncorrected.
        print("Wrong length at index {}, {}/{}.".format(idx, n_logged_frames, len(phase_txt)))
        phase_txt = phase_txt[:-1]

    # Left join on "Frame": every phase row is kept, and frames without a row
    # in the tool annotation file get NaN in the tool columns. Those gaps are
    # not filled here.
    per_video_labels.append(phase_txt.set_index("Frame").join(tool_annotation_txt.set_index("Frame")))

# pd.concat raises on an empty list, so keep the empty-frame result for the
# case where no annotation files were found.
label_df = pd.concat(per_video_labels) if per_video_labels else pd.DataFrame()

print(label_df)
label_df.to_pickle(os.path.join(prefix, out_pkl))




                       Phase  Grasper  Bipolar  Hook  Scissors  Clipper  \
Frame                                                                     
0                Preparation      1.0      0.0   0.0       0.0      0.0   
1                Preparation      NaN      NaN   NaN       NaN      NaN   
2                Preparation      NaN      NaN   NaN       NaN      NaN   
3                Preparation      NaN      NaN   NaN       NaN      NaN   
4                Preparation      NaN      NaN   NaN       NaN      NaN   
...                      ...      ...      ...   ...       ...      ...   
43096  GallbladderRetraction      NaN      NaN   NaN       NaN      NaN   
43097  GallbladderRetraction      NaN      NaN   NaN       NaN      NaN   
43098  GallbladderRetraction      NaN      NaN   NaN       NaN      NaN   
43099  GallbladderRetraction      NaN      NaN   NaN       NaN      NaN   
43100  GallbladderRetraction      NaN      NaN   NaN       NaN      NaN   

       Irrigator  Specim

## Load and Check

In [4]:
import os
import pandas as pd

# Sanity check: reload the stored pickles and print them.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_frames"
data_pkl = "pre_processed_log_test_train"
label_pkl = "pre_processed_log_test_train_labels"

# NOTE(review): the "Train/Test Split" cell writes
# "pre_processed_5th_frame_log_test_train.pkl", which is not the data pickle
# read here -- confirm which file is the intended one.

# Data log first, labels second; `df` ends up holding the label dataframe.
for pkl_name in (data_pkl, label_pkl):
    df = pd.read_pickle(os.path.join(prefix, pkl_name + ".pkl"))
    print(df)



       folder             file vid  frame  \
0       vid_0      frame_0.npy   0      0   
1       vid_0      frame_1.npy   0      1   
2       vid_0      frame_2.npy   0      2   
3       vid_0      frame_3.npy   0      3   
4       vid_0      frame_4.npy   0      4   
...       ...              ...  ..    ...   
43096  vid_79  frame_43096.npy  79  43096   
43097  vid_79  frame_43097.npy  79  43097   
43098  vid_79  frame_43098.npy  79  43098   
43099  vid_79  frame_43099.npy  79  43099   
43100  vid_79  frame_43100.npy  79  43100   

                                                     duv    duv_mpd  train  
0      [[-3.891524076461792, 1.4272427558898926], [1....   2.314646  False  
1      [[-1.1083245277404785, 1.033402919769287], [-0...   1.679518  False  
2      [[-2.08209228515625, 1.7637698650360107], [0.8...   1.389289  False  
3      [[-2.904714584350586, 1.9004476070404053], [2....   1.871081  False  
4      [[-0.5867778658866882, 1.8353252410888672], [1...   1.325304  False