## Train/Test Split

In [2]:
import os
import sys
sys.path.append("/home/martin/Dev/homography_imitation_learning")
import pandas as pd
from utils.processing import unique_video_train_test

# Directory that holds the pre-processed log and the base name of the pickle
# (without extension).
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_frames"
in_pkl = "22_11_09_loftr_log_pre_processed"

# Input pickle and the sibling file the result is written to.
split_source = os.path.join(prefix, in_pkl + ".pkl")
split_target = os.path.join(prefix, in_pkl + "_test_train.pkl")

# Load the log and hand it to the project helper; presumably the helper marks
# each video as train or test -- confirm against utils.processing.
df = unique_video_train_test(pd.read_pickle(split_source))
df.to_pickle(split_target)

## Labels Dataframe

In [None]:
import os
import sys
sys.path.append("/home/martin/Dev/homography_imitation_learning")
import numpy as np
import pandas as pd
from utils.io import recursive_scan2df

# Build one label table (surgical phase + tool annotations) for all videos that
# appear in the frame log, and store it as a pickle next to the log.

prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_single_video_frames_cropped"
in_pkl = "pre_processed_5th_frame_log_new_test_train"
out_pkl = "pre_processed_5th_frame_log_new_test_train_labels.pkl"

phase_prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80/phase_annotations"
tool_annotation_prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80/tool_annotations"

# Frame log; only its `vid` column is read in this cell.
# NOTE: unpickling can execute arbitrary code -- only load pickles you created yourself.
df = pd.read_pickle(os.path.join(prefix, in_pkl + ".pkl"))


def _vid_from_annotation_file(file_name):
    """Return the zero-based video id encoded in an annotation file name.

    The two characters in front of the first "-" are parsed as the one-based
    video number, e.g. a name starting with "video01-" yields 0.
    """
    return int(file_name.split("-")[0][-2:]) - 1


# Collect the annotation files and bring both listings into the same order.
phase_df = recursive_scan2df(phase_prefix, ".txt")
tool_annotation_df = recursive_scan2df(tool_annotation_prefix, ".txt")

phase_df = phase_df.sort_values("file").reset_index(drop=True)
tool_annotation_df = tool_annotation_df.sort_values("file").reset_index(drop=True)

phase_df["vid"] = phase_df.file.apply(_vid_from_annotation_file)
tool_annotation_df["vid"] = tool_annotation_df.file.apply(_vid_from_annotation_file)

# Hoisted out of the loop: the set of videos present in the frame log.
known_vids = set(df.vid.unique())

label_frames = []      # one joined table per processed video, concatenated once at the end
label_columns = set()  # union of label columns seen so far (diagnostic only)

for (_, phase_txt_row), (_, tool_annotation_txt_row) in zip(phase_df.iterrows(), tool_annotation_df.iterrows()):
    # Use the video id parsed from the file name. The positional index of the
    # sorted listing only coincides with it when no annotation file is missing.
    vid = phase_txt_row.vid

    if vid not in known_vids:
        print("Skipping vid {}".format(vid))
        continue

    # Rows are paired by position; refuse to join annotations of different videos.
    if tool_annotation_txt_row.vid != vid:
        raise RuntimeError(
            "Phase/tool annotation files are not aligned: vid {} vs. vid {}.".format(
                vid, tool_annotation_txt_row.vid
            )
        )

    n_frames = int((df.vid == vid).sum())

    phase_txt = pd.read_csv(os.path.join(phase_prefix, phase_txt_row.folder, phase_txt_row.file), sep="\t")
    tool_annotation_txt = pd.read_csv(os.path.join(tool_annotation_prefix, tool_annotation_txt_row.folder, tool_annotation_txt_row.file), sep="\t")

    # correct padded end: keep at most as many annotation rows as the log has frames
    phase_txt = phase_txt[:n_frames].reset_index(drop=True)
    tool_annotation_txt = tool_annotation_txt[:n_frames].reset_index(drop=True)

    if n_frames != len(phase_txt):
        # Fewer phase rows than logged frames: report it and drop the last phase row.
        # NOTE(review): dropping a row widens the gap further -- confirm this is intended.
        print("Wrong length at index {}, {}/{}.".format(vid, n_frames, len(phase_txt)))
        phase_txt = phase_txt[:-1]

    # Left join on "Frame": every phase row is kept; frames without a tool
    # annotation end up with NaN in the tool columns (no fill is performed here).
    vid_labels = phase_txt.set_index("Frame").join(tool_annotation_txt.set_index("Frame"))
    label_frames.append(vid_labels)

    # Diagnostic: report when the accumulated table grows beyond 8 columns.
    label_columns.update(vid_labels.columns)
    if len(label_columns) > 8:
        print(vid, " Merge: ", len(label_columns), " phase: ", len(phase_txt.columns), " tool:  ", len(tool_annotation_txt.columns))


# Concatenate once (DataFrame.append is quadratic in a loop and was removed in
# pandas 2.0); fall back to an empty frame when every video was skipped.
label_df = pd.concat(label_frames) if label_frames else pd.DataFrame()

# Turn the "Frame" index back into a regular (last) column.
label_df["Frame"] = label_df.index
label_df = label_df.reset_index(drop=True)
print(label_df)
label_df.to_pickle(os.path.join(prefix, out_pkl))




# Remove NaN

In [4]:
import os
import pandas as pd

# Switch between the small toy log and the full single-video data set.
toy = True

if not toy:
    prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_single_video_frames_cropped"
    data_pkl = "pre_processed_5th_frame_log_new_test_train"
    label_pkl = "pre_processed_5th_frame_log_new_test_train_labels"
else:
    # The toy data set has no label pickle, so `label_pkl` is not defined here.
    prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_frames"
    data_pkl = "22_11_09_loftr_log_pre_processed_test_train"

data_stem = os.path.join(prefix, data_pkl)
df = pd.read_pickle(data_stem + ".pkl")

# Boolean mask (plain numpy array) of the rows whose duv_mpd is present.
valid = df.duv_mpd.notna().values

print(len(df))  # row count before filtering
df = df[valid]
print(len(df), df.vid.unique().size * 5)  # row count after filtering, 5 x number of videos

df.to_pickle(data_stem + "_no_nan.pkl")

if not toy:
    label_stem = os.path.join(prefix, label_pkl)
    label_df = pd.read_pickle(label_stem + ".pkl")

    # Apply the same positional mask to the labels; the printed differences show
    # the length gap between labels and filtered log before and after masking.
    print(len(label_df) - len(df))
    label_df = label_df[valid]
    print(len(label_df) - len(df))

    label_df.to_pickle(label_stem + "_no_nan.pkl")

3000
596 20


## Load and Check

In [7]:
import os
import pandas as pd

# Location and base names of the pickles to inspect.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_frames"
data_pkl = "22_11_09_loftr_log_pre_processed_test_train_no_nan"
label_pkl = "pre_processed_5th_frame_log_test_train_labels"

# Load the NaN-free log and show it underneath a short caption.
log_path = os.path.join(prefix, data_pkl + ".pkl")
df = pd.read_pickle(log_path)
print("Log:", df, sep="\n")

# Optional: inspect the label pickle the same way.
# df = pd.read_pickle(os.path.join(prefix, label_pkl + ".pkl"))
# print("Labels:")
# print(df)



Log:
     folder           file  vid  frame  \
0     vid_0    frame_0.npy    0      0   
5     vid_0    frame_5.npy    0      5   
10    vid_0   frame_10.npy    0     10   
15    vid_0   frame_15.npy    0     15   
20    vid_0   frame_20.npy    0     20   
...     ...            ...  ...    ...   
2970  vid_3  frame_720.npy    3    720   
2975  vid_3  frame_725.npy    3    725   
2980  vid_3  frame_730.npy    3    730   
2985  vid_3  frame_735.npy    3    735   
2990  vid_3  frame_740.npy    3    740   

                                                    duv    duv_mpd  train  
0     [[-7.966131687164307, 17.566049575805664], [-6...  16.792545   True  
5     [[-4.547332286834717, 15.534305572509766], [-8...  17.728071   True  
10    [[-11.780838966369629, 14.508074760437012], [-...  15.166304   True  
15    [[-2.243757963180542, 16.322011947631836], [-0...  11.264307   True  
20    [[-3.3895256519317627, 24.412181854248047], [4...  21.069513   True  
...                               

# Check Size

In [None]:
import os
import pandas as pd
import numpy as np
import math

# Shrink the NaN-free frame log by down-casting its numeric columns and store
# the result with a "_compressed" suffix.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_single_video_frames_cropped"
data_pkl = "pre_processed_5th_frame_log_new_test_train_no_nan"

df = pd.read_pickle(
    os.path.join(prefix, data_pkl) + ".pkl"
)

# Cast the column itself: mapping np.single over the values with `apply` only
# rounds them, pandas then re-infers the result as a float64 column.
df["duv_mpd"] = df["duv_mpd"].astype(np.float32)
df["vid"] = pd.to_numeric(df["vid"], downcast="integer")
df["frame"] = pd.to_numeric(df["frame"], downcast="integer")

# Convert every coordinate of the nested duv lists to single precision.
# `x == x` is False only for NaN placeholders, which are passed through untouched.
# NOTE(review): the entries stay boxed scalars inside Python lists, so the
# in-memory saving may be small -- check the report below.
df["duv"] = df["duv"].apply(lambda x: [list(map(np.single, xi)) for xi in x] if x == x else x)

# DataFrame.info writes its report to stdout and returns None; wrapping it in
# print() would append a stray "None" line.
df.info(memory_usage="deep")
df.to_pickle(os.path.join(prefix, data_pkl + "_compressed.pkl"))

In [None]:
import os
import numpy as np
import pandas as pd

# Shrink the frame log (which may still contain NaN rows) by down-casting its
# numeric columns and store the result with a "_compressed" suffix.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_single_video_frames_cropped"
data_pkl = "pre_processed_5th_frame_log_test_train"

df = pd.read_pickle(
    os.path.join(prefix, data_pkl) + ".pkl"
)

# Cast the column itself: mapping np.single over the values with `apply` only
# rounds them, pandas then re-infers the result as a float64 column.
df["duv_mpd"] = df["duv_mpd"].astype(np.float32)
df["vid"] = pd.to_numeric(df["vid"], downcast="integer")
df["frame"] = pd.to_numeric(df["frame"], downcast="integer")

# Convert every coordinate of the nested duv lists to half precision.
# `x == x` is False only for NaN placeholders, which are passed through untouched.
# NOTE(review): the entries stay boxed scalars inside Python lists, so the
# in-memory saving may be small -- check the report below.
df["duv"] = df["duv"].apply(lambda x: [list(map(np.float16, xi)) for xi in x] if x == x else x)

# DataFrame.info writes its report to stdout and returns None; wrapping it in
# print() would append a stray "None" line.
df.info(memory_usage="deep")
df.to_pickle(os.path.join(prefix, data_pkl + "_compressed.pkl"))

In [None]:
import pandas as pd

# Report the memory footprint of the compressed NaN-free log.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_single_video_frames_cropped"
df = pd.read_pickle(prefix + "/pre_processed_5th_frame_log_new_test_train_no_nan_compressed.pkl")
# DataFrame.info prints its report itself and returns None, so it is not
# wrapped in print() (that would add a stray "None" line).
df.info(memory_usage="deep")

In [None]:
import pandas as pd

# Report the memory footprint of the compressed log.
prefix = "/media/martin/Samsung_T5/data/endoscopic_data/cholec80_single_video_frames_cropped"
df = pd.read_pickle(prefix + "/pre_processed_5th_frame_log_test_train_compressed.pkl")
# DataFrame.info prints its report itself and returns None, so it is not
# wrapped in print() (that would add a stray "None" line).
df.info(memory_usage="deep")