In [93]:
import pandas
import os

In [94]:
# Local project root and the CSV (inside it) that holds the step rows.
path = r'/Users/dorotheeduvaux 1/UCL CSML/MSc Project'
filename = 'data_steps.csv'
# Folder that receives the CSV / pickle artefacts written further down.
PATH_FOR_SAVING = r'/Users/dorotheeduvaux 1/UCL CSML/MSc Project/Video analytics'

# Read the raw rows (columns shown below: approach, number, step, timestamp).
raw_csv_path = os.path.join(path, filename)
raw_dataset_df = pandas.read_csv(raw_csv_path)

In [95]:
# Lookup from the `step` value found in the CSV to the label stored in
# `step_number`. Key spellings (including 'debunking') are reproduced exactly
# as written so that they keep matching the values in the data.
_STEPS_LABELLED_1 = (
    'tumour_debulking',
    'dissection_medial',
    'dissection_inferior',
    'dissection_superior',
    'dissection_lateral',
)
phase_step_map = {step_name: 1 for step_name in _STEPS_LABELLED_1}
phase_step_map.update({
    # Idle time gets a non-numeric tag; the later isin([0, 1, 2]) filter drops it.
    'idle_time': 'p0',
    'rs_approach_and_exposure': 0,
    'rs_tumour_debunking_and_excision': 1,
    'rs_closure': 2,
})

### Creating the phase dataset

In [96]:
# Drop rows that share a timestamp with another row and cause issues later on.
# First: every row whose step is one of these free-text notes.
steps_to_discard = [
    'Cystic component opened',
    'Idle time as drilling?',
    'Drill is being used - Idle time as no label',
]
raw_dataset_df = raw_dataset_df[~raw_dataset_df['step'].isin(steps_to_discard)]

# Second: two individual rows, each pinned down by (video number, step, timestamp).
is_video8_row = (
    (raw_dataset_df['number'] == 8)
    & (raw_dataset_df['step'] == 'dissection_lateral')
    & (raw_dataset_df['timestamp'] == '04:34:22')
)
is_video25_row = (
    (raw_dataset_df['number'] == 25)
    & (raw_dataset_df['step'] == 'Cannot see surgical field')
    & (raw_dataset_df['timestamp'] == '03:30:31')
)
index_to_exclude1 = raw_dataset_df.loc[is_video8_row].index
index_to_exclude2 = raw_dataset_df.loc[is_video25_row].index
raw_dataset_df = raw_dataset_df.drop(index_to_exclude1).drop(index_to_exclude2)
raw_dataset_df

Unnamed: 0,approach,number,step,timestamp
0,RS,1,rs_approach_and_exposure,00:00:04
1,RS,1,rs_tumour_debunking_and_excision,00:02:38
2,RS,1,tumour_debulking,00:03:09
3,RS,1,idle_time,00:04:44
4,RS,1,tumour_debulking,00:05:44
...,...,...,...,...
2657,RS,9,idle_time,05:12:51
2658,RS,9,tumour_debulking,05:16:13
2659,RS,9,rs_closure,05:16:18
2660,RS,9,idle_time,05:27:57


In [97]:
# Work on a copy so the cleaned raw frame stays available unchanged.
df = raw_dataset_df.copy()
# Translate each step into its label; steps absent from phase_step_map become "other".
df['step_number'] = df['step'].map(phase_step_map).fillna("other")
# The strings carry only a time of day, so the parsed values also carry a date
# (see the displayed output). Later cells must build timestamps on that same date.
df['timestamp'] = pandas.to_datetime(df['timestamp'])
# Video numbers in order of first appearance.
video_nums = df['number'].drop_duplicates().values
# Display the result. Nothing is written to disk here: '/' is a directory, not
# a file, so it cannot be used as a CSV target.
df

  df['timestamp'] = pandas.to_datetime(df['timestamp'])


Unnamed: 0,approach,number,step,timestamp,step_number
0,RS,1,rs_approach_and_exposure,2023-11-12 00:00:04,0
1,RS,1,rs_tumour_debunking_and_excision,2023-11-12 00:02:38,1
2,RS,1,tumour_debulking,2023-11-12 00:03:09,1
3,RS,1,idle_time,2023-11-12 00:04:44,p0
4,RS,1,tumour_debulking,2023-11-12 00:05:44,1
...,...,...,...,...,...
2657,RS,9,idle_time,2023-11-12 05:12:51,p0
2658,RS,9,tumour_debulking,2023-11-12 05:16:13,1
2659,RS,9,rs_closure,2023-11-12 05:16:18,2
2660,RS,9,idle_time,2023-11-12 05:27:57,p0


In [98]:
def adjust_frame_folder_string(value, item, base_path=r'/Users/dorotheeduvaux 1/UCL CSML/MSc Project'):
    """Build the path of one extracted frame image.

    The result has the form
    ``<base_path>/RS_data/video_outputs/<item>/video<NN>_<value>.png``,
    where ``NN`` is the video number zero-padded to two digits.

    Args:
        value: Frame identifier placed verbatim in the file name (the caller
            in this notebook passes a six-digit zero-padded string).
        item: Video number. Any value accepted by ``int()`` works, so string
            and numpy integer inputs are handled as well as plain ints.
        base_path: Project root that contains ``RS_data``. Defaults to the
            local path used throughout this notebook, so existing two-argument
            calls behave as before.

    Returns:
        str: The joined file path.
    """
    # Two-digit padding: 1 -> '01', 13 -> '13'.
    video_num = f'{int(item):02d}'
    frame_file = f'video{video_num}_{value}.png'
    return os.path.join(base_path, 'RS_data/video_outputs', str(item), frame_file)

In [99]:
#### Total timestamp (HH:MM:SS) of each video, keyed by video number.
#### The per-video loop below uses it as the end point of the frame grid.
overall_total_timestamp = dict([
    (1, '06:14:19'),
    (4, '05:54:02'),
    (6, '06:49:20'),
    (7, '06:40:15'),
    (8, '05:17:33'),
    (9, '05:37:32'),
    (11, '03:01:30'),
    (12, '03:16:10'),
    (13, '02:32:50'),
    (14, '04:58:50'),
    (15, '01:24:16'),
    (16, '03:41:07'),
    (17, '06:01:18'),
    (20, '05:33:36'),
    (21, '05:24:28'),
    (22, '06:11:55'),
    (23, '01:58:34'),
    (24, '06:52:36'),
    (25, '05:08:07'),
    (26, '04:47:45'),
    (27, '03:21:16'),
])

In [100]:
# For every video: expand the step rows to one row per second, attach the path
# of the frame that falls on each second, keep only steps labelled 0/1/2, and
# finally write all videos to a single CSV.
pandas_lists = []
import pprint  # unused in this cell; kept in case cells outside this view rely on it
for item in video_nums:
    try:
        data = df[df['number'] == item]
        data = data.sort_values('timestamp').reset_index(drop=True)

        # The parsed timestamps carry the date on which df was built. Anchor the
        # end-of-video timestamp and the frame grid to that same date instead of
        # re-parsing time-only strings here, which would silently use the current
        # date and no longer line up if this cell runs on a later day.
        video_start = data['timestamp'].iloc[0].normalize()
        video_end = video_start + pandas.to_timedelta(overall_total_timestamp[item])

        # Closing row at the total duration so that forward-filling reaches the end.
        data.loc[len(data)] = {'approach':'RS', 'number': item, 'step':'rs_closure', 'timestamp': video_end, 'step_number':2}

        # sample at 1fps (the first row wins when two rows share a timestamp)
        data = data.set_index('timestamp')
        data = data[~data.index.duplicated()]
        data = data.resample('1s').ffill()

        # Frame grid at 5fps (one slot every 200 ms), numbered from 000001, with
        # each number turned into the full path of the extracted image.
        # NOTE(review): the row at the total duration maps to frame 5*seconds + 1;
        # if extraction yields exactly 5 frames per second that file does not
        # exist, which would match the "No such file" messages printed by the
        # copy step at the end of the notebook. Confirm before relying on the last row.
        time_data = pandas.DataFrame({'timestamp': [video_start, video_end]})
        time_data = time_data.set_index('timestamp')
        time_data = time_data.resample('200ms').ffill()
        time_data['frame_number'] = [str(i).zfill(6) for i in range(1, len(time_data)+1)]
        time_data['frame_number'] = time_data['frame_number'].apply(adjust_frame_folder_string, args=(item,))

        # Join on the file location
        data = pandas.merge(data, time_data, on='timestamp', how='left')

        # restrict to the steps that we are interested in + formatting
        data = data[data['step_number'].isin([0,1,2])].reset_index()
        data['timestamp'] = data['timestamp'].dt.time

        pandas_lists.append(data)
    except Exception as e:
        # Best effort: report the video that failed and carry on with the rest.
        print(f"Problem with video {item}: ", e)

result = pandas.concat(pandas_lists)
result.to_csv(os.path.join(PATH_FOR_SAVING, 'reduced_phase_frames_data.csv'), index=False)

### Creating a pkl file


In [103]:
# Reload the per-second table written by the previous section.
frames_csv_path = os.path.join(PATH_FOR_SAVING, 'reduced_phase_frames_data.csv')
frames = pandas.read_csv(frames_csv_path)
def modify_frame_name(value):
    """Rewrite a local frame path so it points at the server's frame folder.

    Every occurrence of the local ``video_outputs`` root inside ``value`` is
    replaced by the server root; strings without it come back unchanged.
    """
    local_root = '/Users/dorotheeduvaux 1/UCL CSML/MSc Project/RS_data/video_outputs'
    server_root = '/home/dorothee/RS_data/final_frames'
    return value.replace(local_root, server_root)
# Point every frame path at the server location.
frames['frame_number'] = frames['frame_number'].map(modify_frame_name)
# Video numbers present in the reloaded table, in order of first appearance.
video_nums = frames['number'].unique()
frames

Unnamed: 0,timestamp,approach,number,step,step_number,frame_number
0,00:00:04,RS,1,rs_approach_and_exposure,0,/home/dorothee/RS_data/final_frames/1/video01_...
1,00:00:05,RS,1,rs_approach_and_exposure,0,/home/dorothee/RS_data/final_frames/1/video01_...
2,00:00:06,RS,1,rs_approach_and_exposure,0,/home/dorothee/RS_data/final_frames/1/video01_...
3,00:00:07,RS,1,rs_approach_and_exposure,0,/home/dorothee/RS_data/final_frames/1/video01_...
4,00:00:08,RS,1,rs_approach_and_exposure,0,/home/dorothee/RS_data/final_frames/1/video01_...
...,...,...,...,...,...,...
285593,05:27:53,RS,9,rs_closure,2,/home/dorothee/RS_data/final_frames/9/video09_...
285594,05:27:54,RS,9,rs_closure,2,/home/dorothee/RS_data/final_frames/9/video09_...
285595,05:27:55,RS,9,rs_closure,2,/home/dorothee/RS_data/final_frames/9/video09_...
285596,05:27:56,RS,9,rs_closure,2,/home/dorothee/RS_data/final_frames/9/video09_...


In [104]:
## Option 1 (17/4)
import pickle

# TODO: amend the path name if I'm running on the server

# Videos held out from training.
# NOTE(review): the validation and test lists are identical - confirm this is intended.
validation_dataset = [6, 20, 25, 27]
test_dataset = [6, 20, 25, 27]
total_testing_dataset = list(set(validation_dataset + test_dataset))
training_dataset = [video for video in video_nums if video not in total_testing_dataset]

# Per split: frame count of each video, all frame paths, all labels.
train_num, train_paths, train_labels = [], [], []
val_num, val_paths, val_labels = [], [], []
test_num, test_paths, test_labels = [], [], []

split_targets = (
    (training_dataset, train_num, train_paths, train_labels),
    (validation_dataset, val_num, val_paths, val_labels),
    (test_dataset, test_num, test_paths, test_labels),
)

for video in video_nums:
    data = frames[frames['number'] == video]
    # A video is added to every split whose list contains it.
    for split_videos, split_counts, split_paths, split_labels in split_targets:
        if video in split_videos:
            split_counts.append(len(data))
            split_paths.extend(data['frame_number'].values)
            split_labels.extend(data['step_number'].values)

# Positional layout of the pickled list:
# [train paths, val paths, train labels, val labels, train counts, val counts,
#  test paths, test labels, test counts]
train_val_test_paths_labels = [
    train_paths,
    val_paths,
    train_labels,
    val_labels,
    train_num,
    val_num,
    test_paths,
    test_labels,
    test_num,
]

pickle_path = os.path.join(PATH_FOR_SAVING,'server_train_val_paths_phases_labels.pkl')
with open(pickle_path, 'wb') as f:
    pickle.dump(train_val_test_paths_labels, f)

### Moving the required files

In [129]:
import shutil
import os
from pathlib import Path
import pandas

In [130]:
# Reload the saved per-second table; its frame_number column lists the files to copy.
results_csv_path = os.path.join(PATH_FOR_SAVING, 'reduced_phase_frames_data.csv')
results = pandas.read_csv(results_csv_path)

In [131]:
# Copy every frame referenced in the table to the external drive, mirroring the
# per-video folder layout under 'final_phase_frames'.
files = results['frame_number'].values
created_dirs = set()  # destination folders already ensured, so each is created once
for source_file in files:
    # Same relative location, different root and top-level folder name.
    destination_file = Path(
        str(Path(source_file))
        .replace('video_outputs', 'final_phase_frames')
        .replace('/Users/dorotheeduvaux 1/UCL CSML/MSc Project/RS_data',
                 '/Volumes/Seagate/RS data/')
    )
    destination_dir = destination_file.parent
    if destination_dir not in created_dirs:
        # exist_ok avoids the check-then-create race and a per-file existence test.
        os.makedirs(destination_dir, exist_ok=True)
        created_dirs.add(destination_dir)
    try:
        shutil.copy(source_file, destination_file)
    except OSError as e:
        # Best effort: report a missing or unreadable source frame and continue.
        print(e)

[Errno 2] No such file or directory: '/Users/dorotheeduvaux 1/UCL CSML/MSc Project/RS_data/video_outputs/1/video01_112296.png'
[Errno 2] No such file or directory: '/Users/dorotheeduvaux 1/UCL CSML/MSc Project/RS_data/video_outputs/13/video13_045851.png'
[Errno 2] No such file or directory: '/Users/dorotheeduvaux 1/UCL CSML/MSc Project/RS_data/video_outputs/14/video14_089651.png'
[Errno 2] No such file or directory: '/Users/dorotheeduvaux 1/UCL CSML/MSc Project/RS_data/video_outputs/16/video16_066336.png'
[Errno 2] No such file or directory: '/Users/dorotheeduvaux 1/UCL CSML/MSc Project/RS_data/video_outputs/17/video17_108391.png'
[Errno 2] No such file or directory: '/Users/dorotheeduvaux 1/UCL CSML/MSc Project/RS_data/video_outputs/21/video21_097341.png'
[Errno 2] No such file or directory: '/Users/dorotheeduvaux 1/UCL CSML/MSc Project/RS_data/video_outputs/22/video22_111576.png'
[Errno 2] No such file or directory: '/Users/dorotheeduvaux 1/UCL CSML/MSc Project/RS_data/video_outputs/