In [1]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import os
import torch
import torch.nn.functional as F

In [2]:
# Read the table of usable ultrasound recordings, remove the 'Unnamed: 0'
# column (presumably an index that was saved into the CSV - TODO confirm) and
# keep only the rows whose anatomy is not labelled as background.
available = (
    pd.read_csv("/home/nicke/MasterThesis/available_US_probands.csv")
    .drop(columns='Unnamed: 0')
    .loc[lambda table: table['Anatomy'] != 'BACKGROUND']
)
available

Unnamed: 0,Id,Patient,Device,Leg,Anatomy,Landmark,Datatype,Vessel in Frame,LM configuration,Vein contrast/cropping error,Artery contrast,Vein boundary,Artery boundary,Gain,Depth,Artefacts,Movement,Total
0,4,1,guys,LT,SFV1,LM5,healthy,1.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,15
1,9,2,guys,RT,SFV1,LM5,healthy,1.0,1.0,3.0,3.0,2.0,2.0,1.0,1.0,2.0,1.0,17
2,10,2,guys,RT,SFV2,LM6,healthy,1.0,1.0,3.0,3.0,3.0,3.0,1.0,1.0,2.0,1.0,19
3,12,2,guys,LT,SFV1,LM5,healthy,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,13
4,13,2,guys,LT,SFV2,LM6,healthy,1.0,2.0,2.0,2.0,3.0,3.0,1.0,1.0,2.0,1.0,18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
468,5394,228,philips,LT,CFV + SFA / PFA,LM3,healthy,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,14
469,5396,228,philips,LT,GSV / CFV + SFA / PFA,"LM1,LM3",healthy,1.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,17
470,5682,280,philips,RT,SFV + SFA,LM6,healthy,1.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,16
471,5687,282,philips,RT,SFV + SFA,LM6,healthy,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,12


In [3]:
# Root directory of the recordings: one sub-folder per recording id, each of
# which is read via "frames" and "segmentations/1" by the loader functions.
path_to_data = '/share/data_ultraschall/compressions'

In [4]:
# Recording ids of the available table, as strings.
# NOTE(review): id_list is reassigned from the merged table a few cells further
# down before anything reads this value, so this cell looks redundant.
id_list = (available.Id.values).astype(str)

In [5]:
# Load the landmark annotations and keep only the rows with usable frame
# ranges: a non-empty start list and an end list that is neither empty nor 'DNC'.
landmarks = pd.read_csv('/home/nicke/MasterThesis/landmarks.csv')
has_start = landmarks['Start Frames'] != '[]'
has_end = ~landmarks['End Frames'].isin(['[]', 'DNC'])
landmarks = landmarks[has_start & has_end]
landmarks

Unnamed: 0,Id,Landmark,Total Frames,Start Frames,End Frames
0,152,"LM1,LM2",229,[7],[193]
1,156,"LM1,LM2,LM3",258,[22],[187]
2,160,"LM1,LM2",223,[14],[144]
3,24,"LM1, LM2, LM3",90,[0],[80]
4,52,LM1,65,[11],[47]
...,...,...,...,...,...
4987,2764,"LM9, LM10, LM8, LM8, LM10, LM9",187,"[1, 31, 72, 99, 133, 152]","[33, 73, 101, 135, 153, 187]"
4988,2884,LM3,39,[5],[23]
4989,2765,LM8,83,[27],[54]
4990,2885,"LM4,LM5",39,[7],[27]


In [6]:
# Join the landmark annotations with the available recordings on the id column.
# merge() performs an inner join by default, so only ids found in both tables remain.
data = landmarks.merge(available, on='Id')

In [7]:
# Ids of all rows in the merged table, converted to strings because they are
# later joined into directory paths.
id_list = (data.Id.values).astype(str)

In [8]:
def load_image_and_seg(path, image):
    """Load one frame and its segmentation mask as scaled arrays.

    Args:
        path: Directory of a single recording; the frame is read from its
            ``frames`` sub-folder and the mask from ``segmentations/1``.
        image: File name shared by the frame and its mask.

    Returns:
        Tuple ``(img, seg)``: the frame divided by 255 and the mask divided
        by 200.
    """
    frame_file = os.path.join(path, "frames", image)
    mask_file = os.path.join(path, "segmentations", "1", image)

    # grey values are scaled by 255
    img = np.array(Image.open(frame_file)) / 255

    # label values are scaled by 200
    # (presumably the largest label value stored in the masks - TODO confirm)
    seg = np.array(Image.open(mask_file)) / 200

    return img, seg

def _parse_frame_list(text):
    """Turn a stringified list such as ``"[1, 31, 72]"`` into a 1-D int array."""
    tokens = text.strip('][').split(',')
    return np.array([int(tok) for tok in tokens if tok.strip()], dtype=int)


def get_image_seg_pairs(prob_id, frame_step=2, max_pairs=7):
    """Build (fixed, moving) frame pairs and mask pairs for one recording.

    For every annotated segment (one entry of 'Start Frames' together with the
    matching entry of 'End Frames' in ``landmarks``) the start frame is the
    fixed image. It is paired with the frames ``start + frame_step``,
    ``start + 2 * frame_step``, ... that lie strictly before the end frame.
    Pairs in which either mask is completely empty are dropped.

    Args:
        prob_id: Recording id as a string; also the name of the recording's
            directory below ``path_to_data``.
        frame_step: Distance in frames between consecutive moving images.
        max_pairs: Cap on the number of pairs. It is compared against the
            running total over all segments, exactly like the former
            hard-coded ``len(file_pairs) > 6`` check: once the total is
            reached, each further segment still contributes one pair before
            the check stops it.
            NOTE(review): confirm whether a per-segment cap was intended.

    Returns:
        Tuple ``(frame_pairs, seg_pairs)`` of NumPy arrays whose entries are
        ``[fixed, moving]`` images and ``[fixed_seg, moving_seg]`` masks.
        Both arrays are empty when no pair survives.

    Raises:
        ValueError: If ``frame_step`` is smaller than 1, or if the annotation
            lists fewer end frames than start frames.
    """
    if frame_step < 1:
        raise ValueError(f"frame_step must be >= 1, got {frame_step}")

    print(f"Working on id: {prob_id}")
    recording_dir = os.path.join(path_to_data, prob_id)
    # NOTE(review): frame numbers are used as positions in the lexicographically
    # sorted listing, which assumes zero-padded file names - confirm.
    frame_files = sorted(os.listdir(os.path.join(recording_dir, "frames")))

    # Look the annotation row up once; the first matching row is used.
    annotation = landmarks[landmarks['Id'] == int(prob_id)]
    # Keep *all* start frames. The previous version took element [0] and then
    # tried to iterate over the resulting scalar, which raises a TypeError.
    start_frames = _parse_frame_list(annotation['Start Frames'].iat[0])
    end_frames = _parse_frame_list(annotation['End Frames'].iat[0])
    if len(end_frames) < len(start_frames):
        raise ValueError(
            f"id {prob_id}: {len(start_frames)} start frames but only "
            f"{len(end_frames)} end frames"
        )

    file_pairs = []
    for f_frame, l_frame in zip(start_frames, end_frames):
        offset = frame_step
        while f_frame + offset < l_frame:
            file_pairs.append([frame_files[f_frame], frame_files[f_frame + offset]])
            offset += frame_step
            if len(file_pairs) >= max_pairs:
                break

    frame_pairs = []
    seg_pairs = []

    # load the seg and frame for fixed and moving
    for fixed_file, moving_file in file_pairs:
        fixed, fixed_seg = load_image_and_seg(recording_dir, fixed_file)
        moving, moving_seg = load_image_and_seg(recording_dir, moving_file)

        # a pair is only useful when both masks contain at least one label
        if fixed_seg.max() == 0 or moving_seg.max() == 0:
            continue

        # and store them together
        frame_pairs.append([fixed, moving])
        seg_pairs.append([fixed_seg, moving_seg])

    return np.array(frame_pairs), np.array(seg_pairs)
    

In [9]:
# Gather the frame pairs and mask pairs of every recording into flat lists.
# NOTE(review): `ids` is created here but nothing in this cell fills it.
frames, segs, ids = [], [], []
for prob_id in id_list:
    frame_pairs, seg_pairs = get_image_seg_pairs(prob_id)

    # extend() walks the first axis, i.e. adds one [fixed, moving] pair at a time
    frames.extend(frame_pairs)
    segs.extend(seg_pairs)


Working on id: 124
Working on id: 413
Working on id: 419
Working on id: 524
Working on id: 545
Working on id: 209
Working on id: 216
Working on id: 456
Working on id: 420
Working on id: 546
Working on id: 214
Working on id: 304
Working on id: 316
Working on id: 418
Working on id: 523
Working on id: 543
Working on id: 544
Working on id: 549
Working on id: 601
Working on id: 610
Working on id: 626
Working on id: 667
Working on id: 668
Working on id: 692
Working on id: 752
Working on id: 772
Working on id: 773
Working on id: 1046
Working on id: 1048
Working on id: 1092
Working on id: 1122
Working on id: 1123
Working on id: 1145
Working on id: 172
Working on id: 176
Working on id: 177
Working on id: 184
Working on id: 211
Working on id: 219
Working on id: 246
Working on id: 307
Working on id: 341
Working on id: 349
Working on id: 353
Working on id: 354
Working on id: 384
Working on id: 402
Working on id: 408
Working on id: 416
Working on id: 426
Working on id: 442
Working on id: 452
Workin

In [10]:
# Stack the collected pairs into one array each and wrap them as tensors.
# torch.from_numpy shares memory with the NumPy array instead of copying it.
all_frames = torch.from_numpy(np.array(frames))
all_segs = torch.from_numpy(np.array(segs))

In [11]:
# Sanity check: the frame tensor and the mask tensor must have identical shapes.
assert all_frames.shape == all_segs.shape

In [12]:
# Display the tensor shape: number of pairs, 2 (fixed and moving), then the image axes.
all_frames.shape

torch.Size([1736, 2, 150, 150])

In [13]:
# 90/10 split over the pair indices.
# A seeded generator makes the split reproducible on "Restart & Run All"; the
# torch seed that is set before the tensors are sliced does not influence
# NumPy sampling.
# NOTE(review): the split is made over individual pairs, so pairs from one
# recording can end up in both sets - confirm this is acceptable for evaluation.
n_pairs = len(all_frames)
split_rng = np.random.default_rng(42)
train_idx = split_rng.choice(n_pairs, size=int(n_pairs * 0.9), replace=False)
# All indices that were not drawn for training, in ascending order
# (replaces the former element-by-element filtering loop).
test_idx = np.setdiff1d(np.arange(n_pairs), train_idx)

In [14]:
# Display the sampled training indices.
train_idx

array([ 837,  728, 1520, ...,  166,  233, 1721])

In [15]:
# Show the sizes of the training and test index sets.
print(train_idx.shape)
test_idx.shape

(1562,)


(174,)

In [16]:
# Seeds torch's global RNG; the index arrays used below were drawn with NumPy.
torch.manual_seed(42)

# Convert both index arrays once and reuse them for frames and masks.
train_sel = torch.from_numpy(train_idx)
test_sel = torch.from_numpy(test_idx)

# From here on `frames` / `segs` name the training tensors (they were lists before).
frames, segs = all_frames[train_sel], all_segs[train_sel]
test_frames, test_segs = all_frames[test_sel], all_segs[test_sel]

In [17]:
# Write the held-out frames and masks to disk.
for tensor, target in (
    (test_frames, "/share/data_ultraschall/nicke_ma/data/test_frames_oneFixed_multipleMoving_dist2.pth"),
    (test_segs, "/share/data_ultraschall/nicke_ma/data/test_segs_oneFixed_multipleMoving_dist2.pth"),
):
    torch.save(tensor, target)

In [18]:
# Write the training frames and masks to disk.
for tensor, target in (
    (frames, "/share/data_ultraschall/nicke_ma/data/frames_oneFixed_multipleMoving_dist2.pth"),
    (segs, "/share/data_ultraschall/nicke_ma/data/segs_oneFixed_multipleMoving_dist2.pth"),
):
    torch.save(tensor, target)