In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import cv2
from turbojpeg import TurboJPEG
import os
import sys
import joblib
sys.path.append('/home/simon/Code/MasterThesis/project/include')

In [15]:
# Dataset locations and the size that resized images are written with.
OUTPUT_RESOLUTION = (1280, 960)  # handed to cv2.resize as the target size
OUTPUT_PATH = 'processed_retina_data'  # joined onto DATA_BASE for processed images
DATA_BASE = '/data/simon'

# Read the label table that lives directly under DATA_BASE.
labels_csv = os.path.join(DATA_BASE, 'trainLabels.csv')
labels = pd.read_csv(labels_csv)
labels.head()

Unnamed: 0,image,level
0,10_left,0
1,10_right,0
2,13_left,0
3,13_right,0
4,15_left,1


In [16]:
def _severity_from_level(level):
    """Collapse a grading level into a binary label: 0 for levels up to 1, otherwise 1."""
    return 0 if level <= 1 else 1


def _raw_image_path(name):
    """Build the path of the raw JPEG that belongs to an image name from the label table."""
    return os.path.join(DATA_BASE, 'retina_data/', f'{name}.jpeg')


labels['severity'] = labels['level'].map(_severity_from_level)
labels['image'] = labels['image'].map(_raw_image_path)

# Show which distinct values occur in the original and the derived label column.
for column in ('level', 'severity'):
    print(labels[column].unique())

labels.head()

[0 1 2 4 3]
[0 1]


Unnamed: 0,image,level,severity
0,/data/simon/retina_data/10_left.jpeg,0,0
1,/data/simon/retina_data/10_right.jpeg,0,0
2,/data/simon/retina_data/13_left.jpeg,0,0
3,/data/simon/retina_data/13_right.jpeg,0,0
4,/data/simon/retina_data/15_left.jpeg,1,0


In [12]:
# Decode every raw image, reject unreadable or undersized ones, record the
# original dimensions in `labels`, and write a resized JPEG copy of each
# accepted image to DATA_BASE/OUTPUT_PATH under the same file name.
print('Table size before filtering: ', len(labels))
jpeg = TurboJPEG('/opt/libjpeg-turbo/lib64/libturbojpeg.so')

# Images below this size (in pixels) are excluded from the table.
MIN_HEIGHT, MIN_WIDTH = 600, 800

output_dir = os.path.join(DATA_BASE, OUTPUT_PATH)
# Create the destination up front so the first write cannot fail on a missing directory.
os.makedirs(output_dir, exist_ok=True)

# The reporting interval is fixed before the loop: it must not depend on a
# table length that shrinks while filtering, and max(1, ...) prevents a modulo
# by zero when the table has fewer than 100 rows.
progress_step = max(1, len(labels) // 100)

# Rejected rows are collected and removed after the loop; the frame is never
# shrunk while it is being iterated.
rejected = []

for position, (index, row) in enumerate(labels.iterrows(), start=1):
    try:
        with open(row.image, 'rb') as in_file:
            img = jpeg.decode(in_file.read())
    except OSError as error:
        # Covers a missing file as well as data TurboJPEG cannot decode
        # (PyTurboJPEG reports fatal decode errors as IOError/OSError).
        print('Skipping unreadable image: ', row.image, error)
        img = None

    if img is None:
        rejected.append(index)
    else:
        h, w = img.shape[:2]
        if h < MIN_HEIGHT or w < MIN_WIDTH:
            rejected.append(index)
        else:
            labels.at[index, 'height'] = h
            labels.at[index, 'width'] = w
            labels.at[index, 'ratio'] = w / h

            # Resizes to the fixed output size; the original aspect ratio is
            # kept only in the 'ratio' column, not in the written image.
            img = cv2.resize(img, OUTPUT_RESOLUTION)
            out_path = os.path.join(output_dir, os.path.basename(row.image))
            with open(out_path, 'wb') as out_file:
                out_file.write(jpeg.encode(img))

    if position % progress_step == 0:
        print('Progress: ', position)

labels.drop(index=rejected, inplace=True)

print('Table size after filtering: ', len(labels))



Table size before filtering:  35126
Progress:  350
Progress:  701
Progress:  1052
Progress:  1403
Progress:  1754
Progress:  2105
Progress:  2456
Progress:  2807
Progress:  3158
Progress:  3509
Progress:  3860
Progress:  4211
Progress:  4562
Progress:  4913
Progress:  5264
Progress:  5615
Progress:  5966
Progress:  6317
Progress:  6668
Progress:  7019
Progress:  7370
Progress:  7721
Progress:  8072
Progress:  8423
Progress:  8774
Progress:  9125
Progress:  9476
Progress:  9827
Progress:  10178
Progress:  10529
Progress:  10880
Progress:  11231
Progress:  11582
Progress:  11933
Progress:  12284
Progress:  12635
Progress:  12986
Progress:  13337
Progress:  13688
Progress:  14039
Progress:  14390
Progress:  14741
Progress:  15092
Progress:  15443
Progress:  15794
Progress:  16145
Progress:  16496
Progress:  16847
Progress:  17198
Progress:  17549
Progress:  17900
Progress:  18251
Progress:  18602
Progress:  18953
Progress:  19304
Progress:  19655
Progress:  20006
Progress:  20357
Progress

In [63]:
# Count the distinct image entries for every aspect ratio. This needs the
# 'ratio' column, which only exists after the resize/filter loop has filled it.
images_by_ratio = labels.groupby('ratio')['image']
images_by_ratio.nunique()


KeyError: 'ratio'

In [45]:
# Re-encode every processed JPEG as a PNG with the same base name inside
# DATA_BASE/OUTPUT_PATH, and remove rows whose processed JPEG does not exist.
labels['image'] = labels.image.map(lambda v: os.path.join(DATA_BASE, OUTPUT_PATH, os.path.basename(v)))

# Fixed reporting interval; max(1, ...) prevents a modulo by zero when the
# table has fewer than 20 rows.
progress_step = max(1, len(labels) // 20)

# Rows without a processed file are removed after the loop so the frame is
# not modified while it is being iterated.
missing = []

for position, (index, row) in enumerate(labels.iterrows(), start=1):
    png_name = f'{os.path.splitext(os.path.basename(row.image))[0]}.png'
    png_path = os.path.join(DATA_BASE, OUTPUT_PATH, png_name)
    try:
        # The context manager closes the handle even when decoding fails.
        with open(row.image, 'rb') as in_file:
            img = jpeg.decode(in_file.read())
    except FileNotFoundError:
        missing.append(index)
    else:
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(png_path, img):
            print('Failed to write: ', png_path)

    if position % progress_step == 0:
        print('Progress: ', position)

labels.drop(index=missing, inplace=True)

/data/simon/processed_retina_data/10_left.jpeg
Progress:  350
Progress:  701
Progress:  1052
Progress:  1403
Progress:  1754
Progress:  2105
Progress:  2456
Progress:  2807
Progress:  3158
Progress:  3509
Progress:  3860
Progress:  4211
Progress:  4562
Progress:  4913
Progress:  5264
Progress:  5615
Progress:  5966
Progress:  6317
Progress:  6668
Progress:  7019
Progress:  7370
Progress:  7721
Progress:  8072
Progress:  8423
Progress:  8774
Progress:  9125
Progress:  9476
Progress:  9827
Progress:  10178
Progress:  10529
Progress:  10880
Progress:  11231
Progress:  11582
Progress:  11933
Progress:  12284
Progress:  12635
Progress:  12986
Progress:  13337
Progress:  13688
Progress:  14039
Progress:  14390
Progress:  14741
Progress:  15092
Progress:  15443
Progress:  15794
Progress:  16145
Progress:  16496
Progress:  16847
Progress:  17198
Progress:  17549
Progress:  17900
Progress:  18251
Progress:  18602
Progress:  18953
Progress:  19304
Progress:  19655
Progress:  20006
Progress:  203

In [17]:
# Keep only the rows whose processed JPEG exists in DATA_BASE/OUTPUT_PATH,
# reduce the image column to the bare image name, and export the table.
labels['image'] = labels.image.map(lambda v: os.path.join(DATA_BASE, OUTPUT_PATH, os.path.basename(v)))
print(labels.image.unique())

# Fixed reporting interval; max(1, ...) prevents a modulo by zero when the
# table has fewer than 20 rows.
progress_step = max(1, len(labels) // 20)

# Rows are removed after the scan so the frame is not modified while it is
# being iterated.
missing = []

for position, (index, image_path) in enumerate(labels.image.items(), start=1):
    # A plain existence test replaces opening and closing every file.
    if not os.path.isfile(image_path):
        missing.append(index)

    if position % progress_step == 0:
        print('Progress: ', position)

labels.drop(index=missing, inplace=True)

print(labels.image.unique())
# Strip directory and extension by name instead of slicing off five characters.
# NOTE: after this line the column holds names without '.jpeg', so running this
# cell a second time would look for extension-less files and drop every row.
labels['image'] = labels.image.map(lambda v: os.path.splitext(os.path.basename(v))[0])
print(labels.image.unique())
labels.to_csv(os.path.join(DATA_BASE, 'processed_trainLabels.csv'), index=False)

['/data/simon/processed_retina_data/10_left.jpeg'
 '/data/simon/processed_retina_data/10_right.jpeg'
 '/data/simon/processed_retina_data/13_left.jpeg' ...
 '/data/simon/processed_retina_data/44348_right.jpeg'
 '/data/simon/processed_retina_data/44349_left.jpeg'
 '/data/simon/processed_retina_data/44349_right.jpeg']
Progress:  1755
Progress:  3511
Progress:  5267
Progress:  7023
Progress:  8774
Progress:  10529
Progress:  12284
Progress:  14039
Progress:  15794
Progress:  17549
Progress:  19304
Progress:  21059
Progress:  22814
Progress:  24569
Progress:  26324
Progress:  28079
Progress:  29834
Progress:  31589
Progress:  33344
Progress:  35099
['/data/simon/processed_retina_data/10_left.jpeg'
 '/data/simon/processed_retina_data/10_right.jpeg'
 '/data/simon/processed_retina_data/13_left.jpeg' ...
 '/data/simon/processed_retina_data/44348_right.jpeg'
 '/data/simon/processed_retina_data/44349_left.jpeg'
 '/data/simon/processed_retina_data/44349_right.jpeg']
['10_left' '10_right' '13_left'