## Inference on CLSA dataset

This file is part of the Glaucoma Phenotype ML Estimation project.

 Glaucoma Phenotype ML Estimation is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.


The Glaucoma Phenotype ML Estimation project is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with the Glaucoma Phenotype ML Estimation project.  If not, see <http://www.gnu.org/licenses/>.


## Parsing Preliminary information

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# Change this as required: this is a machine-specific absolute path to the
# project checkout. The directory constants defined later in this notebook are
# derived from os.getcwd(), so this call decides where data is looked up.
import os
os.chdir('/Users/kaiahsteven/Desktop/Work/IOP/glaucoma/glaucoma_ML')

In [5]:
import pickle as pkl
import os
from pathlib import Path
from multiprocessing import Pool, cpu_count
import numpy as np
import pandas as pd
import zipfile
#import imageio
import matplotlib.pyplot as plt
import seaborn as sns
from fastai.vision import *
from PIL import Image
from IPython.display import display
import zipfile
from fastai.distributed import *
from glaucoma.helpers.glaucoma_helpers import *
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [6]:
##### PLEASE SET AS REQUIRED########
# Directory layout, all rooted at the current working directory.
# TRAIN_DIR and GRADE_DIR are the image folders later used to build the
# VCDR and gradability DataBunches; C_CLSA_DIR holds the cropped CLSA images
# that inference is run on.
WORKING_DIR = Path(os.getcwd())
DATA_DIR = WORKING_DIR / 'data'
META_DIR = DATA_DIR / 'metadata'
TRAIN_DIR = DATA_DIR / 'train'
UKBB_DIR = DATA_DIR / 'retinal_images/UKBB'
CROP_DIR = DATA_DIR / 'retinal_images/cropped_UKBB'
C_CLSA_DIR = DATA_DIR / 'retinal_images/cropped_CLSA'
GRADE_DIR = DATA_DIR / "gradable"
CLSA_DIR = DATA_DIR / 'retinal_images/CLSA'

### Unzipping files

In [21]:
# List whatever parse_files finds under CLSA_DIR, then derive a bare name for
# each entry by dropping the last four characters of its base name
# (presumably a ".zip" suffix -- confirm against the data).
zipped_files = parse_files(CLSA_DIR)
f_names = list(map(lambda archive: os.path.basename(archive)[:-4], zipped_files))

In [23]:
# The first two archive names are treated as the baseline and follow-up
# releases; each has its images inside a '190225' sub-folder.
baseline = CLSA_DIR.joinpath(f_names[0], '190225')
followup = CLSA_DIR.joinpath(f_names[1], '190225')

In [24]:
# One file list per (visit, eye) pair; the second argument of parse_files is
# the eye tag ('right' / 'left').
baseline_right, baseline_left = (
    parse_files(baseline, side) for side in ('right', 'left')
)
followup_right, followup_left = (
    parse_files(followup, side) for side in ('right', 'left')
)

In [26]:
# Open the first baseline right-eye image and record its pixel dimensions;
# PIL reports size as (width, height).
im = Image.open(baseline_right[0])
width, height = im.size[0], im.size[1]

In [None]:
# Quick visual check of the image opened in the previous cell.
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# Earlier experiment, kept for reference (mpimg is only used by this line):
#img=mpimg.imread(ukbb_images_right[0])
imgplot = plt.imshow(im)
plt.show()

In [128]:
# Crop window given as fractions of a 1536 x 2048 (height x width) reference
# frame, then scaled to the dimensions of the image opened above.
h1, h2 = (1536 - 350 - 800) / 1536, (1536 - 350) / 1536
w1, w2 = (2048 - 1040 - 200) / 2048, (2048 - 200) / 2048
hc_1, hc_2 = height * h1, height * h2
wc_1, wc_2 = width * w1, width * w2


In [129]:
# Crop width and height (1080 x 800 on a 2048-wide reference), both scaled by
# the actual image width.
same_ratio_w, same_ratio_h = 1080 * width / 2048, 800 * width / 2048

In [130]:
# Top-left corner of the crop window (200 down, 350 across on a 2048-wide
# reference), scaled by the actual image width. This overwrites any earlier
# values of hc_1 and wc_1.
hc_1, wc_1 = 200 * width / 2048, 350 * width / 2048

In [105]:
def open_CLSA(f_name, crop=True, left=True):
    """Open an image file and draw it with matplotlib, optionally cropped.

    The crop window is defined on a 2048-pixel-wide reference frame and scaled
    by the actual image width: it is 1080 x 800 reference pixels, starts 200
    pixels from the top, and starts 350 pixels from the left edge when
    ``left`` is truthy or 800 pixels otherwise.

    Args:
        f_name: path (or file object) accepted by ``Image.open``.
        crop: when truthy, crop to the window described above before drawing.
        left: selects the horizontal start of the crop window.

    Returns:
        None. The image is drawn on the current axes via ``plt.imshow``.
    """
    image = Image.open(f_name)
    img_width = image.size[0]
    if crop:
        # All offsets scale with the image width only, as in the reference
        # frame arithmetic above.
        x_start_ref = 350 if left else 800
        x0 = x_start_ref * img_width / 2048
        y0 = 200 * img_width / 2048
        x1 = x0 + 1080 * img_width / 2048
        y1 = y0 + 800 * img_width / 2048
        image = image.crop((x0, y0, x1, y1))
    plt.imshow(image)

In [74]:
# Try the crop window on one baseline left-eye image, using the hc_1 / wc_1 /
# same_ratio_* values already set at notebook level.
im = Image.open(baseline_left[10])
width_offset = 0
left, top = wc_1 + width_offset, hc_1
right, bottom = wc_1 + same_ratio_w + width_offset, hc_1 + same_ratio_h
iml = im.crop((left, top, right, bottom))

In [None]:
# Display the cropped test image to check the crop window by eye.
plt.imshow(iml)

In [80]:
## Cropping files
# Build the (left, top, right, bottom) crop box from the notebook-level
# wc_1 / hc_1 / same_ratio_* values and hand the baseline left-eye files to
# crop_files, writing into C_CLSA_DIR / 'baseline_left'.
# NOTE(review): `num_cpu` is not assigned anywhere in this notebook; it
# presumably comes from the star import of glaucoma_helpers -- confirm, or
# define it here (cpu_count is already imported).
# NOTE(review): only 'baseline_left' is cropped in this cell, while inference
# later reads four cropped folders; the other three were presumably produced
# by re-running this cell with different inputs -- confirm.
width_offset = 0
left = wc_1 + width_offset
top = hc_1
right = wc_1 +same_ratio_w +width_offset
bottom = hc_1 + same_ratio_h 
save_path = C_CLSA_DIR / 'baseline_left'
crop_files(baseline_left,(left,top,right,bottom),save_path,num_cpu)

# Inference

## Running gradability model

In [9]:
# Image folder used to build the gradability DataBunch in the next cell.
path_img = GRADE_DIR

In [None]:
# Build the DataBunch for the gradability classifier: images come from
# path_img, labels from the folder names, with a seeded random
# train/validation split.
src = (
    ImageList.from_folder(str(path_img))
    .split_by_rand_pct(seed=42)
    .label_from_folder()
)
tfms = get_transforms(max_lighting=0.25)  # or tfms=None if none are needed
size = (800, 1040)  # size=(224,224) or (400,224)
data = (
    src.transform(tfms=tfms, size=size, resize_method=ResizeMethod.SQUISH)
    .databunch(num_workers=4)
    .normalize(imagenet_stats)
)


In [85]:
# ResNet-34 learner over the gradability DataBunch; its weights are replaced
# by a saved checkpoint in the next cell.
learn_grade = cnn_learner(data,models.resnet34, pretrained = True)

In [None]:
# Load the saved gradability checkpoint by name.
learn_grade.load('gradable_res34_removed_best_heat_2')

In [88]:
# Folders of cropped CLSA images to run inference on, one per (visit, eye).
crop_paths = [
    C_CLSA_DIR / subset
    for subset in (
        'baseline_left',
        'baseline_right',
        'followup_left',
        'followup_right',
    )
]

In [89]:
# Folder base names, used as dictionary keys for the per-subset results.
crop_names = list(map(os.path.basename, crop_paths))

In [90]:
# One fastai ImageList per cropped folder, in the same order as crop_paths.
test_sets = list(map(ImageList.from_folder, crop_paths))

In [91]:
# Hide UserWarnings raised from torch.nn.functional so the inference cells
# below produce readable output.
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="torch.nn.functional")

In [92]:
# Gradability predictions, keyed by cropped-folder name; filled in by the
# next cell.
gr_preds = {}

In [93]:
# Attach each cropped folder as the learner's test set, run inference on it,
# and store the result under the folder's base name.
for idx, test_items in enumerate(test_sets):
    subset_key = os.path.basename(crop_paths[idx])
    learn_grade.data.add_test(test_items)
    gr_preds[subset_key] = learn_grade.get_preds(DatasetType.Test)

In [94]:
# Persist the gradability predictions. The file is opened in a `with` block
# so the handle is flushed and closed as soon as the dump finishes.
save_path = WORKING_DIR /"CLSA_gradabillity_predictions_v2.pkl"
with open(save_path, 'wb') as fh:
    pkl.dump(gr_preds, fh)
#preds = pkl.load(open(save_path, 'rb'))

In [96]:
# Read the saved predictions back from save_path, closing the file afterwards.
# NOTE: pickle executes arbitrary code on load; only open files produced by
# this notebook.
with open(save_path, 'rb') as fh:
    gg_preds = pkl.load(fh)

### VCDR model

In [98]:
# Image folder used to build the VCDR DataBunch in the next cell.
path_img = TRAIN_DIR

In [99]:
# Build the DataBunch for the VCDR model. Each image is labelled with the
# name of its parent directory (second-to-last path component), read as a
# float label via FloatList.
src = (
    ImageList.from_folder(str(path_img))
    .split_by_rand_pct(seed=42)
    .label_from_func(
        lambda o: (o.parts if isinstance(o, Path) else o.split(os.path.sep))[-2],
        label_cls=FloatList,
    )
)
tfms = get_transforms(max_rotate=10, flip_vert=True, max_lighting=0.1)  # or tfms=None if none are needed
size = (800, 1040)  # size=(224,224) or (400,224)
data = (
    src.transform(tfms=tfms, size=size, resize_method=ResizeMethod.SQUISH)
    .databunch(num_workers=4)
    .normalize(imagenet_stats)
)

In [100]:
# ResNet-34 learner over the VCDR DataBunch; its weights are replaced by a
# saved checkpoint in the next cell.
learn_vcdr = cnn_learner(data,models.resnet34, pretrained = True)

In [None]:
# Load the saved VCDR checkpoint by name.
learn_vcdr.load('g_model_e9_stage_2')

In [None]:
# VCDR predictions keyed by cropped-folder name: attach each folder as the
# learner's test set, run inference, and store the result.
vc_preds = {}
for idx, test_items in enumerate(test_sets):
    subset_key = os.path.basename(crop_paths[idx])
    learn_vcdr.data.add_test(test_items)
    vc_preds[subset_key] = learn_vcdr.get_preds(DatasetType.Test)

In [None]:
# Persist the VCDR predictions. The file is opened in a `with` block so the
# handle is flushed and closed as soon as the dump finishes.
save_path = WORKING_DIR /"CLSA_vcdr_predictions_2.pkl"
with open(save_path, 'wb') as fh:
    pkl.dump(vc_preds, fh)
#preds = pkl.load(open(save_path, 'rb'))

## Building DataFrame

In [134]:
# Map each cropped-folder name to the base names of the image files in the
# matching test set, in the order the ImageList holds them.
combined_test_set = {
    crop_names[i]: [os.path.basename(item) for item in list(ts.items)]
    for i, ts in enumerate(test_sets)
}
    

In [135]:
def build_df_result(file_names, preds, preds2):
    """Combine per-image predictions from two models into one DataFrame.

    The prediction columns keep their numeric dtype. They are no longer
    concatenated with the file-name array, which used to turn every value
    into a string and forced callers to convert back before comparing.

    Args:
        file_names: sequence of image file names, one per prediction row.
        preds: 2-D tensor (anything exposing ``.numpy()`` and ``.shape``)
            whose columns are labelled 'gradable' and 'ungradable', in that
            order.
        preds2: 2-D tensor whose column is labelled 'vcdr_estimate'.

    Returns:
        A DataFrame indexed by ``file_name`` with the prediction columns.
        Any columns beyond the first three keep their integer position
        (4, 5, ...) as label.

    Raises:
        ValueError: if the inputs do not all have the same number of rows.
    """
    file_names = np.asarray(file_names)
    file_names = file_names.reshape(len(file_names), 1)

    # Shape report kept from the original cell: a quick check that the
    # file list and both prediction tensors line up.
    print(file_names.shape)
    print(preds.shape)
    print(preds2.shape)

    scores = np.concatenate((preds.numpy(), preds2.numpy()), axis=1)
    df = pd.DataFrame(
        scores,
        index=pd.Index(file_names[:, 0], name="file_name"),
        # Number the columns from 1 so the labels match the positions the
        # original string-based table used.
        columns=range(1, scores.shape[1] + 1),
    )
    return df.rename(columns={1: 'gradable', 2: 'ungradable', 3: 'vcdr_estimate'})
    
    
    

In [136]:
# Build one result table per cropped folder. Only element [0] of each stored
# get_preds result is used (presumably the prediction tensor -- confirm).
keys = [*combined_test_set]
df_dict = {}
for key in keys:
    print(key)
    df_dict[key] = df = build_df_result(
        combined_test_set[key],
        gr_preds[key][0],
        vc_preds[key][0],
    )

baseline_left
(28638, 1)
torch.Size([28638, 2])
torch.Size([28638, 1])
baseline_right
(28739, 1)
torch.Size([28739, 2])
torch.Size([28739, 1])
followup_left
(24362, 1)
torch.Size([24362, 2])
torch.Size([24362, 1])
followup_right
(24591, 1)
torch.Size([24591, 2])
torch.Size([24591, 1])


In [142]:
# Stack all per-folder tables into one frame, in dictionary order.
df_full = pd.concat(list(df_dict.values()))

In [171]:
# Stack left and right eyes per visit, left first.
df_full_baseline = pd.concat(
    [df_dict[name] for name in ('baseline_left', 'baseline_right')]
)
df_full_followup = pd.concat(
    [df_dict[name] for name in ('followup_left', 'followup_right')]
)

In [173]:
# Tag every row with its visit: 0 for baseline, 1 for follow-up.
df_full_baseline['follow_up'] = 0
df_full_followup['follow_up'] = 1

In [176]:
# Combine both visits into one frame, replacing the earlier df_full.
# reset_index() turns the file_name index into an ordinary column. concat is
# called without ignore_index, so each visit keeps its own 0-based row labels
# and labels repeat across the two visits.
df_full = pd.concat([df_full_baseline.reset_index(), df_full_followup.reset_index()])

In [178]:
# Write the combined results to CSV in the working directory. to_csv also
# writes the row labels, as an unnamed first column.
df_full.to_csv(WORKING_DIR /"CLSA_full_inference_v2.csv")

In [179]:
# Show the rows whose vcdr_estimate exceeds 10; pd.to_numeric converts the
# column to numbers before the comparison.
df_full[pd.to_numeric(df_full['vcdr_estimate']) > 10]

Unnamed: 0,file_name,gradable,ungradable,vcdr_estimate,follow_up
1845,190225_QIMR_SMacGregor_6344668_left.jpg,0.99500895,0.0049910997,10.615381,0
5287,190225_QIMR_SMacGregor_5185392_left.jpg,0.9477532,0.052246768,10.053989,0
8423,190225_QIMR_SMacGregor_8151706_left.jpg,0.9423977,0.057602316,10.179149,0
14227,190225_QIMR_SMacGregor_2009514_left.jpg,0.9906679,0.009332126,10.302616,0
16758,190225_QIMR_SMacGregor_5836860_left.jpg,0.9930233,0.0069767567,10.442637,0
23870,190225_QIMR_SMacGregor_5364897_left.jpg,0.96118855,0.03881139,10.268616,0
27890,190225_QIMR_SMacGregor_3508853_left.jpg,0.9940221,0.005977941,10.038216,0
8808,190225_QIMR_SMacGregor_9471889_left.jpg,0.5280808,0.47191918,10.567476,1
14290,190225_QIMR_SMacGregor_5836860_left.jpg,0.9944944,0.0055056387,10.79579,1
22895,190225_QIMR_SMacGregor_6032271_left.jpg,0.9853279,0.014672048,10.297196,1


In [268]:
# Persist the per-folder result tables. The file is opened in a `with` block
# so the handle is flushed and closed as soon as the dump finishes.
# NOTE(review): the file name says "UKBB" although df_dict holds the CLSA
# results built above -- confirm whether the name should change.
with open(WORKING_DIR / "UKBB_inference_python_dictionaries.pkl", 'wb') as fh:
    pkl.dump(df_dict, fh)