## Import libraries

In [10]:
import os
import numpy as np
import subprocess
import shutil
import time
import tifffile
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
import keras_efficientnet_v2
import keras
import statistics
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report

## Settings

In [11]:
# Magnification tag; used in the "<name>_x<zoom>_z0.tif" file name and as the
# name of the per-slide tile subfolder.
zoom = "40"
# Threshold handed to filterTissue as `dif`: how far the mean red value must
# be from the mean green or blue value for a tile to be kept.
diffRed = 13
# Root folder that holds one subfolder per entry of `folders`.
dataPath = "DATA/HTX/"
# Prefix prepended to the "tiffmakemosaic" executable name.
toolsPath = "UTILS/"
# Root of the filtered tiles written by removeBlankTiles and read by evaluate.
evalPath = "EVALUATE"
# Weights file passed to model.load_weights.
modelPath = "MODELS/Model_02.hdf5"
# Text file listing, one per line, the slide paths already handled.
pathProc = "ProcessedImagesHTX40.txt"
# Class subfolders of dataPath; the first character of each name is used as
# the class folder name under evalPath.
folders = ["0R","1R","2R","3R"]
# batch_size passed to flow_from_directory during prediction.
batchSize = 320

## Helper functions

In [12]:
def readProcessedImages(pathProc):
    """Load the paths recorded as already processed.

    Parameters
    ----------
    pathProc : str
        Path of a text file holding one processed path per line.

    Returns
    -------
    set of str
        The distinct lines of the file. When the file does not exist a
        notice is printed and an empty set is returned.
    """
    if not os.path.isfile(pathProc):
        print('No ProcessedImages.txt found, converting all images in folder!')
        return set()
    with open(pathProc) as listing:
        return set(listing.read().splitlines())

In [13]:
def readDirFiles(inputPath):
    """Return the set of entries in ``inputPath``, each joined onto ``inputPath``.

    Every directory entry is included (files and subfolders alike); no
    filtering by type or extension is done.
    """
    return {os.path.join(inputPath, entry) for entry in os.listdir(inputPath)}

In [14]:
def filterOutFiles(setOfNDPIFiles, setOfPreImages):
    """Return, as a list, the paths in ``setOfNDPIFiles`` not in ``setOfPreImages``.

    The order of the returned list is unspecified because it comes from a set.
    """
    stillToDo = setOfNDPIFiles.difference(setOfPreImages)
    return list(stillToDo)

In [15]:
def extractFileName(path):
    """Return the last "/"-separated component of ``path``.

    A path ending in "/" yields an empty string; a path without "/" is
    returned unchanged.
    """
    return path.rsplit("/", 1)[-1]

In [16]:
def writeProcessedImages(file):
    """Append ``file`` as a new line to the processed-images list.

    The list location is taken from the module-level ``pathProc`` setting;
    the file is created if it does not exist yet.
    """
    entry = file+'\n'
    with open(pathProc, "a") as processedList:
        processedList.write(entry)

## Create tiles of the TIFF images

In [17]:
def createMosaicOfTIFF(outputPath, file, zoom, size):
    """Tile one slide's TIFF with the external ``tiffmakemosaic`` tool.

    The slide folder ``outputPath + <name>`` is expected to contain
    ``<name>_x<zoom>_z0.tif``. That image is copied into a new ``<zoom>``
    subfolder, ``tiffmakemosaic`` is run on the copy, and the copy is then
    deleted. If the ``<zoom>`` subfolder already exists the slide is skipped.
    Presumably the tool writes its tiles next to its input file - confirm
    against the tool's documentation.

    Parameters
    ----------
    outputPath : str
        Folder that contains the slide folder. It is concatenated directly
        with the slide name, so it must end with "/".
    file : str
        Path of the slide; only its last "/"-separated component is used.
    zoom : str
        Magnification tag used in the TIFF file name and the subfolder name.
    size : str
        Tile size; passed to the tool as ``-g <size>x<size>``.

    Returns
    -------
    None
        Progress, and a failure notice when the tool exits with a non-zero
        status, are printed.
    """
    fileName = extractFileName(file)
    slideDir = outputPath+fileName+"/"
    zoomDir = slideDir+zoom
    tifName = fileName+"_x"+zoom+"_z0.tif"
    if(os.path.exists(zoomDir)):
        print("*** ALREADY EXTRACTED SKIPPING ***")
        return
    print("Mosaic of " + fileName + " X"+ zoom +" started.")
    os.mkdir(zoomDir)
    shutil.copy(slideDir+tifName, zoomDir+"/")
    workingCopy = zoomDir+"/"+tifName
    arguments = [toolsPath+"tiffmakemosaic","-g",size+"x"+size, workingCopy]
    returnCode = subprocess.call(arguments)
    # The working copy is only the tool's input; remove it whether or not the
    # tool succeeded so it is not mistaken for a tile later.
    os.remove(workingCopy)
    if returnCode != 0:
        # Do not claim success when the tool reported an error.
        print("*** tiffmakemosaic failed with exit code " + str(returnCode) + " for " + fileName + " X" + zoom + " ***")
        return
    print("Mosaic of " + fileName + " X"+ zoom +" done!")

In [18]:
# Tile every slide that is not yet listed in the processed-images file.
for f in folders:
    print("Tiling subfolder: "+str(f))
    folderPath = os.path.join(dataPath,f+"/")
    slidePaths = readDirFiles(folderPath)
    proc = readProcessedImages(pathProc)
    ndpis = filterOutFiles(slidePaths, proc)
    for ndpi in ndpis:
        start_time2 = time.time()
        createMosaicOfTIFF(folderPath, ndpi, zoom, "300")
        # Record each slide as soon as it is handled, so the next run's
        # filterOutFiles call leaves it out.
        writeProcessedImages(ndpi)
        print("*** %s minutes ***" % ((time.time() - start_time2)/60))

Tiling subfolder: 0R
Tiling subfolder: 1R
Tiling subfolder: 2R
Tiling subfolder: 3R


## Filter tiles to keep only those with tissue

In [19]:
def filterTissue(rgb, dif):
    """Decide whether a tile's mean colour is far enough from grey to keep it.

    Parameters
    ----------
    rgb : array
        Image indexed as ``rgb[:, :, channel]``; channels 0, 1 and 2 are
        read as red, green and blue.
    dif : number
        Minimum distance between the red mean and the green or blue mean.

    Returns
    -------
    bool-like
        Truthy when the red mean lies more than ``dif`` above or below the
        green mean, or more than ``dif`` above or below the blue mean.
    """
    redMean, greenMean, blueMean = (rgb[:, :, channel].mean() for channel in range(3))
    redVsGreen = (redMean > (greenMean + dif)) or (redMean < (greenMean - dif))
    if redVsGreen:
        return redVsGreen
    return (redMean > (blueMean + dif)) or (redMean < (blueMean - dif))

In [30]:
def removeBlankTiles(folderPath, file, zoom, dif, folder):
    """Copy the tiles of one slide that pass ``filterTissue`` into the evaluation tree.

    Tiles are read from ``folderPath + <name> + "/" + zoom + "/"`` and the
    kept ones are copied to ``evalPath/<first character of folder>/<name>``.
    The output folder (and any missing parent) is created when needed, so
    the function can be re-run; tiles already present there are overwritten
    by name, and tiles left over from an earlier run are not removed.

    Parameters
    ----------
    folderPath : str
        Class folder holding the slide folder; must end with "/".
    file : str
        Path of the slide; only its last "/"-separated component is used.
    zoom : str
        Name of the tile subfolder inside the slide folder.
    dif : number
        Threshold forwarded to ``filterTissue``.
    folder : str
        Class folder name; its first character names the output class folder.

    Returns
    -------
    None
        The output path, the number of kept tiles and the elapsed time are
        printed.
    """
    start_time = time.time()
    fileName = extractFileName(file)
    pathInput = folderPath+fileName+"/"+zoom+"/"
    pathOutput = evalPath+"/"+folder[0]+"/"+fileName
    # makedirs also creates evalPath/<class> and tolerates an existing folder,
    # so a fresh checkout or a second run of the cell does not crash here.
    os.makedirs(pathOutput, exist_ok=True)
    print(pathOutput)
    num = 0
    for tileName in os.listdir(pathInput):
        if not tileName.endswith(".tif"):
            continue
        inPath = os.path.join(pathInput, tileName)
        outPath = os.path.join(pathOutput, tileName)
        rgb = tifffile.imread(inPath)
        if filterTissue(rgb, dif):
            shutil.copy(inPath, outPath)
            num += 1
    if num<1:
        print("*** Anamoly, none of the tiles are kept! ***")
    print("Kept "+str(num)+ " files, for "+fileName+" X"+zoom+"!")
    print("--- %s minutes ---" % ((time.time() - start_time)/60))

In [31]:
# Copy the tissue-bearing tiles of every slide into the evaluation tree.
for f in folders:
    print("Filtering subfolder: "+str(f))
    folderPath = os.path.join(dataPath,f+"/")
    for slidePath in readDirFiles(folderPath):
        removeBlankTiles(folderPath, slidePath, zoom, diffRed, f)
        # NOTE(review): the tiling cell appends the same paths to the same
        # list, so this adds duplicate lines - confirm it is still wanted.
        writeProcessedImages(slidePath)

Filtering subfolder: 0R
EVALUATE/0/18PL25788-01-02-01 - 2020-02-24 03.57.32
Kept 3256 files, for 18PL25788-01-02-01 - 2020-02-24 03.57.32 X40!
--- 1.4796868085861206 minutes ---
EVALUATE/0/19PL20427-01-02-03 - 2020-03-02 19.06.26
Kept 2066 files, for 19PL20427-01-02-03 - 2020-03-02 19.06.26 X40!
--- 0.651185139020284 minutes ---
EVALUATE/0/16PL27463-01-01-01 - 2020-02-18 19.29.28
Kept 50900 files, for 16PL27463-01-01-01 - 2020-02-18 19.29.28 X40!
--- 1.5426672577857972 minutes ---
EVALUATE/0/14062 01 - 2020-02-18 06.40.29
Kept 5101 files, for 14062 01 - 2020-02-18 06.40.29 X40!
--- 1.9209152936935425 minutes ---
Filtering subfolder: 1R
EVALUATE/1/PL10444 1 HTX - 2019-12-16 18.56.57
Kept 3194 files, for PL10444 1 HTX - 2019-12-16 18.56.57 X40!
--- 1.9783202568689982 minutes ---
EVALUATE/1/P842 B HTX - 2019-12-13 13.06.18
Kept 3372 files, for P842 B HTX - 2019-12-13 13.06.18 X40!
--- 0.7124816536903381 minutes ---
EVALUATE/1/PL9625 4 HTX - 2019-12-17 16.39.44
Kept 559 files, for PL9625 4

## Predict and evaluate images

In [None]:
def evaluate(model,valPath):
    """Predict every slide under ``valPath`` and score slide-level majority votes.

    ``valPath`` must contain the class folders "0", "1", "2" and "3"; each
    subfolder of a class folder is treated as one slide whose images are fed
    to ``model.predict``. For every N from 2 to 100 a slide label is derived
    from its N highest scores (see the comments below), and the N giving the
    best accuracy and the N giving the best macro F1 are reported.

    Results are printed and appended to "Result.txt" in the working
    directory.

    Parameters
    ----------
    model : object
        Must provide ``predict(generator, verbose=...)`` returning a 2-D
        array with one row per image. Presumably one column per class -
        confirm against the model definition.
    valPath : str
        Root folder of the evaluation tree.

    Returns
    -------
    tuple
        ``(f1MoMax, accMoMax)``: the best macro F1 and the best accuracy
        over all N.
    """
    testDataGen = ImageDataGenerator()
    # List the slide folders of all four classes up front, so a missing class
    # folder fails before any prediction is run. The real folder names are
    # used because slide folders are not guaranteed to be named 0..k-1.
    slideDirsPerClass = [
        sorted(next(os.walk(os.path.join(valPath, str(i))))[1])
        for i in range(4)
    ]
    real = list()
    pred = list()
    for i, slideDirs in enumerate(slideDirsPerClass):
        for slideDir in slideDirs:
            path = os.path.join(valPath, str(i), slideDir)
            real.append(i)
            # classes=[''] makes the slide folder itself the single "class"
            # folder, so its images are read directly.
            testGen = testDataGen.flow_from_directory(
                path,
                batch_size=batchSize,
                class_mode=None,
                target_size=(300,300),
                classes=[''])
            preds = model.predict(testGen, verbose = 1)
            pred.append(preds)
    # Start below any reachable score so the first N is always recorded and
    # moMax is never left empty, even when every accuracy is 0.
    accMoMax = -1
    f1MoMax = -1
    moMax = list()
    n1 = 0
    n2 = 0
    for n in range(2,101):
        mo = list()
        for arr in pred:
            # Never ask for more scores than the slide has images.
            m = min(n, len(arr))
            # Positions of the m largest individual scores in the flattened
            # array, mapped back to the rows (images) they belong to.
            ind = np.argpartition(arr.ravel(), -m)[-m:]
            row_indices, _ = np.unravel_index(ind, arr.shape)
            high = arr[row_indices,:]
            preds_max = np.argmax(high, axis=1)
            # The slide label is the most frequent class among those rows.
            mo.append(statistics.mode(preds_max))
        accMo = accuracy_score(real,mo)
        f1Mo = f1_score(real, mo, average="macro")
        if accMo > accMoMax:
            accMoMax = accMo
            n1 = n
            moMax = mo
        if f1Mo > f1MoMax:
            f1MoMax = f1Mo
            n2 = n
    report = classification_report(real, moMax)
    print("accMoMax: "+str(accMoMax) + " at N = "+ str(n1))
    print("f1MoMax: "+str(f1MoMax) + " at N = "+ str(n2))
    print(report)
    pathRes = os.path.join("Result.txt")
    # The context manager closes (and flushes) the results file on exit.
    with open(pathRes, "a") as resultFile:
        resultFile.write("*** RESULTS ***\n")
        resultFile.write("accMoMax: "+str(accMoMax) + " at N = "+ str(n1)+"\n")
        resultFile.write("f1MoMax: "+str(f1MoMax) + " at N = "+ str(n2)+"\n\n")
        resultFile.write(str(report))
        resultFile.write("\n\n")
    return f1MoMax, accMoMax

In [None]:
# Build the EfficientNetV2-S network with num_classes=4, then load the
# weights stored at modelPath.
# NOTE(review): pretrained weights are requested here and load_weights is
# called right after - presumably the loaded file replaces them; confirm
# whether pretrained=None would avoid an unnecessary download.
model = keras_efficientnet_v2.EfficientNetV2S(num_classes=4,pretrained="imagenet21k-ft1k", include_preprocessing=True)
model.load_weights(modelPath)
print("*** Weights loaded:" + modelPath + "***")
# NOTE(review): evaluate() only calls model.predict; presumably compile is
# kept for parity with the training setup - confirm it is needed.
model.compile(
    optimizer='adam',
    loss = 'categorical_crossentropy',
    metrics=['categorical_accuracy'],
    )

# Run the slide-level evaluation on the filtered tiles under evalPath.
f1, acc = evaluate(model,evalPath)