This file looks at producing example images

Let's try to sharpen the images or look at the surrounding area of each pixel to highlight areas that may have an issue. Then let's look at
trying to quantify the noise in each image.

In [39]:
# Reading the data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pathlib
from copy import copy
from matplotlib import cm, colors
import cv2

emptyRatio = 47
objectwidth = 83
objectheight = 122
xspacing = 133
yspacing = 270
xstart = 293
ystart = 268
xend = 1730
yend = 1770
powderthickness = 80
endlayer = 187
objectsplit = 4
layersPerObject = endlayer // objectsplit
testEnd = endlayer - layersPerObject

paths = pathlib.Path('./OT data 80 um/int').glob('*.tif')
paths_sorted = [x for x in paths]
paths_sorted.sort()
integrals = np.array([np.array(plt.imread(path)) for path in paths_sorted])[0:testEnd]
testintegrals = np.array([np.array(plt.imread(path)) for path in paths_sorted])[testEnd:endlayer]

objectinfo = pd.read_csv('Parameters.csv', names=["Object", "P", "S", "H", "Porosity", "Label"])
objectinfo.insert(1, "VED", objectinfo.P * 1000/(objectinfo.S * objectinfo.H * powderthickness))
objectCoordinates = [[x, x+objectwidth, y, y+objectheight] for y in reversed(range(
    ystart, yend, objectheight + yspacing)) for x in range(xstart, xend, xspacing + objectwidth)]
coorddf = pd.DataFrame(objectCoordinates, columns=['xstart', 'xend', 'ystart', 'yend'])
objectinfo = coorddf.join(objectinfo)
zs = [objectinfo.copy().assign(zstart=z, zend=z+layersPerObject) for z in range(0, testEnd-layersPerObject, layersPerObject)]

testobjectinfo = objectinfo.copy().assign(zstart=0, zend=layersPerObject)
objectinfo = pd.concat(zs)

del coorddf
del objectCoordinates
del paths_sorted

In [40]:
from sklearn import neighbors, metrics
from sklearn.model_selection import cross_val_score, LeaveOneOut
from sklearn import preprocessing
from datetime import datetime

def preprocess(integrals, objectinfo, type, sharpening,  windowSize):
    sharpeningKernel = np.array([   [-1, -1,  -1],
                                    [-1,  9,  -1],
                                    [ -1, -1,  -1]
    ]) if sharpening == 'diagonal' else np.array([  [0, -1,  0],
                                                    [-1, 5, -1],
                                                    [0, -1,  0]])
    sharpened = np.array([cv2.filter2D(src=image, ddepth=-1, kernel=sharpeningKernel) for image in integrals])
# Sharpening is done
    filtered = np.full(np.shape(integrals), np.nan)
    for index, object in objectinfo.sort_values(by=['VED']).iterrows():
        if type == 'scatter':
            xs = np.array(sharpened[object.zstart:object.zend, object.ystart:object.yend, object.xstart:object.xend], copy=True, dtype=np.float32)
            sum = np.sum(xs, axis=0)
            limit = np.percentile(sum, emptyRatio)
            filter = sum >= limit
            (endLayer, _, _) = xs.shape
            filter = np.repeat([filter], endLayer, 0)
            xs[~filter] = np.nan
            filtered[object.zstart:object.zend, object.ystart:object.yend, object.xstart:object.xend] = xs
        elif type == 'moran':
            xs = sharpened[object.zstart:object.zend, object.ystart:object.yend, object.xstart:object.xend]
            sum = np.sum(xs, axis=0)
            limit = np.percentile(sum, emptyRatio)
            filter = sum >= limit
            (endLayer, _, _) = xs.shape
            filter = np.repeat([filter], endLayer, 0)
            avg = np.mean(xs, where=filter)
            stddev = np.std(xs, where=filter)
            xs = (xs - avg) / avg
            xs[~filter] = np.nan
            filtered[object.zstart:object.zend, object.ystart:object.yend, object.xstart:object.xend] = xs
        elif type == 'spatstat':
            # Implement later
            print("nyi")
    return sharpened

def makeNeighbourhood(sharpened, neighbourhood, windowSize):
    neighbourkernel = np.array(
        [[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]]
    )/9 if neighbourhood == 'grid' else np.array(
        [[1, 2,  1],
        [2, 4, 2],
        [1, 2,  1]])/16
    flatNeighbourhood = np.array([cv2.filter2D(src=layer, ddepth=-1, kernel=neighbourkernel) for layer in sharpened])
    (endLayer, _, _) = flatNeighbourhood.shape
    neighbourhoodValues = np.array([
        np.sum(flatNeighbourhood[layerIndex-windowSize:layerIndex], axis=0)/windowSize
        for layerIndex in range(windowSize, endLayer+1)
    ])
    return neighbourhoodValues

def encode(sharpened, objectinfo, neighbourhoodValues, windowSize, type, buckets, histnormalise):
    offset = windowSize // 2
    init = False

    if type == 'spatstat':
        print("nyi")
    else:
        outliervalues = np.full(np.shape(neighbourhoodValues), 0.0)
        for index, object in objectinfo.iterrows():
            if object.Label == "KH":
                continue
            xs = sharpened[object.zstart+offset:object.zend-windowSize+offset+1, object.ystart:object.yend, object.xstart:object.xend]
            ys = neighbourhoodValues[object.zstart:object.zend-windowSize+1, object.ystart:object.yend, object.xstart:object.xend]
            filter = np.logical_and(np.isfinite(xs), np.isfinite(ys))
            line = np.polyfit(ys[filter].flatten(), xs[filter].flatten(), 1)
            p = np.poly1d(line)
            outliervalues[object.zstart:object.zend-windowSize+1, object.ystart:object.yend, object.xstart:object.xend] = xs - p(ys)
        minval = np.min(outliervalues)
        maxval = np.max(outliervalues)
        for index, object in objectinfo.iterrows():
            if object.Label == "KH":
                continue
            xs = neighbourhoodValues[object.zstart:object.zend, object.ystart:object.yend, object.xstart:object.xend]
            filter = np.isfinite(xs)
            label = object.Label
            hist, _ = np.histogram(xs[filter], bins=buckets, range=(minval, maxval))
            if init == False:
                X = np.array([hist])
                Y = np.array([0 if label == 'GOOD' else 1])
                init = True
            else:
                X = np.append(X, np.array([hist]), 0)
                Y = np.append(Y, np.array([0 if label == 'GOOD' else 1]), 0)
        
        if (histnormalise == 'column'):
            X = preprocessing.normalize(X, axis=0)
        elif (histnormalise == 'row'):
            X = preprocessing.normalize(X, axis=1)
    return X, Y

def classify(Xtrain, Ytrain, Xtest, Ytest, n_neighbors):
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights="distance")
    clf.fit(Xtrain, Ytrain)
    yfit = clf.predict(Xtest)
    return metrics.roc_auc_score(Ytest, yfit), metrics.precision_score(Ytest, yfit), metrics.recall_score(Ytest, yfit)

In [4]:
# Tweaking parameter settings
types = ['scatter', 'moran']
# types = ['scatter', 'moran', 'spatstat']
sharpening = ['direct', 'diagonal']
neighbourhoodSetting = ['grid', 'euclidean']
windowsizes = range(1, 8, 2)
buckets = range(30, 151, 30)
histnormalise = ('none', 'column', 'row')
#bucket-lower-limit?
k = range(2, 7)

In [47]:
# Now to tie it all together...
columns = ['type', 'sharpening', 'windowSize', 'neighbourhood', 'buckets', 'histnormalise', 'k-nearest', 'auc', 'precision', 'recall']
results = pd.DataFrame(columns=columns)
for type in types:
    print(datetime.now(), "type: ", type)
    for sharpSetting in sharpening:
        print(datetime.now(), "sharpSetting: ", sharpSetting)
        for windowsize in windowsizes:
            print(datetime.now(), "windowsize: ", windowsize)
            preprocessed = preprocess(integrals, objectinfo, type, sharpSetting, windowsize)
            testpreprocessed = preprocess(testintegrals, testobjectinfo, type, sharpSetting, windowsize)
            for nSetting in neighbourhoodSetting:
                print(datetime.now(), "nSetting: ", nSetting)
                neighbourhood = makeNeighbourhood(preprocessed, nSetting, windowsize) 
                testneighbourhood = makeNeighbourhood(testpreprocessed, nSetting, windowsize)
                for histnorm in histnormalise:
                    for bucket in buckets:
                        print(datetime.now(), "histnorm: ", histnorm, "bucket: ", bucket)
                        Xtrain, Ytrain = encode(preprocessed, objectinfo, neighbourhood, windowsize, type, bucket, histnorm)
                        Xtest, Ytest = encode(testpreprocessed, testobjectinfo, testneighbourhood, windowsize, type, bucket, histnorm)
                        # Add encode for test as well
                        for n_neighbors in k:
                            score, precision, recall = classify(Xtrain, Ytrain, Xtest, Ytest, n_neighbors)
                            nextRow =  pd.DataFrame([[type, sharpSetting, windowsize, nSetting, bucket, histnorm, n_neighbors, score, precision, recall]], columns=columns)
                            results = pd.concat([results, nextRow])
                        results.to_csv('out.csv', index=False, header=True)


2022-09-12 14:46:21.638690 type:  scatter
2022-09-12 14:46:21.638812 sharpSetting:  direct
2022-09-12 14:46:21.638834 windowsize:  1
2022-09-12 14:46:25.298058 nSetting:  grid
2022-09-12 14:46:33.378717 histnorm:  none bucket:  30
2022-09-12 14:46:41.970380 histnorm:  none bucket:  60
2022-09-12 14:46:49.890558 histnorm:  none bucket:  90
2022-09-12 14:46:58.148504 histnorm:  none bucket:  120
2022-09-12 14:47:06.247923 histnorm:  none bucket:  150
2022-09-12 14:47:14.247583 histnorm:  column bucket:  30
2022-09-12 14:47:22.121833 histnorm:  column bucket:  60
2022-09-12 14:47:30.649623 histnorm:  column bucket:  90
2022-09-12 14:47:39.582483 histnorm:  column bucket:  120
2022-09-12 14:47:48.144074 histnorm:  column bucket:  150
2022-09-12 14:47:56.557859 histnorm:  row bucket:  30
2022-09-12 14:48:05.026950 histnorm:  row bucket:  60
2022-09-12 14:48:13.665965 histnorm:  row bucket:  90
2022-09-12 14:48:22.125866 histnorm:  row bucket:  120
2022-09-12 14:48:30.562606 histnorm:  row b

In [2]:
import multiprocessing

def sumall(value):
    return sum(range(1, value + 1))

pool_obj = multiprocessing.Pool()

answer = pool_obj.map(sumall,range(0,5))
print(answer)

Process SpawnPoolWorker-3:
Process SpawnPoolWorker-1:
Process SpawnPoolWorker-4:
Process SpawnPoolWorker-5:
Process SpawnPoolWorker-2:
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.9/3.9.14/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/opt/homebrew/Cellar/python@3.9/3.9.14/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/homebrew/Cellar/python@3.9/3.9.14/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/opt/homebrew/Cellar/python@3.9/3.9.14/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'sumall' on <module '__main__' (built-in)>
Traceback (most recent call last):
  

KeyboardInterrupt: 