In [1]:
import numpy as np
import numpy.lib.stride_tricks as slide
import imageio as iio
import multiprocessing
import re
import io
import math
import glob
import tools

class Sample:
    def __init__(self, path):
        self.path = path
        classMatch = re.search('[\\\/]([a-zA-Z]+)(\d+)\.(\w+)', path)
        self.klass = classMatch.group(1).lower()
        self.number = int(classMatch.group(2))
        self.extension = classMatch.group(3)
        self.klassId = tools.CLASSES.index(self.klass)
        
    def __str__(self):
        return f'{self.path} {self.klass} {self.number}'
    def __repr__(self):
        return f'{self.path} {self.klass} {self.number}'
        
    def readImage(self):
        return iio.imread(self.path)
    
    def writeImage(self, buffer, base, extension):
        if not extension:
            extension = self.extension
        iio.imgwrite(f'{base}/{self.klass}{str(self.number).zfill(3)}.{extension}', buffer)
        


def buildSampleList(pathGlob):
    samples = []
    for i, filePath in enumerate(glob.glob(pathGlob)):
        samples.append(Sample(filePath))
    return samples;
        


In [305]:

def kFold(grid, classifier, k=10, stratify=False):
    data = np.random.shuffle(grid.clone())
    for i in range(0,k):
        test = data[i]
        train = np.delete(data, k, axis=0)
        
def KNN(reference, challenge, k = 10):
    """hi"""

def compareOverlay(base, oppose):
    return np.count(base - oppose) / np.count(base)

def extractSegmentFeatures(patch, steps = 10):
    step = 2.0*math.pi / steps
    accum = 0
    origSize = np.count_nonzero(patch)
    side = np.max(patch.shape)
    padding = ((side - patch.shape[0] + side) // 2, (side - patch.shape[1] + side) // 2)
    padded = np.pad(patch, padding)
    com = scipy.ndimage.center_of_mass(padded)
    line = np.zeros(padded.shape)
    line[round(com[0])] = 1
    perim = np.count_nonzero(padded ^ scipy.ndimage.binary_erosion(padded))
    
    
    
    diameters = []
    for i in range(steps):
        newPatch = scipy.ndimage.rotate(padded, i * step, reshape=False)
        accum = accum + np.count_nonzero(padded * newPatch)
        diameters.append(np.count_nonzero(newPatch * line))
    dMin = np.min(diameters)
    dMax = np.max(diameters)
    
    return [com[0], com[1], accum /  steps, dMin, dMax, dMax / dMin, perim]

def averageIntensity(object, patch, gray):
    np.mean(gray[object] * patch.astype(np.uint8))
    
def averageColor(object, patch, color):
    np.mean(color[object] * patch.astype(np.uint8), axis=(0,1))

def com(patch):
    cm = scipy.ndimage.center_of_mass(patch)
    return math.sqrt(cm[0] ** 2 + cm[1] ** 2)
    


In [373]:

import matplotlib.pylab as plt
import matplotlib.pyplot as plot
import scipy;
samples = buildSampleList('./images/*')

def medianFilter(input, kernel, mode='edge'):
    slid = tools.slideKernel(input, kernel, mode)
    flat = slid.reshape((slid.shape[0], slid.shape[1], slid.shape[2] * slid.shape[3]))
    
    return np.median(flat, axis=2)


#samples = np.random.choice(samples, 20)
medianKernel = np.ones((7,7))
discMask = tools.makeBooleanDisc(5)

def area(sliceT, img):
    return np.asarray(img[sliceT]).size
    

def processAndPlot(sample):
    original = sample.readImage()
    #buffer = np.zeros(img.shape[0:2], dtype=img.dtype)
    
    # img = tools.rgbToLuminosity(original)
    # img = tools.autoThresholdSegmentation(img)
    # img = medianFilter(img, medianKernel)
    # img = tools.shrinkAndGrow(img, discMask, steps=3)
    # img = tools.growAndShrink(img, discMask, steps=2)
    # img = tools.shrinkAndGrow(img, discMask, steps=2)
    
    gray = tools.rgbToLuminosity(original)
    img = scipy.ndimage.median_filter(gray, size=5)
    img = tools.autoThresholdSegmentation(img)
    #img = tools.shrinkAndGrow(img, discMask, steps=3).astype(np.uint8)
    img = scipy.ndimage.binary_erosion(img, discMask, 5)
    img = scipy.ndimage.binary_dilation(img, discMask, 5)
    
    markerSource = scipy.ndimage.binary_erosion(img, discMask, 5)
    markers, _ = scipy.ndimage.label(markerSource)
    
    
    img = img.astype(np.uint8)
    
    # xm, ym = np.ogrid[0:img.shape[0]:10, 0:img.shape[1]:10]
    # markers = np.zeros_like(img).astype(np.int16)
    # markers[xm, ym]= np.arange(xm.size*ym.size).reshape((xm.size,ym.size))
    img = scipy.ndimage.watershed_ift(img, markers)
    #img[xm, ym] = img[xm-1, ym-1]
    
    objects = scipy.ndimage.find_objects(img)
    #objects = objects[type(objects) is tuple]
    minXLen = 10
    minYLen = 10
    maxXLen = img.shape[0] // 1.9
    maxYLen = img.shape[1] // 1.9
    objects = [a for a in objects 
               if a != None and 
               (a[0].stop - a[0].start) > minXLen and 
               (a[0].stop - a[0].start) < maxXLen and 
               (a[1].stop - a[1].start) > minYLen and 
               (a[1].stop - a[1].start) < maxYLen]
    areas = [(a[0].stop - a[0].start) * (a[1].stop - a[1].start) for a in objects]
    areaIndices = np.argsort(areas);
    objects = [objects[i] for i in areaIndices]
    
    if len(objects) > 7:
        midIndex = len(objects)//2
        objects = objects[midIndex:]
        
    
    patches = [img[obj] for obj in objects]
    patches = [np.choose(p  == p[patches[0].shape[0]//2, patches[0].shape[1]//2], [0,1]) for p in patches]
    
    try:
        meanIntensity = np.mean(gray[objects[0]] * patches[0].astype(np.uint8))
    except:
        meanIntensity = gray[gray.shape[0] // 2, gray.shape[1] // 2]
    try:
        meanColor = np.mean(original[objects[0]] * tools.intensityToRgb(patches[0].astype(np.uint8)), axis=(0,1))
    except:
        meanColor = original[original.shape[0] // 2, original.shape[1] // 2]
    
    features = []
    for patch in patches:
        meanIntensity = np.mean(gray[objects[0]] * patches[0].astype(np.uint8))
        meanColor = np.mean(original[objects[0]] * tools.intensityToRgb(patches[0].astype(np.uint8)), axis=(0,1))
        featureVec = extractSegmentFeatures(patch)
        featureVec.append(meanIntensity)
        featureVec.append(meanColor[0])
        featureVec.append(meanColor[1])
        featureVec.append(meanColor[2])
        featureVec.append(sample.klassId)
        if len(features) == 0: 
            features = np.asarray(featureVec)
        else:
            features[0] += featureVec[0]
            features[1] += featureVec[1]
            features[2] = min(features[2], featureVec[2])
            features[3] += featureVec[3]
            features[4] += featureVec[4]
            features[5] += featureVec[5]
            features[6] += featureVec[6]
            
            features[7] += featureVec[7]
            features[8] += featureVec[8]
            features[9] += featureVec[9]
            features[10] += featureVec[10]
            
            
    length = len(patches)
    features[0] /= length
    features[1] /= length

    features[3] /= length
    features[4] /= length
    features[5] /= length
    features[6] /= length
    features[7] /= length
    features[8] /= length
    features[9] /= length
    features[10] /= length
    
    
    plt.figure()
    plt.title(sample.path + ' original')
    plt.imshow(original)
    
    plt.figure()
    plt.title(sample.path)
    plt.imshow(img, cmap='gray')
    
    for patch in patches:
        plt.figure()
        plt.imshow(patch, cmap='gray')
        
    return objects


def process(sample):
    original = sample.readImage()
    #buffer = np.zeros(img.shape[0:2], dtype=img.dtype)
    
    # img = tools.rgbToLuminosity(original)
    # img = tools.autoThresholdSegmentation(img)
    # img = medianFilter(img, medianKernel)
    # img = tools.shrinkAndGrow(img, discMask, steps=3)
    # img = tools.growAndShrink(img, discMask, steps=2)
    # img = tools.shrinkAndGrow(img, discMask, steps=2)
    
    gray = tools.rgbToLuminosity(original)
    img = scipy.ndimage.median_filter(gray, size=5)
    img = tools.autoThresholdSegmentation(img)
    #img = tools.shrinkAndGrow(img, discMask, steps=3).astype(np.uint8)
    img = scipy.ndimage.binary_erosion(img, discMask, 5)
    img = scipy.ndimage.binary_dilation(img, discMask, 5)
    
    markerSource = scipy.ndimage.binary_erosion(img, discMask, 5)
    markers, _ = scipy.ndimage.label(markerSource)
    
    
    img = img.astype(np.uint8)
    
    # xm, ym = np.ogrid[0:img.shape[0]:10, 0:img.shape[1]:10]
    # markers = np.zeros_like(img).astype(np.int16)
    # markers[xm, ym]= np.arange(xm.size*ym.size).reshape((xm.size,ym.size))
    img = scipy.ndimage.watershed_ift(img, markers)
    #img[xm, ym] = img[xm-1, ym-1]
    
    objects = scipy.ndimage.find_objects(img)
    #objects = objects[type(objects) is tuple]
    minXLen = 10
    minYLen = 10
    maxXLen = img.shape[0] // 1.9
    maxYLen = img.shape[1] // 1.9
    objects = [a for a in objects 
               if a != None and 
               (a[0].stop - a[0].start) > minXLen and 
               (a[0].stop - a[0].start) < maxXLen and 
               (a[1].stop - a[1].start) > minYLen and 
               (a[1].stop - a[1].start) < maxYLen]
    areas = [(a[0].stop - a[0].start) * (a[1].stop - a[1].start) for a in objects]
    areaIndices = np.argsort(areas);
    objects = [objects[i] for i in areaIndices]
    
    if len(objects) > 7:
        midIndex = len(objects)//2
        objects = objects[midIndex:]
        
    
    patches = [img[obj] for obj in objects]
    patches = [np.choose(p  == p[patches[0].shape[0]//2, patches[0].shape[1]//2], [0,1]) for p in patches]
    
    try:
        meanIntensity = np.mean(gray[objects[0]] * patches[0].astype(np.uint8))
    except:
        meanIntensity = gray[gray.shape[0] // 2, gray.shape[1] // 2]
    try:
        meanColor = np.mean(original[objects[0]] * tools.intensityToRgb(patches[0].astype(np.uint8)), axis=(0,1))
    except:
        meanColor = original[original.shape[0] // 2, original.shape[1] // 2]
    
    features = []
    for patch in patches:
        meanIntensity = np.mean(gray[objects[0]] * patches[0].astype(np.uint8))
        meanColor = np.mean(original[objects[0]] * tools.intensityToRgb(patches[0].astype(np.uint8)), axis=(0,1))
        featureVec = extractSegmentFeatures(patch)
        featureVec.append(meanIntensity)
        featureVec.append(meanColor[0])
        featureVec.append(meanColor[1])
        featureVec.append(meanColor[2])
        featureVec.append(sample.klassId)
        if len(features) == 0: 
            features = np.asarray(featureVec)
        else:
            features[0] += featureVec[0]
            features[1] += featureVec[1]
            features[2] = min(features[2], featureVec[2])
            features[3] += featureVec[3]
            features[4] += featureVec[4]
            features[5] += featureVec[5]
            features[6] += featureVec[6]
            
            features[7] += featureVec[7]
            features[8] += featureVec[8]
            features[9] += featureVec[9]
            features[10] += featureVec[10]
            
    if len(features) == 0:
        f = np.zeros(12)
        f[11] = sample.klassId
        return f
    
    length = len(patches)
    features[0] /= length
    features[1] /= length

    features[3] /= length
    features[4] /= length
    features[5] /= length
    features[6] /= length
    features[7] /= length
    features[8] /= length
    features[9] /= length
    features[10] /= length
    
    
    return features


    

In [372]:
data = []

numSamples = len(samples)
print(f'Number of Samples: {numSamples}')

for i, sample in enumerate(samples):
    data.append(process(sample))
    if i % 10 == 0:
        print(f'Processed: {i} ({100 * i / numSamples}%)')
        


np.save('data.npy', data, allow_pickle=False)




Number of Samples: 20
Processed: 0 (0.0%)
Processed: 10 (50.0%)
[array([ 81.5719073 ,  76.71077647, 326.6       ,  27.5       ,
        29.75      ,   1.0745614 , 444.5       ,  74.06521739,
        77.80267559,  73.81939799,  75.59866221,   1.        ]), array([ 60.14850433,  67.61373259, 728.9       ,  46.5       ,
        48.5       ,   1.0474972 , 191.16666667,  63.16360968,
        58.09391049,  64.91122524,  72.39178283,   2.        ]), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 5.]), array([1.46898687e+02, 1.80815727e+02, 1.46280000e+03, 7.82000000e+01,
       9.14000000e+01, 1.19709319e+00, 6.23400000e+02, 6.01308556e+01,
       5.79276036e+01, 6.14049555e+01, 6.56550523e+01, 3.00000000e+00]), array([7.39330951e+01, 7.79545295e+01, 1.49450000e+03, 4.61666667e+01,
       4.98333333e+01, 1.10493447e+00, 2.27833333e+02, 6.75155303e+01,
       6.50064394e+01, 6.76594697e+01, 8.79109848e+01, 4.00000000e+00]), array([9.11686858e+01, 9.57091387e+01, 1.42170000e+03, 6.88571429e

In [366]:


#np.save('test', np.zeros(10), allow_pickle=False)
print(len(process(samples[0])))


12


In [352]:
print(np.load('data.npy'))

[[2.02283056e+02 2.42921210e+02 7.50000000e+02 1.07750000e+02
  1.19750000e+02 1.25847232e+00 7.04250000e+02 3.09292790e+01
  2.94994496e+01 3.18769950e+01 3.00437534e+01 2.00000000e+00]
 [1.44714951e+02 1.56281492e+02 2.60200000e+02 5.85000000e+01
  8.05000000e+01 1.26636905e+00 1.00350000e+03 5.82400756e+01
  5.70642722e+01 5.92079395e+01 6.21342155e+01 6.00000000e+00]
 [1.30452714e+02 1.38500904e+02 9.79900000e+02 3.65000000e+01
  4.80000000e+01 1.31118619e+00 8.09500000e+02 5.01347007e+01
  5.35604213e+01 5.00338137e+01 5.26856984e+01 5.00000000e+00]
 [6.20831367e+01 6.64765844e+01 1.00750000e+03 5.18888889e+01
  5.37777778e+01 1.03858998e+00 1.40444444e+02 5.96375661e+01
  5.67074074e+01 6.14449735e+01 6.38693122e+01 2.00000000e+00]
 [7.92595947e+01 7.98236505e+01 1.43040000e+03 5.75833333e+01
  5.93333333e+01 1.03464989e+00 1.89250000e+02 6.33150862e+01
  6.33629310e+01 6.26663793e+01 8.50353448e+01 6.00000000e+00]]


In [365]:
len(np.zeros(10))

10