In [1]:

import os

import cv2
import numpy as np
from sklearn import cluster
from sklearn.neural_network import MLPClassifier
from sklearn.cluster import KMeans
from sklearn.cluster import MiniBatchKMeans

from matplotlib import pyplot as plt

from library import Cloud 

In [2]:
# Randomly selecting n cloud images of each type
sample_percent = 0.1
sample_size = 40

# Path leading to all cloud data
path = "C:\\Users\\peter\\Documents\\Cloud Project\\Data\\swimcat\\"
cloud_types = ["A-sky","B-pattern","C-thick-dark","D-thick-white","E-veil"]

# initializing dict to store randomly n selected training images 
train_set = {}
test_set = {}
# Iterating over cloud types 
for ctype in cloud_types:
    fin = path + ctype +"\\images\\"
    
    # Reading all images of cloud type ctype from path fin + ctype
    allimgs = os.listdir(fin)
    
    # total number of images
    n_imgs = len(allimgs)
    
    # number of images to select
    n_sample = int(round(n_imgs*sample_percent,1))
    
    # shuffling list of images, slicing to get n images
    samples = allimgs[:sample_size]
    
    # Note that np.random.shuffle changes in place (ugh)
    np.random.shuffle(samples)
    
    # Dict key set to -> ctype, dict value -> samples
    train_set[ctype] = samples.copy()
    test_set[ctype] = allimgs[sample_size:]

In [3]:
# This cell reads in all sets [1-5] of s-parameters used in the paper.
# S-parameters are stored in a .txt file. 

os.listdir(os.getcwd())

sparams = {}

with open('sparams.txt') as fin:
    for line in fin:
        split = line.split(' ')
        
        params = [val.split(',') for val in split[1:]]
        params = [[float(num) for num in val.split(',')] for val in split[1:]]
        
        sparams[split[0]] = params


In [4]:
# This cell creates the cluster centers for each set of parameters. The
# clusters are computed using mini-batch kmeans, (~2 minutes, 5 imgs, for all sets)
# as standard kmeans takes too long (~30 minutes). 
# 
# Results are stored as binary files via pickle, and are also returned and 
# stored in a dictionary. 

centers = {}
kmeans_obj = {}
temp_texton_dict = {}
for key in list(sparams.keys()):
    kmeans_obj[key] = Cloud.get_textons(sparams[key], train_set, cloud_types, sample_size, 
                                                      name=key, path='..//Data//swimcat//')
    centers[key] = kmeans_obj[key].cluster_centers_
    

In [9]:
# This cell creates a nested dictionary of histograms for each set 
# of Gabor (sfilter) kernel parameters. Clustering is done using
# the kmeans Clustering objects defined in the previous cell. Histograms 
# are averaged as data is stored - output from each individual image is 
# NOT stored. 

texton_dict = {}
set_dict = {}

for key in list(kmeans_obj.keys()):
    texton_dict[key] = {}
    for ctype in cloud_types:
        for image in train_set[ctype]:

            output_array = np.zeros((125*125, len(sparams[key])))
            imgpath = path + ctype + "\\images\\" + image
            img = Cloud.scale_minmax(cv2.imread(imgpath, 1)[::, ::, 0]/cv2.imread(imgpath, 1)[::, ::, 2])
            
            dim = 0

            for param in sparams[key]:
                sfilter = Cloud.sfilter(param[0], param[1])

                output_array[::,dim] = np.reshape(Cloud.scale_minmax(cv2.filter2D(img, -1, sfilter)), (125 * 125, ))
                dim += 1
            
            prediction = np.bincount(kmeans_obj[key].predict(output_array))
            
            if len(prediction) < 30:
                prediction = np.append(prediction, [0]*(30 - len(prediction)))                    
            
            if ctype not in texton_dict[key].keys():
                texton_dict[key][ctype] = prediction
            else:
                # Averaging output distributions
                texton_dict[key][ctype] = (texton_dict[key][ctype] + prediction)/2.0
            
        


In [6]:
param

[0.5, 0.0]

In [10]:
def nearest_hist(clouddict, filt_img):

    def chidiff(truth, img):
        return np.sum(np.power((truth - img), 2)/(2*(truth + img)))

    closest = None
    dist = None
    for key in list(clouddict.keys()):
        if closest is None:
            closest = key
            dist = chidiff(clouddict[key], filt_img)

        elif chidiff(clouddict[key], filt_img) < dist:
            closest = key
            dist = chidiff(clouddict[key], filt_img)

    return closest

results = []
for key in list(texton_dict.keys())[2:3]:
    for ctype in list(test_set.keys()):
        for image in train_set[ctype]:

            output_array = np.zeros((125*125, len(sparams[key])))
            imgpath = path + ctype + "\\images\\" + image
            img = Cloud.scale_minmax(cv2.imread(imgpath, 1)[::, ::, 0]/cv2.imread(imgpath, 1)[::, ::, 2])
            
            dim = 0

            for param in sparams[key]:
                sfilter = Cloud.sfilter(param[0], param[1])

                output_array[::,dim] = np.reshape(Cloud.scale_minmax(cv2.filter2D(img, -1, sfilter)), (125 * 125, ))
                dim += 1
            
            prediction = np.bincount(kmeans_obj[key].predict(output_array))
            
            if len(prediction) == 28:
                prediction = np.append(prediction, (0,0)) 
            
            if len(prediction) == 29:
                prediction = np.append(prediction, 0)  
        
            results.append((ctype, nearest_hist(texton_dict[key], prediction)))
        

In [None]:
sfilter[35,35]

In [12]:
corr = 0
for result in results:
    if result[0] == result[1]:
        corr += 1
        
print(corr/len(results))

0.75


In [None]:
for key in texton_dict['set1:']:
    plt.figure()
    plt.title(key)
    plt.bar(list(range(0,30)),texton_dict['set1:'][key])
    plt.ylim((0, 1400))
    plt.savefig(key + ".png", dpi=500)

In [None]:
texon_dict = {}

for point in responses: 
    for cluster in kmeans.cluster_centers_:
        



In [None]:
kmeans_obj

In [None]:
imgpath = "C:\\Users\peter\Documents\Cloud Project\Data\swimcat\B-pattern\images\\B_1img.png"
a = gabor_fn(1,0,1,0,1)
img = scale_minmax(cv2.imread(imgpath,1))
test = scale_minmax(cv2.filter2D(img, -1, a))
plt.figure(1)
plt.imshow(test)
plt.figure(2)
plt.imshow(a)

In [None]:
texton_dict

In [None]:
responses.shape

In [None]:
test_set

In [None]:
results

In [None]:
sfilter.shape

In [None]:
plt.imshow(gabor_fn(10,0,6,0,1))

In [None]:
def sfilter(sigma, tau):

    sigma_x = sigma
    sigma_y = float(sigma)

    # Bounding box
    nstds = 3 # Number of standard deviation sigma
    xmax = max(abs(nstds * sigma_x), abs(nstds * sigma_y))
    xmax = np.ceil(max(1, xmax))
    ymax = max(abs(nstds * sigma_x), abs(nstds * sigma_y))
    ymax = np.ceil(max(1, ymax))
    xmin = -xmax
    ymin = -ymax
    (y, x) = np.meshgrid(np.arange(ymin, ymax + 1), np.arange(xmin, xmax + 1))



    gb = np.cos(np.sqrt(x**2 + y**2)*np.pi*tau/sigma)*np.exp(-(x**2 + y**2)/(2*sigma**2))
    return gb

In [None]:
plt.imshow(sfilter(10, 4))