In [None]:
import imutils
import cv2

import os
import pandas as pd
import numpy

from sklearn.cluster import KMeans
#from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler

%matplotlib inline
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so pick whichever whitegrid style
# this installation actually provides (falling back to the default style).
_whitegrid_style = next(
    (style for style in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
     if style in plt.style.available),
    'default',
)
plt.style.use(_whitegrid_style)


folder = "test/" #input/output folder

# Empty feature table; one row per image is added by the extraction cell.
df = pd.DataFrame(columns=['Name', 'Size', 'Contour', 'Color R', 'Color G', 'Color B'])

In [None]:
# Build the feature table: one row per .jpg/.png image found in `folder`.
FEATURE_COLUMNS = ['Name', 'Size', 'Contour', 'Color R', 'Color G', 'Color B']
IMAGE_EXTENSIONS = ('.jpg', '.png')


def _extract_image_features(path):
    """Compute the contour count and mean colour of one image.

    Parameters
    ----------
    path : str
        Path of the image file to analyse.

    Returns
    -------
    tuple or None
        ``(contour_count, mean_r, mean_g, mean_b)``, or ``None`` when OpenCV
        cannot decode the file (``cv2.imread`` returns ``None`` in that case).
    """
    image = cv2.imread(path)
    if image is None:
        return None

    # Average colour over all pixels (rows first, then columns).
    # cv2.imread delivers the channels in B, G, R order, so unpack accordingly
    # to make sure each value lands in the matching 'Color R/G/B' column.
    avg_color_per_row = numpy.average(image, axis=0)
    mean_b, mean_g, mean_r = numpy.average(avg_color_per_row, axis=0)

    # Convert the image to grayscale, blur it, and threshold it.
    # NOTE: a (1, 1) kernel performs essentially no smoothing; it is kept
    # unchanged so the contour counts stay comparable with earlier runs.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (1, 1), 0)
    thresh = cv2.threshold(blurred, 120, 255, cv2.THRESH_BINARY)[1]

    # Extract the external contours; only their number is used as a feature.
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    return len(cnts), mean_r, mean_g, mean_b


# Collect plain rows and build the DataFrame once at the end:
# DataFrame.append no longer exists in pandas 2.x, and rebuilding the table
# from scratch makes this cell safe to re-run without duplicating rows.
records = []
for filename in sorted(os.listdir(folder)):  # sorted -> reproducible row order
    # verify filetype (case-insensitive, so '.JPG' / '.PNG' are accepted too)
    extension = os.path.splitext(filename)[1].lower()
    if extension not in IMAGE_EXTENSIONS:
        continue

    path = os.path.join(folder, filename)
    features = _extract_image_features(path)
    if features is None:
        print(f"Skipping unreadable image: {filename}")
        continue

    # 'Size' is the file size on disk, in bytes.
    records.append([filename, os.path.getsize(path), *features])

df = pd.DataFrame(records, columns=FEATURE_COLUMNS)

In [None]:
# Make sure the count-like columns carry numeric dtypes, then summarize
for column in ("Contour", "Size"):
    df[column] = pd.to_numeric(df[column])
df.describe()

In [None]:
# Rescale every numeric feature into the [0, 1] range
feature_names = ['Size', 'Contour', 'Color R', 'Color G', 'Color B']
scaler = MinMaxScaler()
df0 = scaler.fit_transform(df[feature_names])  # plain array of scaled values
df1 = pd.DataFrame(data=df0, columns=feature_names)
df1.describe()

In [None]:
# Plot example: normalized file size against normalized contour count
fig, ax = plt.subplots()
ax.plot(df1["Size"], df1["Contour"], 'o', color='black')
# Label the figure so it can be read on its own; the trailing ';' hides the repr.
ax.set(xlabel="Size (normalized)", ylabel="Contour count (normalized)",
       title="Normalized image features");

In [None]:
# Run clustering algorithm
k = 4      # number of clusters
ni = 20    # number of k-means initialisations; the best run is kept
seed = 42  # fixed seed so labels and centroids are reproducible across runs
kmeans = KMeans(n_clusters=k, n_init=ni, random_state=seed).fit(df1)
print(kmeans.labels_)
# Centroids to represent: map them from the [0, 1] scale back to the
# original feature units so they can be drawn on top of the raw data.
centers0 = kmeans.cluster_centers_
centers = scaler.inverse_transform(centers0)

In [None]:
# Store each image's cluster label in a 'y' column, then summarize
df = df.assign(y=kmeans.labels_)
df.describe()

In [None]:
# Plot example with clustering colours and centroids
fig, ax = plt.subplots()
ax.scatter(df["Size"], df["Contour"], c=df['y'], cmap='viridis')
# Columns 0 and 1 of `centers` are 'Size' and 'Contour', matching the column
# order the scaler was fitted on.
ax.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
# Label the figure so it can be read on its own; the trailing ';' hides the repr.
ax.set(xlabel="Size (bytes)", ylabel="Contour count",
       title="Image clusters and centroids");

In [None]:
# Write the labelled feature table next to the input images
fname = f"{folder}out_cluster_res.csv"
df.to_csv(fname)