In [5]:
import numpy as np
import cv2
import os
from sklearn.cluster import KMeans
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from skimage import exposure
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
import sys
class Image:
    def __init__(self, img, label):
        self.img = img
        self.label = label

def Load_Dataset():
    currpath="Data/Product Classification"
    Classes=os.listdir(currpath)
    Classes.sort( key=lambda x: int(x))
    Classes=list(map(lambda x:currpath+"/"+x,Classes))
    X_train,y_train,X_valid,y_valid= [], [], [], []
    trainset=[]
    validset=[]
    mx=(0,0)
    for Class in Classes:
        trainpart=os.listdir(Class+"/"+"Train")
        trainpart=list(map(lambda x:Class+"/"+"Train"+"/"+x,trainpart))
        for file in trainpart:
            if "Augmented" in file:
                print(file);continue
            img=cv2.imread(file,0)
            label=Class.split("/")[-1]
            mx=max(mx,img.shape)
            trainset.append(Image(img,label))
            X_train.append(img)
            y_train.append(label)
        if "Validation"  not in os.listdir(Class):continue
        validpart=os.listdir(Class+"/"+"Validation")
        validpart=list(map(lambda x:Class+"/"+"Validation"+"/"+x,validpart))
        for file in validpart:
            img=cv2.imread(file,0)
            label=Class.split("/")[-1]
            validset.append(Image(img,label))
            X_valid.append(img)
            y_valid.append(label)
    return trainset,validset,X_train,y_train,X_valid,y_valid

def preprocessing(DataSet):
    processed_images = []
    labels = []

    for image in DataSet:
        img = image.img
        # Resize the image
        # Normalize the image
        normalized_image = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
        # Gaussian Blur
        blurred_image = cv2.GaussianBlur(normalized_image, (3, 3), 0)
        processed_images.append(blurred_image)
        labels.append(image.label)

    return processed_images, labels
#1- features extraction
def extract_sift_features(images,sift):
    descriptors_list = []
    for img in images:
        kp, des = sift.detectAndCompute(img,None)
        if des is not None:
            descriptors_list.append(des)
    return descriptors_list

#2 - build vocabulary to finite the descriptors
def build_vocabulary(descriptors_list, vocab_size):
    descriptors = np.vstack(descriptors_list)
    Nclusts=list()
    SSR=list()
    for i in range(20,vocab_size,10):
        print(i)
        kmeans = KMeans(n_clusters=i, random_state=1900,)
        kmeans.fit(descriptors)
        SSR.append(kmeans.inertia_)
        Nclusts.append(i)
    return SSR,Nclusts

In [6]:
trainset,validset,X_train,y_train,X_valid,y_valid=Load_Dataset()
X_train_processed, y_train_processed = preprocessing(trainset)
X_valid_processed, y_valid_processed = preprocessing(validset)
sift = cv2.SIFT_create()
X_train_desc=extract_sift_features(X_train_processed,sift)
X_valid_desc=extract_sift_features(X_valid_processed,sift)
print("Finished")
SSR,Nclusts=build_vocabulary(X_train_desc,150)

Data/Product Classification/1/Train/web10Augmented.png
Data/Product Classification/1/Train/web11Augmented.png
Data/Product Classification/1/Train/web1Augmented.png
Data/Product Classification/1/Train/web2Augmented.png
Data/Product Classification/1/Train/web3Augmented.png
Data/Product Classification/1/Train/web4Augmented.png
Data/Product Classification/1/Train/web5Augmented.png
Data/Product Classification/1/Train/web6Augmented.png
Data/Product Classification/1/Train/web7Augmented.png
Data/Product Classification/1/Train/web8Augmented.png
Data/Product Classification/1/Train/web9Augmented.png
Data/Product Classification/2/Train/web1Augmented.png
Data/Product Classification/2/Train/web2Augmented.png
Data/Product Classification/2/Train/web3Augmented.png
Data/Product Classification/2/Train/web4Augmented.png
Data/Product Classification/2/Train/web5Augmented.png
Data/Product Classification/2/Train/web6Augmented.png
Data/Product Classification/2/Train/web7Augmented.png
Data/Product Classificatio

  super()._check_params_vs_input(X, default_n_init=10)


30


  super()._check_params_vs_input(X, default_n_init=10)


40


  super()._check_params_vs_input(X, default_n_init=10)


50


  super()._check_params_vs_input(X, default_n_init=10)


60


  super()._check_params_vs_input(X, default_n_init=10)


70


  super()._check_params_vs_input(X, default_n_init=10)


80


  super()._check_params_vs_input(X, default_n_init=10)


90


  super()._check_params_vs_input(X, default_n_init=10)


100


  super()._check_params_vs_input(X, default_n_init=10)


110


  super()._check_params_vs_input(X, default_n_init=10)


120


  super()._check_params_vs_input(X, default_n_init=10)


130


  super()._check_params_vs_input(X, default_n_init=10)
