In [1]:
# Initialize these:
typeImg = "photo" # photo/ cartoon/ all"
numImg = "5" # 5 or 50
sz = 100

In [2]:
# # Include imports
import glob
from skimage.transform import resize
from skimage.io import imread, imsave, imshow
import os
import pandas as pd
import numpy as np
import skimage.color as skcolor
import matplotlib.pyplot as plt

from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
import time

In [3]:
colNames = ["col_"+str(i) for i in range(58)] 
allConcepts = ['mango','watermelon','honeydew','cantaloupe','grapefruit','strawberry','raspberry','blueberry','avocado','orange','lime','lemon']

# Read data
# Get the color ratings data -From file data_clean.csv
data = pd.read_csv("data_clean.csv",names = colNames)

# Get the LAB coordinates of colors from file LabOrig.csv
#colorData = pd.read_csv("LabOrig.csv",header=None)
colorData = pd.read_csv("Lab.csv",header=None)
colorDataRGB = pd.read_csv("RGB.csv",header=None)

category = pd.read_csv("Category.csv",header=None)
path = "E:/GitRepo/Color-Concept-Associaitons-using-Google-Images/downloads"
# if not os.path.exists( path + "/trainResized" ):
#    os.makedirs( path + "/trainResized/" )


In [4]:
# Count total number of images
count = 0
for i in allConcepts:
    trainFiles = glob.glob( path + "/" +typeImg +"/" +i+"/*_Top_"+numImg+"/*.*" )
    count = count + len(trainFiles)
    #print(count)
print("Category: ",typeImg)
print("Total images : ",count)

Category:  photo
Total images :  60


In [5]:
# Function which gets all the 186 features and all colors: Output 58 x 186 feature array - for a single image
def getImageFeatures(nameFile,imgLAB,conceptNum):
    rowNum = -1
    x = np.zeros((58,numFt+1) )
    
    # get the segmented image s.t. only pixels which are segmented are included in the matrix
    segmentedImg,segmentedImgCat = getSegmentedImage(nameFile,sz)
    #print("Segmented image retrieved")
    # Stack features for all 58 colors for a single image -- total row size : 58N
    
    # Get spherical features
    sphFt = getSphericalFeatures(imgLAB,segmentedImg)
    # Would return a matrix of size 58 x 30
    #print("Shape of spherical array",sphFt.shape)
        
    # Get cylindrical feature
    cylFt = getCylindricalFeatures(imgLAB,segmentedImg)
    # Would return a matrix of size 58 x 150
    #print("Shape of cylindrical array",cylFt.shape)
    
    # Get categorical features as well as assign label
    #segmentedImgCat = segmentedImg
    catFt = getCategoricalFeatures(nameFile, sz, conceptNum,segmentedImgCat)
    
    x = np.concatenate((sphFt,cylFt,catFt), axis=1)
    #print("Shape of image feature matrix: ",x.shape)
    return x        
    
    
    

In [6]:
def getSlicedImageArray(imgLAB,i,type=1):
    if type == 1:
        if i==0:
            xx = imgLAB[27:72,27:72,:] # 20%   # >>>>>>>>>>>>>>>>>>> TODO change the slicing
        elif i==1:
            xx = imgLAB[19:83,19:83,:] # 40%
        elif i==2:
            xx = imgLAB[12:90,12:90,:] # 60%
        elif i==3:
            xx = imgLAB[6:96,6:96,:] # 80%
       
    else:
        if i==0:
            xx = imgLAB[27:72,27:72] # 20%   # >>>>>>>>>>>>>>>>>>> TODO change the slicing
        elif i==1:
            xx = imgLAB[19:83,19:83] # 40%
        elif i==2:
            xx = imgLAB[12:90,12:90] # 60%
        elif i==3:
            xx = imgLAB[6:96,6:96] # 80%
        
    return xx

In [7]:
def getSphericalFeatures(imgLAB, segmentedImg):
    
    # -- Input : imgLAB : matrix 100 x 100 x 3 which has LAB values of every pixel in the image.
    # -- Input : colorLAB : vector 1 x 3 which has LAB values of the specific color
    # -- Input : segmentedImg : Using Active Contours
    
    # -- returns array of 30 features for an image for a specific color.
    
    # Rc values : 1,10,20,30,40
    # 5 different window sizes (middle 20%, 40%, 60%, 80%, and 100% of image) and 1 segmented 
    
    # Example : 1st column : Fraction of pixels in 20% of the image within Rc=1
    #         : 2nd column : Fraction of pixels in 20% of the image within Rc=10
    #         : 6th column : Fraction of pixels in 40% of the image within Rc=1 and so on..
    
    features = np.zeros((58,30))
    Rc = [1,10,20,30,40]   # Radii thresholds

    colnum = 0
    for i in range(6):
        if i == 4:
            xx = imgLAB #np.copy(imgLAB) np.copy(imgLAB)
        elif i == 5:
            xx = segmentedImg
        else:
            xx = getSlicedImageArray(imgLAB,i)
            
        rowNum = -1   
        for color in range(58):
            rowNum+=1
            colorLAB = np.array(colorData.loc[color])   #get LAB values for specific color
            
            # Calculate Euclidean distance of every pixel from the specific color 
            distPixel = skcolor.deltaE_cie76(xx,colorLAB)

            distPixel = distPixel.flatten()
            #print(distPixel.size)
            col = colnum
            for j,threshold in enumerate(Rc):
                num = sum(1 for t in distPixel  if t <= threshold)/distPixel.size
                #features.append(num)
                features[rowNum,col] = num
                col+=1
        colnum+=5    
    #print(features) 
    return features

In [8]:
Li = [1, 10, 20, 30, 40]
R = [1, 10, 20, 30, 40]     # same as L
hueAngle = [5, 10, 20, 30, 40] # Hue Angle

def getCylindricalFeatures(imgLAB,segmentedImg):
    # Defines feature for an image. 
    # -- Input : imageLAB : matrix 100 x 100 x 3 which has LAB values of every pixel in the image.
    # -- Input : sampleLAB : vector 1 x 3 which has LAB values of the specific color
    # -- returns 6(area)*len(R)*len(hueAngle) = 150 columns for an image for a specific color
    
    # R values : 0 or 1,10,20,30,40
    # 5 different theta values (same as RC) - but features will be all combinations of R and theta
    # Example : 1st column : Fraction of pixels in 40% of the image which are within R=1 and theta = 5 from specific color
    
    
    features = np.zeros((58,150))

    colnum = 0
    for i in range(6):
        if i == 4:
            xx = imgLAB #np.copy(imgLAB)
        elif i == 5:
            xx = segmentedImg
        else:
            xx = getSlicedImageArray(imgLAB,i)
        
        ## Do colnum+5 at the end 
        L = xx[:,:,0]
        a = xx[:,:,1]
        b = xx[:,:,2]
        #print(np.shape(L))
         # Get polar coordinates
        LTemp = L.flatten()
        r = np.sqrt(a**2+b**2).flatten()
        theta = np.degrees(np.arctan2(b,a)).flatten()
        
        rowNum = -1
        for color in range(58):
            rowNum += 1
            colorLAB = np.array(colorData.loc[color])

            L_diff = np.abs(LTemp - colorLAB[0])
            r_diff = np.abs(r - (np.sqrt(colorLAB[1]**2 + colorLAB[2]**2)))
            theta_diff = np.abs(theta - np.degrees(np.arctan2(colorLAB[2],colorLAB[1])))
            
            col = colnum
            for j,r_th in enumerate(R):
                l_th = Li[j] # L and R are the same.
                for theta_th in hueAngle:
                    pixels = 0
                    for p in range(len(L_diff)):
                        if (L_diff[p] <= l_th and r_diff[p] <= r_th and theta_diff[p] <= theta_th) or (L_diff[p] <= l_th and r_diff[p] <= 2.5):
                            pixels += 1
                    features[rowNum,col] = pixels/(len(L_diff))
                    col = col + 1 
        # Increment colnum to be +25
        colnum+=25
    return features


In [9]:
import matlab.engine
category = pd.read_csv("Category.csv",header=None)

def getCategoriesMatrix(imageFileName,sz):
    ## Get total pixel counts of all 11 categories for this imageFile
    eng = matlab.engine.start_matlab()
    imageCat = np.array(eng.getCategories(imageFileName,sz))
    eng.quit()
     # imageCat : sz x sz matrix where each pixel has its category
    return imageCat

def getSegmentedImage(imageFileName,sz): # Returns segmented pixels in LAB and its corresponding color category.
    eng = matlab.engine.start_matlab()
    segImg, segImgCat = eng.activeContourSegmentation(imageFileName,sz, nargout=2)
    eng.quit()
    segImg = np.array(segImg)/255;
    segImg = skcolor.rgb2lab(segImg)
    segImgCat = np.array(segImgCat)
    return segImg, segImgCat


In [10]:
# p,q = getSegmentedImage(nameFile,sz)
# print(p.shape, q.shape)

In [11]:
def getCategoricalFeatures(nameFile, sz, conceptNum, segmentedImgCat):
    imageWithCategories = getCategoriesMatrix(nameFile,sz)
    features = np.zeros((58,7))
    col = 0
    for i in range(6):
        if i == 4:
            xx = imageWithCategories #np.copy(imageWithCategories)
        elif i == 5:
            xx = segmentedImgCat
        else:
            xx = getSlicedImageArray(imageWithCategories,i,2)
        #mat = np.copy(imageWithCategories)
        
        categoryImg = [0 for p in range(11)]
        for k in range(11):
            categoryImg[k] = sum(1 for p in xx.flatten() if p-1 == k)/xx.size
        rowNum = -1
        
        for color in range(58):
            rowNum += 1
            # get Category for the specific color
            colorCat = category.loc[color,0]   #print(category.iloc[10,0])
            #print(colorCat-1)
            # Assign the specific colorCat's fractional value to the column
            features[rowNum,col] = categoryImg[colorCat-1]
            if col == 5:
                # Assign label
                label = data.loc[conceptNum]['col_'+str(color)]
                features[rowNum,col+1] = label
        col = col + 1

    return features

In [12]:
numFt = 186
X = np.zeros((count*58,numFt+1) )  # +1 for label #Number of features [] # Feature Vector -- 58N rows
init = time.time()
rowNum = -1
strtInd = []  # To keep track of the starting indices of fruits in the feature matrix
cvInd = 1 
counter = 0
sz = 100    # Size of image: 100 by 100

for conceptNum,concept in enumerate(allConcepts):
    t0 = time.time()
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>\n",conceptNum,": Concept : ",concept)
    # Get label for corresponding fruit (for given color)
    #label = data.loc[conceptNum]['col_'+str(color)]
    ct = 0    # Keeps track of images inside a category
    strtInd.append(counter)
    trainFiles = glob.glob( path + "/" +typeImg +"/" +concept+"/*_Top_"+numImg+"/*.*" )
    print("Total number of images for ",concept," : ",len(trainFiles))
    t = []
    for i, nameFile in enumerate(trainFiles):
        t1 = time.time()
        #print("\nReading file ......")
        ct = ct + 1
        image = imread( nameFile )
        imageResized = resize( image, (sz,sz),anti_aliasing=True,mode='reflect')
        img = imageResized
        # Convert the pixels in  image to LAB space
        imgLAB = skcolor.rgb2lab(img)

        # Call the function which calculates all 186 features for all colors
        x = getImageFeatures(nameFile,imgLAB,conceptNum)
        X[counter:counter+58,:] = np.row_stack(x)
        # Increment counter by 58
        counter += 58
        print(i, "image:", time.time()-t1," seconds")
        t.append(time.time()-t1)
    print(t)
    print("** Time elapsed: ",time.time()-t0," seconds **")
strtInd.append(len(X))
print(strtInd)
print("** Time elapsed: ",time.time()-init," seconds **")

>>>>>>>>>>>>>>>>>>>>>>>>>>>>
 0 : Concept :  mango
Total number of images for  mango  :  5
0 image: 108.7892050743103  seconds
1 image: 103.6506884098053  seconds
2 image: 106.10732698440552  seconds
3 image: 103.53623414039612  seconds
4 image: 107.59362602233887  seconds
[108.79020166397095, 103.6506884098053, 106.10732698440552, 103.53623414039612, 107.59362602233887]
** Time elapsed:  529.6800737380981  seconds **
>>>>>>>>>>>>>>>>>>>>>>>>>>>>
 1 : Concept :  watermelon
Total number of images for  watermelon  :  5
0 image: 119.15496301651001  seconds
1 image: 98.79854989051819  seconds
2 image: 129.70920181274414  seconds
3 image: 109.23964023590088  seconds
4 image: 108.15051674842834  seconds
[119.15596008300781, 98.80054569244385, 129.70920181274414, 109.24063730239868, 108.15151453018188]
** Time elapsed:  565.0598542690277  seconds **
>>>>>>>>>>>>>>>>>>>>>>>>>>>>
 2 : Concept :  honeydew
Total number of images for  honeydew  :  5
0 image: 88.63577365875244  seconds
1 image: 104

In [13]:
strtInd

[0, 290, 580, 870, 1160, 1450, 1740, 2030, 2320, 2610, 2900, 3190, 3480]

In [14]:
## convert your array into a dataframe
xTemp = np.copy(X)
#y = np.copy(X[:,-1])
df = pd.DataFrame(xTemp)
## save to xlsx file

filepath = 'dataPhoto5.xlsx'
df.to_excel(filepath, index=False)
tot = len(xTemp)

In [15]:
# import time
# init = time.time()
# for conceptNum,concept in enumerate(allConcepts):
#     trainFiles = glob.glob( path + "/" +typeImg +"/" +concept+"/*_Top_"+numImg+"/*.*" )
#     print("Total number of images for ",concept," : ",len(trainFiles))
#     t0 = time.time()
#     for i, nameFile in enumerate(trainFiles):
#         t1 = time.time()
#         print(i, ": ",nameFile)
#         image = imread( nameFile )
        
# #         imageResized = resize( image, (sz,sz),anti_aliasing=True,mode='reflect')
# #         img = imageResized
# #         eng = matlab.engine.start_matlab()
# #         imageCat = np.array(eng.imread(nameFile))
# #         eng.quit()
#         # Convert the pixels in  image to LAB space
# #         imageLAB = skcolor.rgb2lab(img)
# #         segmentedImg = np.copy(imageLAB)
        
#         #ft = getImageFeatures(nameFile,imgLAB,conceptNum)
# #         ft1 = getSphericalFeatures(imageLAB, segmentedImg)
# #         ft2 = getCylindricalFeatures(imageLAB, segmentedImg)
# #         ft3 = getCategoricalFeatures(nameFile,sz,conceptNum, segmentedImg)
        
#         print(time.time()-t1," seconds")
#     print("\n ** Time elasped : " ,time.time()-t0 ," seconds **")