<a href="https://colab.research.google.com/github/SampadSikder/Nonnegative-Matrix-Factorization-Experimentation/blob/main/accuracy_of_coil_dataset_with_visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Equations from https://proceedings.neurips.cc/paper/2000/file/f9d1152547c0bde01830b7e8bd60024c-Paper.pdf

In [1]:
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
from sklearn.cluster import KMeans
import numpy
import matplotlib.pyplot as mtp
import os
from sklearn.decomposition import NMF
import collections

In [2]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


# Code Library

In [3]:
import time

def extractFrameFromImage(filePath, imageLimit, frame_rate):
  vidcap = cv2.VideoCapture(filePath)
  success,image = vidcap.read()
  count = 0
  #define framerate
  prev = 0
  frame=0
  while success:
    time_elapsed = time.time() - prev
    success,image = vidcap.read()  
    if time_elapsed > 1./frame_rate:
      prev = time.time()
      cv2.imwrite("%d.jpg" % count, image)     # save frame as JPEG file      
      #print('Read a new frame: ', success)
      count=count+1
      frame += frame_rate # i.e. at 30 fps, this advances one second
      vidcap.set(cv2.CAP_PROP_POS_FRAMES, frame)
    if count>=imageLimit:
      break
  vidcap.release()
  cv2.destroyAllWindows()

  

In [4]:
def countWidthandHeight(imagePath, downscalingFactor):
  width,height=0,0;
  for img in os.listdir(imagePath):
    extension = os.path.splitext(img)[1][1:]
    if extension=='jpg' or extension=='png':
      sampleImagePath=imagePath+'/'+img
      img=cv2.imread(sampleImagePath)
      image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
      #resize

      width = int(image.shape[1]/downscalingFactor)
      height = int(image.shape[0]/downscalingFactor)

      print(width, height)

      break


  count=0

  for image_name in os.listdir(imagePath):
    extension = os.path.splitext(image_name)[1][1:]
    if extension=='jpg' or extension=='png':
      count=count+1

  return width, height, count

In [5]:
def convertAllImageToMatrix(obj, width, height, imageCount, imageRootDir): # imageCount = number of images  , imageRootDir = "/content"

  y = width*height

  print(imageCount) 

  m = np.zeros(shape=(imageCount, int(y)))
  print(m.shape)
  itr = 0

  cntImage={}

  for index, image in enumerate(obj):
    cntImage[obj[index]]=0

  name_list = []
  for image_name in os.listdir(imageRootDir): 
    extension = os.path.splitext(image_name)[1][1:]
    if extension=='jpg' or extension=='png':
      image_label=image_name.split("__")[0]
      if image_label in obj and cntImage[image_label]<50:
        cntImage[image_label]+=1
        image_path=imageRootDir+'/'+image_name
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        #resize
        dim = (width, height)
        print(width, height)
        #resizing image
        resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
        
        rows,cols=resized.shape#normal shape
        img_size = rows*cols
        img_1D_vector = resized.reshape(img_size)
        
        m[itr]=img_1D_vector
        itr=itr+1
        name_list.append(image_name)
        print(itr)

        if itr>=imageCount:
          break
      
  return m, name_list, itr

In [6]:
def normalizeM(m):
  sum=0
  row, col=m.shape
  for i in range(0,row):
    for j in range(0,col):
      sum+=m[i][j]
  mean=sum/row*col 

  m=m/mean

  return m, mean

In [7]:
def updateW(x,w,h):
  #XVT
  ht=np.transpose(h)
  numerator=np.matmul(x,ht)
  #UVVT
  den1=np.matmul(w,h)
  denominator=np.matmul(den1,ht)

  #return
  print('U completed#########################################################################################')
  secondPart=np.divide(numerator,denominator)
  return np.multiply(w,secondPart)

In [8]:
def updateH(x,w,h):
  #UT*X
  wt=np.transpose(w)
  numerator=np.matmul(wt,x)
  #UTUV
  den1=np.matmul(w,h)
  denominator=np.matmul(wt,den1)
  
  #update return
  print('V completed#########################################################################################')
  secondPart=np.divide(numerator,denominator)
  return np.multiply(h,secondPart)

In [9]:
def performNMF(x, featureNo, imageCount, EPSILON=1e-9):
  w=np.random.normal(3,2.5,size=(imageCount,featureNo))
  h=np.random.normal(3,2.5,size=(featureNo, x.shape[1]))

  print("Shape of W: "+str(w.shape))
  print("Shape of H: "+str(h.shape))

  itr=0
  cost=np.linalg.norm(x-np.matmul(w,h))
  while cost>EPSILON:
    if itr%2==0:
      h=updateH(x,w,h)
    else:
      w=updateW(x,w,h)
    wh=np.matmul(w,h)
    print(w,h)
    cost=np.linalg.norm(x-wh)
    print(cost)
    if itr>=100:
      break
    itr=itr+1
  return w,h

### Testing Code

In [10]:
from google.colab.patches import cv2_imshow

In [11]:
def getImageListFromM(x_new):
  images = []
  for row in x_new:
    img_data = []
    for i in range(height):
      img_row = []
      for j in range(width):
        img_row.append(row[i*width + j])
      img_data.append(img_row)
    images.append(img_data)
  return images

In [12]:
def compareAndTest(m, w, h, mean):
  x_new=np.matmul(w,h)
  x_new=np.ceil(x_new*mean)
  #x_new=np.transpose(x_new)

  resulting_images = getImageListFromM(x_new)
  original_images = getImageListFromM(np.ceil(m*mean))

  for index in range(m.shape[0]):
    print("-------------------------------Number of image: "+str(index))

    img=np.array(resulting_images[index])
    cv2_imshow(img)
    print("Image after NMF:")

    img=np.array(original_images[index])
    cv2_imshow(img)
    print("Image Before NMF:")


In [13]:
def variance(m): # m = matrix of all images
  return np.var(m)

In [14]:
def __grouper(labels, original_images, resulting_images):
    groups = []
    for current_label in set(labels):
      cluster = []
      for index,label in enumerate(labels):
        if label == current_label:
          node = [label, original_images[index], resulting_images[index]]
          cluster.append(node)
      groups.append(cluster)
    return groups

In [15]:
def compareAndTestWithKNN(m, w, h, mean, n_clusters):
  x_new=np.matmul(w,h)
  x_new=np.ceil(x_new*mean)
  #x_new=np.transpose(x_new)

  kmeans = KMeans(n_clusters, random_state=0)
  kmeans.fit(w)

  resulting_images = getImageListFromM(x_new)
  original_images = getImageListFromM(np.ceil(m*mean))
  
  clusters = __grouper(kmeans.labels_, original_images, resulting_images)

  for cluster in clusters:
    print("----------------Labels------------------------")
    for index, node in enumerate(cluster):
      print("-------------------------------Number of image: "+str(index)+"-----------Cluster="+str(node[0])+".")
      img=np.array(node[1])
      cv2_imshow(img)
  
  return kmeans

In [16]:
def scatterPlot(kmeans,F):
  y_predict= kmeans.fit_predict(F)  
  mtp.scatter(F[y_predict == 0, 0], F[y_predict == 0, 1], s = 10, c = 'blue', label = 'Cluster 1') #for first cluster  
  mtp.scatter(F[y_predict == 1, 0], F[y_predict == 1, 1], s = 10, c = 'green', label = 'Cluster 2') #for second cluster  
  mtp.scatter(F[y_predict== 2, 0], F[y_predict == 2, 1], s = 10, c = 'red', label = 'Cluster 3') #for third cluster  
  mtp.scatter(F[y_predict == 3, 0], F[y_predict == 3, 1], s = 10, c = 'cyan', label = 'Cluster 4') #for fourth cluster  
  mtp.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 30, c = 'yellow', label = 'Centroid')   
  mtp.title('Clusters')  

  mtp.legend()  
  mtp.show() 

# Experimentation Area

In [89]:
def testSingleFrameMatrix(image, width, height, kmeans, nmf_model):
  image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

  dim = (width, height)
  resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
  
  rows,cols=resized.shape
  img_size = rows*cols
  img_1D_vector = resized.reshape(img_size)
  print(img_1D_vector.shape)
  img_transformed = nmf_model.transform([img_1D_vector])
  print(img_transformed.shape)
  prediction = kmeans.predict(img_transformed)[0]

  return prediction


In [90]:
def getLabels(imageNameList):
  labels = {}
  for name in imageNameList:
    label = name.split("__")[0]
    labels[name] = label

  return labels

In [91]:
def setObjects(path,numberOfObjects):
  obj=[]
  cnt=0
  for image_name in os.listdir(path): 
    extension = os.path.splitext(image_name)[1][1:]
    if extension=='jpg' or extension=='png':
      label = image_name.split("__")[0]
      obj.append(label)
      cnt+=1
    if cnt>=numberOfObjects:
      break
  
  return obj


In [120]:
def assignLabel(imageNameList, kmeans):
  labels = {}
  for i in range(len(imageNameList)):
    imageName, _m = imageNameList[i].split("__")
    labels.setdefault(imageName, []).append(kmeans.labels_[i])

  
  for key, value in labels.items():
    count = collections.Counter(value)
    labels[key] = count.most_common(1)[0][0]

  return labels

In [138]:
import matplotlib.pyplot as plt

def visualize_basis_vectors(nmf_model, n_components, width, height):
    fig, axs = plt.subplots(1, n_components, figsize=(15, 5))
    
    for i in range(n_components):
        axs[i].imshow(nmf_model.components_[i].reshape(height, width), cmap='gray')
        axs[i].axis('off')
        axs[i].set_title('Basis Vector ' + str(i+1))
        
    plt.show()


In [139]:
def train(path,n_components, n_clusters): # It requires the images to be in /content
  imageLimit = 2000
  frame_rate=5
  #extractFrameFromImage(path, imageLimit, frame_rate)
  width, height, imageCount=countWidthandHeight(path, 7)
  
  obj= setObjects(path,n_clusters)


  m, imageNameList, imageCount = convertAllImageToMatrix(obj, width, height, imageLimit, path)

  print(m.shape)
  labels=getLabels(imageNameList)
  
  m, mean = normalizeM(m)
  #mean=1
  #w,h=performNMF(m, n_components, len(imageNameList))
  nmf_model = NMF(n_components,max_iter=100, init= 'nndsvd')
  w = nmf_model.fit_transform(m)
  h=nmf_model.components_
  visualize_basis_vectors(nmf_model, n_components, width, height)
  print(w.shape)
  print(h.shape)
  x_new=np.matmul(w,h)
  x_new=np.ceil(x_new*mean)
  x_new=np.transpose(x_new)
  #print("Original Image Set Variance: "+str(variance(np.ceil(np.transpose(m)*mean))))
  #print("NMF Generated Image Set Variance: "+str(variance(x_new)))
  
  kmeans = KMeans(n_clusters, random_state=2)
  kmeans.fit(w)

  print(kmeans.labels_)
  labels=assignLabel(imageNameList,kmeans)

  return width, height, m, w, h, labels, kmeans, imageNameList, nmf_model
  

In [140]:
def test(testPath, width, height, labels, kmeans, nmf_model,imageNameList):
   correct=0
   failed=0
   for image_name in os.listdir(testPath):
    print(image_name) 
    extension = os.path.splitext(image_name)[1][1:]
    check_label=image_name.split("__")[0]
    if extension=='jpg' or extension=='png':
      if image_name not in imageNameList and check_label in labels.keys():
        print(image_name)
        image_path=testPath+'/'+image_name
        image = cv2.imread(image_path) 
        test_label=testSingleFrameMatrix(image, width, height, kmeans, nmf_model)
        actual_label=labels[check_label]
        print("---------------------------")
        print("Label from kmeans prediction: ",test_label)
        print("Actual label: ",actual_label)
        
        if test_label==actual_label:
          correct+=1
        else:
          failed+=1

        print(correct, failed)

   return (correct/(correct+failed)*100)
  

In [None]:
width, height, m,w,h,labels,kmeans,imageNameList, nmf_model=train('/content/coil-100/',49,100)

In [142]:
print(labels)

{'obj62': 27, 'obj32': 55, 'obj25': 8, 'obj66': 15, 'obj80': 74, 'obj6': 85, 'obj42': 13, 'obj7': 27, 'obj4': 77, 'obj30': 24, 'obj94': 94, 'obj87': 69, 'obj85': 73, 'obj93': 27, 'obj91': 10, 'obj33': 60, 'obj67': 39, 'obj23': 13, 'obj48': 19, 'obj55': 65, 'obj54': 62, 'obj10': 8, 'obj51': 64, 'obj98': 39, 'obj8': 30, 'obj35': 11, 'obj12': 48, 'obj46': 59, 'obj14': 25, 'obj28': 2, 'obj20': 72, 'obj83': 29, 'obj36': 34, 'obj2': 79, 'obj92': 6, 'obj79': 90, 'obj95': 26, 'obj34': 47, 'obj71': 56, 'obj40': 84, 'obj56': 4, 'obj50': 9, 'obj74': 46, 'obj18': 14, 'obj17': 22, 'obj70': 5, 'obj39': 23, 'obj45': 92, 'obj31': 38, 'obj29': 55, 'obj44': 96, 'obj5': 20, 'obj11': 77, 'obj64': 88, 'obj61': 6, 'obj16': 63, 'obj65': 40, 'obj53': 67, 'obj63': 2, 'obj52': 22, 'obj38': 13, 'obj58': 76, 'obj37': 53}


In [143]:

print(kmeans.labels_)

[27 17  8 ... 77 14 77]


In [None]:
accuracy=test("/content/coil-100", width, height, labels, kmeans, nmf_model, imageNameList)

In [119]:

print(accuracy)

21.17507886435331


In [111]:

!mv coil-100/* /contentb zx
!ls /content

mv: target 'zx' is not a directory
coil-100  drive  sample_data


In [None]:
!ls /content/drive/MyDrive/Colab\ Notebooks/NMF\ datasets/COIL\ 100/coil-100/coil-100/
!cp /content/drive/MyDrive/Colab\ Notebooks/NMF\ datasets/COIL\ 100/coil-100/coil-100/ . -r
!ls coil-100