In [None]:
from google.colab import drive
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import csv

from sklearn.cluster import MiniBatchKMeans
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [None]:
# Mount Google Drive at /drive (Colab only); the cells that follow read and
# write files under '/drive/My Drive/CAD_1_1/'.
drive.mount('/drive') 

Mounted at /drive


In [None]:
# Open the four hair-removed .npz archives (train / validation for the `Les`
# and `NV` classes). The arrays themselves are extracted in a later cell.
# NOTE(review): allow_pickle=True permits unpickling arbitrary Python objects,
# which can execute code from an untrusted file. Presumably these archives are
# self-produced and only hold plain image arrays - confirm, and drop the flag
# if it is not needed.
train_Les, train_NV, val_Les, val_NV = (
  np.load(npz_path, allow_pickle=True)
  for npz_path in (
    '/drive/My Drive/CAD_1_1/train_Les_hairRemoved.npz',
    '/drive/My Drive/CAD_1_1/train_NV_hairRemoved.npz',
    '/drive/My Drive/CAD_1_1/val_Les_hairRemoved.npz',
    '/drive/My Drive/CAD_1_1/val_NV_hairRemoved.npz',
  )
)


In [None]:
# Pull the image stack stored under the default key 'arr_0' out of each archive.
# Indexing an .npz archive already returns a freshly read ndarray, so
# np.asarray is used instead of np.array to avoid duplicating every
# (multi-gigabyte) stack a second time in memory.
trainX_Les, trainX_NV, valX_Les, valX_NV = (
  np.asarray(archive['arr_0'])
  for archive in (train_Les, train_NV, val_Les, val_NV)
)

In [None]:
# Sanity check: print the shape of each image stack, one per line.
for image_stack in (trainX_Les, trainX_NV, valX_Les, valX_NV):
  print(image_stack.shape)

(2400, 450, 600, 3)
(2400, 450, 600, 3)
(600, 450, 600, 3)
(600, 450, 600, 3)


In [None]:
# ORB keypoint detector / descriptor extractor created with OpenCV's default
# settings; this single instance is reused by every `detectAndCompute` call in
# the notebook.
orb = cv2.ORB_create()

In [None]:
def _orb_descriptors(images):
  """Collect the ORB descriptor array of every image in `images`.

  Uses the notebook-level `orb` extractor. Images for which
  `orb.detectAndCompute` returns no descriptors (`None`) are skipped, so the
  returned list can be shorter than `images`.

  Returns a list with one descriptor array per image that had descriptors.
  """
  per_image = []
  for image in tqdm(images):
    _, descriptors = orb.detectAndCompute(image, None)
    if descriptors is not None:
      per_image.append(descriptors)
  return per_image


# One call per data split instead of four copy-pasted loops.
orbFeatures_trainX_Les  = _orb_descriptors(trainX_Les)
orbFeatures_trainX_NV   = _orb_descriptors(trainX_NV)
orbFeatures_valX_Les    = _orb_descriptors(valX_Les)
orbFeatures_valX_NV     = _orb_descriptors(valX_NV)


100%|██████████| 2400/2400 [00:18<00:00, 130.52it/s]
100%|██████████| 2400/2400 [00:17<00:00, 137.23it/s]
100%|██████████| 600/600 [00:04<00:00, 133.03it/s]
100%|██████████| 600/600 [00:04<00:00, 132.84it/s]


In [None]:
# Flatten each per-image list of descriptor arrays into one 2-D matrix with a
# row per descriptor. All four stacks are built before any name is rebound.
(orbFeatures_trainX_Les,
 orbFeatures_trainX_NV,
 orbFeatures_valX_Les,
 orbFeatures_valX_NV) = (
  np.vstack(per_image_descriptors)
  for per_image_descriptors in (
    orbFeatures_trainX_Les,
    orbFeatures_trainX_NV,
    orbFeatures_valX_Les,
    orbFeatures_valX_NV,
  )
)

In [None]:
# Sanity check: print the shape of each stacked descriptor matrix, one per line.
for descriptor_matrix in (
  orbFeatures_trainX_Les,
  orbFeatures_trainX_NV,
  orbFeatures_valX_Les,
  orbFeatures_valX_NV,
):
  print(descriptor_matrix.shape)

(424598, 32)
(283280, 32)
(108318, 32)
(72606, 32)


In [None]:
# Cluster all training ORB descriptors (both classes together) into 2*10 = 20
# centroids. The histogram cells later assign each descriptor to one of these
# centroids, so the centroids act as the histogram bins.
# random_state is fixed because MiniBatchKMeans is stochastic (initialisation
# and mini-batch sampling): without it the cluster ids, the exported CSV
# features and every downstream score change from run to run.
kmeans_orbFeatures_trainX = MiniBatchKMeans(
  n_clusters=2*10,
  batch_size=trainX_Les.shape[0]*3*2,
  random_state=0,
  verbose=0,
).fit(np.concatenate((orbFeatures_trainX_Les, orbFeatures_trainX_NV)))

In [None]:

# Build one normalised cluster-assignment histogram per training image.
# Every image contributes a row (all zeros when ORB finds no descriptors), so
# row i corresponds to image i of the `Les` stack followed by the `NV` stack.
n_words = kmeans_orbFeatures_trainX.n_clusters  # histogram length = number of centroids
n_train_images = len(trainX_Les) + len(trainX_NV)

# Walk both stacks in order without np.concatenate, which would copy every
# image into a second full-size array just to iterate over it.
train_images = (image for stack in (trainX_Les, trainX_NV) for image in stack)

histo_orbFeatures_trainX = []

for x in tqdm(train_images, total=n_train_images):
  _, des = orb.detectAndCompute(x, None)

  if des is None or len(des) == 0:
    histo = np.zeros(n_words)
  else:
    # Assign all descriptors of the image in one call and count the
    # assignments; dividing by the descriptor count normalises the histogram
    # to sum to 1. This replaces one predict() call per descriptor.
    words = kmeans_orbFeatures_trainX.predict(des)
    histo = np.bincount(words, minlength=n_words) / len(des)

  histo_orbFeatures_trainX.append(histo)




100%|██████████| 4800/4800 [06:42<00:00, 11.92it/s]


In [None]:
# Build one normalised cluster-assignment histogram per validation image, using
# the centroids fitted on the training descriptors.
# Every image contributes a row (all zeros when ORB finds no descriptors), so
# row i corresponds to image i of the `Les` stack followed by the `NV` stack.
n_words = kmeans_orbFeatures_trainX.n_clusters  # histogram length = number of centroids
n_val_images = len(valX_Les) + len(valX_NV)

# Walk both stacks in order without np.concatenate, which would copy every
# image into a second full-size array just to iterate over it.
val_images = (image for stack in (valX_Les, valX_NV) for image in stack)

histo_orbFeatures_valX = []

for x in tqdm(val_images, total=n_val_images):
  _, des = orb.detectAndCompute(x, None)

  if des is None or len(des) == 0:
    histo = np.zeros(n_words)
  else:
    # Assign all descriptors of the image in one call and count the
    # assignments; dividing by the descriptor count normalises the histogram
    # to sum to 1. This replaces one predict() call per descriptor.
    words = kmeans_orbFeatures_trainX.predict(des)
    histo = np.bincount(words, minlength=n_words) / len(des)

  histo_orbFeatures_valX.append(histo)

100%|██████████| 1200/1200 [01:42<00:00, 11.73it/s]


In [None]:
# Sanity check: stack each list of histograms and print the resulting shape
# (train first, then validation).
for histograms in (histo_orbFeatures_trainX, histo_orbFeatures_valX):
  print(np.vstack(histograms).shape)


(4800, 20)
(1200, 20)


In [None]:
# One row per training image, one column per histogram bin, plus 'labels'.
trainData = pd.DataFrame(histo_orbFeatures_trainX)
# The histograms were built from the `Les` stack followed by the `NV` stack, so
# the first len(trainX_Les) rows get label 1 and the remaining rows label 0.
# The whole column is assigned at once: the former chained assignment
# (df['labels'][0:n] = 1) raises SettingWithCopyWarning and does not modify
# the frame at all when pandas copy-on-write is active.
trainData['labels'] = (np.arange(len(trainData)) < len(trainX_Les)).astype(int)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
# One row per validation image, one column per histogram bin, plus 'labels'.
valData = pd.DataFrame(histo_orbFeatures_valX)
# The histograms were built from the `Les` stack followed by the `NV` stack, so
# the first len(valX_Les) rows get label 1 and the remaining rows label 0.
# The whole column is assigned at once: the former chained assignment
# (df['labels'][0:n] = 1) raises SettingWithCopyWarning and does not modify
# the frame at all when pandas copy-on-write is active.
valData['labels'] = (np.arange(len(valData)) < len(valX_Les)).astype(int)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
# Persist the labelled histogram tables to Drive (row index omitted) so the
# features can be reused without recomputing them.
for frame, csv_path in (
  (trainData, '/drive/My Drive/CAD_1_1/ORB_Train_hairRemoved.csv'),
  (valData, '/drive/My Drive/CAD_1_1/ORB_val_hairRemoved.csv'),
):
  frame.to_csv(csv_path, index=False)

In [None]:
# Shuffle the rows (frac=1 keeps every row) so the classes are interleaved
# instead of appearing as two contiguous blocks.
# random_state pins the permutation so that the classifiers trained on the
# shuffled table see the same row order on every run.
trainDataShuffle = trainData.sample(frac=1, random_state=0)
valDataShuffle = valData.sample(frac=1, random_state=0)

In [None]:
# Visual check of the shuffled training table: the histogram-bin columns
# followed by the 'labels' column, indexed by the original row number.
trainDataShuffle

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,labels
2459,0.050955,0.025478,0.095541,0.038217,0.050955,0.044586,0.012739,0.044586,0.038217,0.006369,0.019108,0.082803,0.082803,0.146497,0.063694,0.038217,0.076433,0.031847,0.031847,0.019108,0
3064,0.025000,0.000000,0.025000,0.137500,0.087500,0.000000,0.000000,0.150000,0.087500,0.000000,0.012500,0.025000,0.087500,0.050000,0.012500,0.087500,0.037500,0.175000,0.000000,0.000000,0
505,0.000000,0.000000,0.000000,0.000000,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1
2874,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0
1007,0.222222,0.444444,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.111111,0.111111,0.111111,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2211,0.000000,0.075188,0.007519,0.000000,0.000000,0.097744,0.030075,0.000000,0.007519,0.240602,0.112782,0.045113,0.022556,0.030075,0.007519,0.000000,0.015038,0.007519,0.127820,0.172932,1
720,0.032544,0.017751,0.053254,0.059172,0.076923,0.032544,0.020710,0.050296,0.059172,0.059172,0.041420,0.047337,0.065089,0.106509,0.020710,0.059172,0.062130,0.053254,0.038462,0.044379,1
3534,0.017857,0.000000,0.071429,0.250000,0.035714,0.035714,0.017857,0.000000,0.000000,0.017857,0.000000,0.000000,0.035714,0.017857,0.250000,0.125000,0.035714,0.017857,0.071429,0.000000,0
4478,0.000000,0.032787,0.049180,0.049180,0.098361,0.000000,0.000000,0.131148,0.065574,0.016393,0.000000,0.032787,0.163934,0.081967,0.049180,0.049180,0.065574,0.098361,0.000000,0.016393,0


In [None]:
# Multi-layer perceptron on the histogram features; the cell displays the mean
# accuracy on the validation table.
# random_state fixes the weight initialisation and batch shuffling so the
# reported score is reproducible. max_iter is set very high so that the
# optimiser's own convergence criterion, not the iteration cap, ends training.
mlp = MLPClassifier(verbose=False, max_iter=600000, random_state=0)
mlp.fit(trainDataShuffle.drop(["labels"],axis=1), trainDataShuffle['labels'])
mlp.score(valDataShuffle.drop(["labels"],axis=1),valDataShuffle['labels'])

0.6825

In [None]:
# Random forest (100 trees) on the histogram features; the cell displays the
# mean accuracy on the validation table.
# random_state fixes the bootstrap samples and feature sub-sampling so the
# reported score is reproducible.
rndst = RandomForestClassifier(n_estimators=100, random_state=0)
rndst.fit(trainDataShuffle.drop(["labels"],axis=1), trainDataShuffle['labels'])
rndst.score(valDataShuffle.drop(["labels"],axis=1),valDataShuffle['labels'])

0.6983333333333334

In [None]:
# Support-vector classifier with scikit-learn's default hyper-parameters,
# trained on the histogram features; the cell displays the mean accuracy on
# the validation table.
svc_train_features = trainDataShuffle.drop(columns=["labels"])
svc_train_labels = trainDataShuffle['labels']
svc_val_features = valDataShuffle.drop(columns=["labels"])
svc_val_labels = valDataShuffle['labels']

best_svc = SVC()
best_svc.fit(svc_train_features, svc_train_labels)
best_svc.score(svc_val_features, svc_val_labels)

0.6883333333333334

In [None]:
# Conclusion: these validation accuracies (hair-removed images) are worse than those obtained with the hair left in.