##### Import des librairies python et définitions de constantes

In [8]:
import numpy as np
import pickle
#%matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from libtiff import TIFF

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.mixture import BayesianGaussianMixture
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA

#import ImageFilter

#from search_on_big_images import search_on_big_image
#import search_on_big_images
pickle_save_path = "../../Data/"
pickle_col_augmented_path = "../../Data/col_augmented.pickle"
pickle_col_not_augmented_path = "../../Data/col_not_augmented.pickle"
pickle_grey_augmented_path = "../../Data/grey_augmented.pickle"
pickle_grey_not_augmented_path = "../../Data/grey_not_augmented.pickle"
pickle_hough_val_save_path = "../../Data/hough_val.pickle"

###### Import des données
On essaie d'abord avec les données les plus simples. En Greyscale et sans augmentation des données.

In [2]:
with open(pickle_grey_augmented_path, 'rb') as my_pickle:
    X, Y = pickle.load(my_pickle)
X = X.reshape((X.shape[0],-1))
X_norm = normalize(X.reshape((X.shape[0],-1))) #we normalize the data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)

## Decision Tree

In [3]:
# Construction of the Classifier TREE
decision_tree = DecisionTreeClassifier()
#Learn the classifier
decision_tree.fit(X_train, Y_train)
#Compute the score
train_score = decision_tree.score(X_train, Y_train)
test_score = decision_tree.score(X_test, Y_test)
print("Training score : " + str(train_score))
print("Test score : " + str(test_score))
print("\nMatrice de confusion training :")
print(confusion_matrix(Y_train, decision_tree.predict(X_train)))

print("\nMatrice de confusion test :")
print(confusion_matrix(Y_test, decision_tree.predict(X_test)))

Training score : 1.0
Test score : 0.9356617647058824

Matrice de confusion training :
[[2461    0]
 [   0 2434]]

Matrice de confusion test :
[[252  17]
 [ 18 257]]


On fait du sur-apprentissage

### Les k plus proches voisins

In [4]:
#Now we take a stable classifier
neighbors_estimator = KNeighborsClassifier(n_neighbors=3)
#Learn the classifier
neighbors_estimator.fit(X_train, Y_train)
#Compute the score
train_score = neighbors_estimator.score(X_train, Y_train)
test_score = neighbors_estimator.score(X_test, Y_test)
print("Training score : " + str(train_score))
print("Test score : " + str(test_score))
print("\nMatrice de confusion training :")
print(confusion_matrix(Y_train, neighbors_estimator.predict(X_train)))

print("\nMatrice de confusion test :")
print(confusion_matrix(Y_test, neighbors_estimator.predict(X_test)))

Training score : 0.982635342185904
Test score : 0.9577205882352942

Matrice de confusion training :
[[2443   18]
 [  67 2367]]

Matrice de confusion test :
[[265   4]
 [ 19 256]]


## Random forest

In [5]:
randomForest = RandomForestClassifier(n_estimators=200)
randomForest.fit(X_train, Y_train)
train_score = randomForest.score(X_train, Y_train)
test_score = randomForest.score(X_test, Y_test)
print("Training score : " + str(train_score))
print("Test score : " + str(test_score))
print("\nMatrice de confusion training :")
print(confusion_matrix(Y_train, randomForest.predict(X_train)))

print("\nMatrice de confusion test :")
print(confusion_matrix(Y_test, randomForest.predict(X_test)))

Training score : 1.0
Test score : 0.9852941176470589

Matrice de confusion training :
[[2461    0]
 [   0 2434]]

Matrice de confusion test :
[[261   8]
 [  0 275]]


### ACP puis KNN

In [10]:
#reduced_X = PCA(n_components=2).fit_transform(X.reshape((X.shape[0],-1)))
acp_modele = PCA(n_components=3)
reduced_X = acp_modele.fit_transform(X_norm)
plt.plot(reduced_X[np.where(Y==1)[0]][:, 0], reduced_X[np.where(Y==1)[0]][:, 1], 'k.', markersize=8, color = "red")
plt.plot(reduced_X[np.where(Y==0)[0]][:, 0], reduced_X[np.where(Y==0)[0]][:, 1], 'k.', markersize=3, color = "blue")
plt.show()

In [9]:
neighbors_estimator = KNeighborsClassifier(n_neighbors=1)
#Learn the classifier
neighbors_estimator.fit(reduced_X, Y)
#Compute the score
train_score = neighbors_estimator.score(reduced_X, Y)
print("Training score : " + str(train_score))
print("\nMatrice de confusion training :")
print(confusion_matrix(Y, neighbors_estimator.predict(reduced_X)))

Training score : 1.0

Matrice de confusion training :
[[2730    0]
 [   0 2709]]


In [9]:
SIZE_IMAGETTE = 31
RAYON_IMAGETTE = int(SIZE_IMAGETTE/2)

def weightedAverage(pixel):
    return 0.299*pixel[0] + 0.587*pixel[1] + 0.114*pixel[2]

def RGBToGreyscale(image):
  grey_image = np.zeros((image.shape[1], image.shape[2]), dtype = np.uint8) # init 2D numpy array
  # get row number
  for rownum in range(image.shape[1]):
     for colnum in range(image.shape[2]):
        grey_image[rownum,colnum] = int(weightedAverage(image[:,rownum,colnum]))
  return grey_image

def search_on_big_image(acp_modele,modele,image_path, stride = 20, grey_modele = True, from_greyscale = False, size_max_x = None, size_max_y = None):
  #On fait glisser une fenêtre sur toute l'image
  imagettes_new = []
  tif_file = TIFF.open(image_path, mode='r')
  big_image = tif_file.read_image()
  if size_max_x:
      big_image = big_image[:,1000:1000+size_max_x,:]
  if size_max_y:
      big_image = big_image[:,:,1000:1000+size_max_y]
  big_shape = big_image.shape
  print(big_shape)
  for xCoord in np.arange(start = SIZE_IMAGETTE, stop = big_shape[1] - SIZE_IMAGETTE, step = stride):
      for yCoord in np.arange(start = SIZE_IMAGETTE, stop = big_shape[2] - SIZE_IMAGETTE, step = stride):
            if from_greyscale:
                test_imagette = big_image[xCoord-RAYON_IMAGETTE : xCoord+RAYON_IMAGETTE+1, yCoord-RAYON_IMAGETTE : yCoord+RAYON_IMAGETTE+1]                    
            else:
                test_imagette_color = big_image[:, xCoord-RAYON_IMAGETTE : xCoord+RAYON_IMAGETTE+1, yCoord-RAYON_IMAGETTE : yCoord+RAYON_IMAGETTE+1]
                if grey_modele:
                    test_imagette = RGBToGreyscale(test_imagette_color)
            test_imagette = test_imagette.reshape((1,-1))
            if modele.predict(acp_modele.transform(test_imagette))[0] == 1:
                if from_greyscale:
                    imagettes_new.append(test_imagette)
                else:
                    imagettes_new.append(test_imagette_color)
  return imagettes_new

In [45]:
#Image jamais utilisé :
big_image_path = "../../DATA_MARNIERES/DAT_marnieres_epreville_270799_jour/27_07_99/jour/calibre/TIF/Axe2_16h47_24_C.tif"
imagettes_color = search_on_big_image(acp_modele,neighbors_estimator,big_image_path, size_max_x = 800, size_max_y = 800)

(3, 800, 800)


#### Test sur grande image

In [21]:
neighbors_estimator = KNeighborsClassifier(n_neighbors=3)
#Learn the classifier
neighbors_estimator.fit(X, Y)
train_score = randomForest.score(X, Y)
print("Training score : " + str(train_score))

Training score : 0.9930795847750865


In [10]:
SIZE_IMAGETTE = 31
RAYON_IMAGETTE = int(SIZE_IMAGETTE/2)

def weightedAverage(pixel):
    return 0.299*pixel[0] + 0.587*pixel[1] + 0.114*pixel[2]

def RGBToGreyscale(image):
  grey_image = np.zeros((image.shape[1], image.shape[2]), dtype = np.uint8) # init 2D numpy array
  # get row number
  for rownum in range(image.shape[1]):
     for colnum in range(image.shape[2]):
        grey_image[rownum,colnum] = int(weightedAverage(image[:,rownum,colnum]))
  return grey_image

def search_on_big_image(modele,image_path, stride = 20, grey_modele = True, from_greyscale = False, size_max_x = None, size_max_y = None):
  #On fait glisser une fenêtre sur toute l'image
  imagettes_new = []
  tif_file = TIFF.open(image_path, mode='r')
  big_image = tif_file.read_image()
  if size_max_x:
      big_image = big_image[:,1000:1000+size_max_x,:]
  if size_max_y:
      big_image = big_image[:,:,1000:1000+size_max_y]
  big_shape = big_image.shape
  print(big_shape)
  for xCoord in np.arange(start = SIZE_IMAGETTE, stop = big_shape[1] - SIZE_IMAGETTE, step = stride):
      for yCoord in np.arange(start = SIZE_IMAGETTE, stop = big_shape[2] - SIZE_IMAGETTE, step = stride):
            if from_greyscale:
                test_imagette = big_image[xCoord-RAYON_IMAGETTE : xCoord+RAYON_IMAGETTE+1, yCoord-RAYON_IMAGETTE : yCoord+RAYON_IMAGETTE+1]                    
            else:
                test_imagette_color = big_image[:, xCoord-RAYON_IMAGETTE : xCoord+RAYON_IMAGETTE+1, yCoord-RAYON_IMAGETTE : yCoord+RAYON_IMAGETTE+1]
                if grey_modele:
                    test_imagette = RGBToGreyscale(test_imagette_color)
            test_imagette = test_imagette.reshape((1,-1))
            if modele.predict(test_imagette)[0] == 1:
                if from_greyscale:
                    imagettes_new.append(test_imagette)
                else:
                    imagettes_new.append(test_imagette_color)
  return imagettes_new

In [11]:
#Image jamais utilisé :
big_image_path = "../../DATA_MARNIERES/DAT_marnieres_epreville_270799_jour/27_07_99/jour/calibre/TIF/Axe2_16h47_24_C.tif"
imagettes_color = search_on_big_image(randomForest,big_image_path, size_max_x = 1000, size_max_y = 1000)

(3, 1000, 1000)


In [12]:
print(len(imagettes_color))

59


In [13]:
nb_imagettes = len(imagettes_color)
if nb_imagettes > 100:
    nb_imagettes = 100
    imagettes_color_reduce = imagettes_color[:nb_imagettes]
else:
    imagettes_color_reduce = imagettes_color
nb_col = 9
nb_row = int(nb_imagettes // nb_col) +1
pos = 0
plt.figure(figsize=(18, 18))
for imagette in imagettes_color_reduce:
    pos +=1
    plt.subplot(nb_row,nb_col,pos)
    plt.imshow(np.moveaxis(imagette,0,2))
plt.show()