In [15]:

from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split
import skimage
from skimage.io import imread
from skimage.transform import resize
import pandas as pd

In [2]:
def load_image_files(container_path, dimension=(64, 64)):
    """
    Build a scikit-learn style dataset from a folder of category subfolders.

    Every immediate subdirectory of ``container_path`` is one category. Each
    entry inside it is read as an image, resized to ``dimension`` and labelled
    with the position of its subdirectory in the directory listing.

    Parameters
    ----------
    container_path : str or path-like
        Folder holding one subfolder per category.
    dimension : tuple
        Output shape handed to ``resize`` for every image.

    Returns
    -------
    Bunch
        ``data`` (flattened resized images), ``target`` (integer labels),
        ``target_names`` (subfolder names, indexed by label), ``images``
        (resized images, not flattened) and ``DESCR``.
    """
    root = Path(container_path)
    # Only directories count as categories; loose files at the top level are ignored.
    class_dirs = [entry for entry in root.iterdir() if entry.is_dir()]

    resized_images = []
    labels = []
    for label, class_dir in enumerate(class_dirs):
        for image_path in class_dir.iterdir():
            raw = skimage.io.imread(image_path)
            resized_images.append(
                resize(raw, dimension, anti_aliasing=True, mode='reflect')
            )
            labels.append(label)

    return Bunch(
        # One row per image: the resized pixels laid out as a 1-D feature vector.
        data=np.array([picture.flatten() for picture in resized_images]),
        target=np.array(labels),
        target_names=[class_dir.name for class_dir in class_dirs],
        images=np.array(resized_images),
        DESCR="A image classification dataset",
    )

In [3]:
# Labelled training set: each subfolder of images_svm/ is one class, and every
# image is resized to the loader's default 64x64 before being flattened.
image_dataset = load_image_files("images_svm/")

In [4]:
# Hold out 30% of the samples for evaluation; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    image_dataset.data, image_dataset.target, test_size=0.3,random_state=109)

In [5]:
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline

# pca = PCA(n_components=2, svd_solver='randomized', whiten=True, random_state=42)

# Two families of candidates: linear kernels tuned over C only, and RBF
# kernels tuned over every (C, gamma) combination -- 4 + 8 = 12 settings.
param_grid = [
    dict(C=[1, 10, 100, 1000], kernel=['linear']),
    dict(C=[1, 10, 100, 1000], gamma=[0.001, 0.0001], kernel=['rbf']),
]

# Cross-validated search over the grid, using every available core.
svc = svm.SVC()
model = GridSearchCV(estimator=svc, param_grid=param_grid, n_jobs=-1, verbose=1)

# model = make_pipeline(pca, clf)
model.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   13.6s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:   20.4s finished


GridSearchCV(estimator=SVC(), n_jobs=-1,
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}],
             verbose=1)

In [6]:
# pca.fit_transform(X_test,y_test)

# Accuracy of the selected SVC on the held-out split.
model.score(X_test,y_test)

0.8148148148148148

In [7]:
# Predicted class indices for the held-out split, reused by the
# classification report further down.
y_pred = model.predict(X_test)

In [12]:
# Disabled draft: tabulate the confusion matrix of the held-out predictions.
# NOTE(review): the column/index labels below describe a 2x2 (binary) matrix,
# while the classification report lists nine classes -- one label per class
# would be needed before re-enabling this.
# from sklearn.metrics import confusion_matrix
# import pandas as pd

# pd.DataFrame(
#     confusion_matrix(y_test, y_pred),
#     columns=['Positive', 'Negatif'],
#     index=['True', 'False']
# )

In [8]:
# Per-class precision / recall / F1 on the held-out split, headed by the repr
# of the fitted search object.
report = metrics.classification_report(y_test, y_pred)
print("Classification report for - \n{}:\n{}\n".format(model, report))

Classification report for - 
GridSearchCV(estimator=SVC(), n_jobs=-1,
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}],
             verbose=1):
              precision    recall  f1-score   support

           0       1.00      0.20      0.33         5
           1       0.62      0.71      0.67         7
           2       0.94      0.84      0.89        19
           3       0.50      0.83      0.62         6
           4       0.94      0.88      0.91        17
           5       0.93      0.78      0.85        18
           6       1.00      0.95      0.97        20
           7       0.60      1.00      0.75         9
           8       0.57      0.57      0.57         7

    accuracy                           0.81       108
   macro avg       0.79      0.75      0.73       108
weighted avg       0.86      0.81      0.81       108




In [9]:
# Images to classify, loaded with the same loader and default 64x64 size as the
# training set so the flattened feature vectors have the same length.
images_a_predire = load_image_files("crop/")

In [10]:
# One predicted class index per crop, in the order the loader read the files.
predictions = model.predict(images_a_predire.data)

In [11]:
# Show the raw predicted class indices; each value is a position in
# image_dataset.target_names.
predictions

array([6, 6, 3, 6, 6, 6, 2, 2, 2, 3, 1, 6, 6, 6, 6, 3, 5, 4, 5, 5, 8, 8,
       0, 3, 6, 0, 2, 3, 7, 5, 6, 6, 6, 2, 6, 5, 6, 6, 6, 6, 6, 6, 6, 2,
       2, 4, 4, 7, 2, 3, 3, 7, 5, 8, 6, 6, 3, 5, 5, 5, 2, 3, 3, 1, 6, 5,
       5, 4, 0, 8, 1, 3, 3, 2, 2, 1, 3, 6, 6, 0, 6, 3, 5, 8, 1, 3, 3, 6,
       5, 1, 3, 2, 3, 4, 8, 8, 2, 1, 3, 7, 5, 2, 6, 6, 6, 6, 6, 6, 6, 4,
       3, 3, 0, 2, 5, 2, 6, 5, 4, 5, 4, 8, 2, 2, 7, 2, 3, 7, 5, 4, 3, 3,
       1, 6, 6, 2, 6, 2, 2, 2, 2, 4, 5, 0, 3, 3, 7, 3, 2, 2, 2, 4, 5, 5,
       5, 8, 1, 0, 6, 6, 6, 6, 6, 2, 2, 4, 2, 3, 3, 6, 3, 1, 5, 4, 5, 5,
       2, 2, 7, 6, 4, 8, 8, 2, 8, 6, 3, 6, 6, 2, 6, 2, 2, 0, 1, 0, 6, 1,
       1, 6, 6, 6, 4, 4, 5, 5, 2, 1, 7, 3, 4, 5, 6, 8, 8, 2, 2, 6, 6, 6,
       1, 6, 6, 2, 6, 6, 6, 0, 0, 1, 6, 3, 3, 6, 5, 6, 4, 4, 8, 8, 2, 3,
       0, 2, 2, 0, 6, 7, 7, 4, 6, 6, 1, 6, 4, 5, 2, 2, 2, 5, 6, 6, 6, 6,
       4, 4, 4, 4, 3, 5, 8, 3, 3, 3, 2, 7, 4, 4, 6, 6, 6, 2, 7, 4, 5, 2,
       5, 5, 5, 2, 5, 6, 3, 5, 4, 4, 8, 2, 4, 4, 3,

In [12]:
def load_crop_images_to_df(container_path):
    """
    Read every image stored in the category subfolders of ``container_path``.

    Despite the name, no DataFrame is built here: the images are returned as a
    plain list, subfolder by subfolder, in directory-listing order.

    Parameters
    ----------
    container_path : str or path-like
        Folder holding one subfolder per category.

    Returns
    -------
    list
        One entry per file, exactly as returned by ``skimage.io.imread``
        (no resizing is applied).
    """
    root = Path(container_path)
    # Only directories are walked; loose files at the top level are ignored.
    class_dirs = [entry for entry in root.iterdir() if entry.is_dir()]

    return [
        skimage.io.imread(image_path)
        for class_dir in class_dirs
        for image_path in class_dir.iterdir()
    ]

In [13]:
# Re-read the crop/ images at their original resolution (this loader does not
# resize), to store them next to the predictions.
data = load_crop_images_to_df("crop/")

In [16]:
# One-column frame in which each cell holds a whole image array.
df = pd.DataFrame(data, columns=['images'])

In [17]:
# Attach the predicted class index to each crop. The match is purely
# positional: it relies on load_crop_images_to_df and load_image_files having
# walked crop/ in the same order.
df['type_de_dent'] = predictions

In [18]:
# Preview the first rows: image array plus predicted class index.
df.head()

Unnamed: 0,images,type_de_dent
0,"[[[115, 115, 115], [117, 117, 117], [120, 120,...",6
1,"[[[99, 99, 99], [95, 95, 95], [96, 96, 96], [9...",6
2,"[[[70, 70, 70], [67, 67, 67], [66, 66, 66], [6...",3
3,"[[[169, 169, 169], [174, 174, 174], [172, 172,...",6
4,"[[[132, 132, 132], [128, 128, 128], [128, 128,...",6


In [19]:
# Row count, dtypes and null counts of the assembled frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 531 entries, 0 to 530
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   images        531 non-null    object
 1   type_de_dent  531 non-null    int32 
dtypes: int32(1), object(1)
memory usage: 6.3+ KB


In [20]:
# Disabled draft: preview one stored crop in an OpenCV window.
# import cv2

# im_dent = df.loc[1, 'images']
# cv2.imshow("Image", im_dent)