In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split

from skimage.io import imread
from skimage.transform import resize
from tqdm import tqdm


In [2]:
def load_image_files(container_path, dimension=(64, 64)):
    """Load a folder-per-class image tree into a scikit-learn style ``Bunch``.

    Every immediate sub-directory of ``container_path`` is treated as one
    class, and every regular file inside it as one image of that class.

    Class folders are visited in sorted-name order, so the integer label a
    class receives depends only on its folder name. ``Path.iterdir`` yields
    entries in arbitrary order; without sorting, two separate calls (for
    example one on a train tree and one on a test tree) could number the same
    classes differently and silently misalign their labels.

    Parameters
    ----------
    container_path : str or Path
        Directory whose immediate sub-directories are the class folders.
    dimension : tuple of int, default (64, 64)
        Output shape handed to ``resize`` for every image.

    Returns
    -------
    Bunch
        ``data``: array with one flattened resized image per row.
        ``target``: array of integer class indices, one per row of ``data``.
        ``target_names``: class folder names; ``target_names[i]`` is class ``i``.
        ``images``: array of the resized, unflattened images.
        ``DESCR``: short description string.
    """
    image_dir = Path(container_path)
    # Sort by name so label indices are deterministic across calls and machines.
    folders = sorted(
        (entry for entry in image_dir.iterdir() if entry.is_dir()),
        key=lambda entry: entry.name,
    )
    categories = [folder.name for folder in folders]

    descr = "A image classification dataset"
    images = []
    flat_data = []
    target = []
    for label, folder in enumerate(folders):
        # Sorted file order keeps the row order reproducible as well.
        for file in sorted(folder.iterdir()):
            if not file.is_file():
                # Nested directories are not images; skip them rather than
                # letting imread fail on them.
                continue
            img = imread(file)
            img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
            flat_data.append(img_resized.flatten())
            images.append(img_resized)
            target.append(label)
    flat_data = np.array(flat_data)
    target = np.array(target)
    images = np.array(images)

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 images=images,
                 DESCR=descr)

In [3]:
# Load the training tree: each immediate sub-folder of train/ becomes one class
# and every image is resized to the loader's default 64x64 before flattening.
image_dataset = load_image_files("Data_Set_Argho/train/")

In [4]:
# Load the held-out tree from its own directory. Its class indices come from
# this tree's own folder enumeration, so they only line up with the training
# labels if both trees contain the same class folders in the same order.
test_dataset = load_image_files("Data_Set_Argho/test/")

In [5]:
# Features are the flattened resized images; labels are the integer folder
# indices. Train and test come from separate directories, not from a random
# split of a single dataset.
X_train, y_train = image_dataset.data, image_dataset.target
X_test, y_test = test_dataset.data, test_dataset.target

In [6]:
# Sanity check: the number of feature rows must equal the number of labels.
X_train.shape,y_train.shape

((18996, 12288), (18996,))

In [7]:
# Same check for the test set; the feature width must match the training width.
X_test.shape,y_test.shape

((4740, 12288), (4740,))

In [8]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes baseline trained on the flattened pixel vectors.
# fit() returns the estimator itself, so construction and training are chained.
NB = GaussianNB().fit(X_train, y_train)
NB

In [9]:
# Naive Bayes predictions for the test set.
# NOTE: `y_pred` is reassigned by every later model's predict cell, so the
# report/score cells below reflect whichever predict cell ran most recently.
y_pred = NB.predict(X_test)

In [10]:
# Per-class precision / recall / F1 for the Naive Bayes predictions.
# Row labels are the integer class indices (positions in test_dataset.target_names).
print("Classification report for - \n{}:\n{}\n".format(
    NB, metrics.classification_report(y_test, y_pred)))

Classification report for - 
GaussianNB():
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       708
           1       0.00      0.00      0.00       732
           2       0.03      0.03      0.03       444
           3       0.13      0.33      0.19       624
           4       0.08      0.03      0.04       768
           5       0.12      0.27      0.17       744
           6       0.11      0.07      0.08       720

    accuracy                           0.10      4740
   macro avg       0.07      0.10      0.07      4740
weighted avg       0.07      0.10      0.07      4740




In [11]:
# Number of predictions produced; should equal the number of test samples.
y_pred.size

4740

In [12]:
# Ground-truth test labels, for eyeballing against the predictions.
y_test

array([0, 0, 0, ..., 6, 6, 6])

In [15]:
# Fraction of test samples the Naive Bayes predictions in `y_pred` got right.
# Called through the already-imported `metrics` module: no visible cell imports
# the bare name `accuracy_score`, so the unqualified call raises NameError on a
# fresh kernel.
metrics.accuracy_score(y_test, y_pred)

0.10464135021097046

In [16]:
from sklearn.tree import DecisionTreeClassifier

# Decision-tree baseline. The tree builder permutes features randomly at each
# split, so an unseeded tree can differ from run to run; the seed is fixed (same
# value as the random forest cell) to make the reported scores reproducible.
DT = DecisionTreeClassifier(random_state=42)
DT = DT.fit(X_train, y_train)

In [17]:
# Predict test labels with the decision tree.
# NOTE: this overwrites the Naive Bayes predictions previously held in `y_pred`.
y_pred = DT.predict(X_test)

In [18]:
# Per-class precision / recall / F1 for the decision-tree predictions.
# Row labels are the integer class indices (positions in test_dataset.target_names).
print("Classification report for - \n{}:\n{}\n".format(
    DT, metrics.classification_report(y_test, y_pred)))

Classification report for - 
DecisionTreeClassifier():
              precision    recall  f1-score   support

           0       0.15      0.15      0.15       708
           1       0.14      0.14      0.14       732
           2       0.27      0.25      0.26       444
           3       0.24      0.22      0.23       624
           4       0.17      0.18      0.17       768
           5       0.45      0.40      0.42       744
           6       0.20      0.23      0.22       720

    accuracy                           0.22      4740
   macro avg       0.23      0.22      0.23      4740
weighted avg       0.23      0.22      0.23      4740




In [19]:
# Decision-tree predictions (truncated array repr) for a quick visual check.
y_pred

array([4, 2, 3, ..., 6, 0, 6])

In [20]:
# Ground-truth labels again, to compare by eye with the predictions above.
y_test

array([0, 0, 0, ..., 6, 6, 6])

In [22]:
# Fraction of test samples the decision-tree predictions in `y_pred` got right.
# Called through the already-imported `metrics` module: no visible cell imports
# the bare name `accuracy_score`, so the unqualified call raises NameError on a
# fresh kernel.
metrics.accuracy_score(y_test, y_pred)

0.22362869198312235

In [23]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours baseline voting over the 3 closest training samples.
# fit() returns the estimator itself, so construction and training are chained.
KNN = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
KNN

In [24]:
# Predict test labels with the k-NN model.
# NOTE: this overwrites the decision-tree predictions previously held in `y_pred`.
y_pred = KNN.predict(X_test)

In [25]:
# Per-class precision / recall / F1 for the k-NN predictions.
# Row labels are the integer class indices (positions in test_dataset.target_names).
print("Classification report for - \n{}:\n{}\n".format(
    KNN, metrics.classification_report(y_test, y_pred)))

Classification report for - 
KNeighborsClassifier(n_neighbors=3):
              precision    recall  f1-score   support

           0       0.10      0.15      0.12       708
           1       0.14      0.17      0.15       732
           2       0.58      0.31      0.40       444
           3       0.55      0.35      0.43       624
           4       0.15      0.12      0.13       768
           5       0.66      0.62      0.64       744
           6       0.17      0.22      0.19       720

    accuracy                           0.27      4740
   macro avg       0.34      0.28      0.29      4740
weighted avg       0.32      0.27      0.29      4740




In [26]:
# k-NN predictions (truncated array repr) for a quick visual check.
y_pred

array([1, 1, 1, ..., 6, 6, 6])

In [27]:
# Ground-truth labels again, to compare by eye with the predictions above.
y_test

array([0, 0, 0, ..., 6, 6, 6])

In [28]:
# Fraction of test samples the k-NN predictions in `y_pred` got right.
# Called through the already-imported `metrics` module: no visible cell imports
# the bare name `accuracy_score`, so the unqualified call raises NameError on a
# fresh kernel.
metrics.accuracy_score(y_test, y_pred)

0.2721518987341772

In [29]:
from sklearn.ensemble import RandomForestClassifier

# Small random forest (10 entropy-criterion trees) with a fixed seed.
# fit() returns the estimator itself, so construction and training are chained.
RF = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    random_state=42,
).fit(X_train, y_train)
RF

In [30]:
# Predict test labels with the random forest.
# NOTE: this overwrites the k-NN predictions previously held in `y_pred`.
y_pred = RF.predict(X_test)

In [31]:
# Per-class precision / recall / F1 for the random-forest predictions.
# Row labels are the integer class indices (positions in test_dataset.target_names).
print("Classification report for - \n{}:\n{}\n".format(
    RF, metrics.classification_report(y_test, y_pred)))

Classification report for - 
RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=42):
              precision    recall  f1-score   support

           0       0.13      0.18      0.15       708
           1       0.11      0.15      0.13       732
           2       0.53      0.37      0.43       444
           3       0.34      0.28      0.31       624
           4       0.18      0.14      0.16       768
           5       0.66      0.58      0.62       744
           6       0.19      0.18      0.18       720

    accuracy                           0.26      4740
   macro avg       0.31      0.27      0.28      4740
weighted avg       0.29      0.26      0.27      4740




In [32]:
# Random-forest predictions (truncated array repr) for a quick visual check.
y_pred

array([0, 1, 0, ..., 6, 6, 6])

In [33]:
# Ground-truth labels again, to compare by eye with the predictions above.
y_test

array([0, 0, 0, ..., 6, 6, 6])

In [34]:
# Fraction of test samples the random-forest predictions in `y_pred` got right.
# Called through the already-imported `metrics` module: no visible cell imports
# the bare name `accuracy_score`, so the unqualified call raises NameError on a
# fresh kernel.
metrics.accuracy_score(y_test, y_pred)

0.2622362869198312