In [1]:
# USAGE
# python classify_images.py
# python classify_images.py --model svm

# import the necessary packages
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
from PIL import Image
from imutils import paths
import cv2
import numpy as np
import argparse
import os
import random



In [2]:
# initialize the data and labels
print("[INFO] loading images...")
data = []
labels = []

# grab the image paths and randomly shuffle them
imagePaths = sorted(list(paths.list_images('dataset')))
random.seed(42)
random.shuffle(imagePaths)

# loop over the input images
for imagePath in imagePaths:
    # load the image, resize the image to be 32x32 pixels (ignoring
    # aspect ratio), flatten the image into 32x32x3=3072 pixel image
    # into a list, and store the image in the data list
    image = cv2.imread(imagePath)
    image = cv2.resize(image, (32, 32)).flatten() #keep in mind this resize dimension for predict later
    data.append(image)
 
    # extract the class label from the image path and update the labels list
    # label according to the folder
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)
print("done")

[INFO] loading images...
done


In [3]:
# data before scaling
data

[array([  3,   9,  17, ..., 118, 118, 123], dtype=uint8),
 array([16, 40, 67, ..., 13, 35, 67], dtype=uint8),
 array([148, 177, 199, ..., 146, 146, 146], dtype=uint8),
 array([188, 193, 201, ...,  13,  76, 123], dtype=uint8),
 array([231, 198, 182, ...,  45,  98, 166], dtype=uint8),
 array([180, 190, 207, ..., 141, 162, 189], dtype=uint8),
 array([ 10,   9,   3, ..., 193, 198, 198], dtype=uint8),
 array([232, 229, 221, ..., 175, 167, 166], dtype=uint8),
 array([190, 207, 225, ..., 164, 155, 176], dtype=uint8),
 array([ 83, 132, 164, ...,  27,  22,  37], dtype=uint8),
 array([21, 24, 24, ..., 14, 16, 16], dtype=uint8),
 array([130,  66,   8, ...,  23,  15, 139], dtype=uint8),
 array([197, 232, 236, ..., 187, 227, 232], dtype=uint8),
 array([126, 145, 158, ...,   2,   2,  24], dtype=uint8),
 array([ 95, 133, 137, ...,  99, 120, 135], dtype=uint8),
 array([235, 193,   5, ...,   7,   9, 226], dtype=uint8),
 array([ 14,  32,  70, ..., 110, 133, 131], dtype=uint8),
 array([118, 135, 159, ...

In [4]:
# scale the raw pixelintensities to the range [0, 1] for each class
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

In [5]:
# data after wrapping into one array and after scaling
data

array([[0.01176471, 0.03529412, 0.06666667, ..., 0.4627451 , 0.4627451 ,
        0.48235294],
       [0.0627451 , 0.15686275, 0.2627451 , ..., 0.05098039, 0.1372549 ,
        0.2627451 ],
       [0.58039216, 0.69411765, 0.78039216, ..., 0.57254902, 0.57254902,
        0.57254902],
       ...,
       [0.56078431, 0.58039216, 0.57647059, ..., 0.16470588, 0.17254902,
        0.23137255],
       [0.00392157, 0.00392157, 0.00392157, ..., 0.        , 0.01176471,
        0.01176471],
       [0.26666667, 0.35686275, 0.44313725, ..., 0.15686275, 0.18823529,
        0.18431373]])

In [6]:
labels

array(['Bad_Excuse', 'Bad_Excuse', 'Clean_Room', ..., 'Clean_Room',
       'Bad_Excuse', 'Bad_Excuse'], dtype='<U10')

In [7]:
# partition the data into 75% training and 25% validation
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)

In [8]:
trainX

array([[0.42745098, 0.53333333, 0.67843137, ..., 0.49411765, 0.76470588,
        0.82352941],
       [0.23529412, 0.58039216, 0.87843137, ..., 0.38823529, 0.50980392,
        0.63921569],
       [0.6745098 , 0.64705882, 0.81960784, ..., 0.43137255, 0.56078431,
        0.7372549 ],
       ...,
       [0.23137255, 0.21960784, 0.22745098, ..., 0.03137255, 0.03921569,
        0.07058824],
       [0.97254902, 0.97254902, 0.97254902, ..., 0.94509804, 0.93333333,
        0.94117647],
       [0.28627451, 0.30196078, 0.43921569, ..., 0.05490196, 0.00392157,
        0.00392157]])

In [9]:
trainY

array(['Messy_Room', 'Clean_Room', 'Clean_Room', ..., 'Bad_Excuse',
       'Messy_Room', 'Messy_Room'], dtype='<U10')

In [10]:
# define the dictionary of models our script can use, where the key
# to the dictionary is the name of the model (supplied via command
# line argument) and the value is the model itself
models = {
	"knn": KNeighborsClassifier(n_neighbors=1),
	"naive_bayes": GaussianNB(),
	"logit": LogisticRegression(solver="lbfgs", multi_class="auto"),
	"svm": SVC(kernel="linear"),
	"decision_tree": DecisionTreeClassifier(),
	"random_forest": RandomForestClassifier(n_estimators=100),
	"mlp": MLPClassifier()
# 	"xgb": XGBClassifier()    
}



In [71]:
# define the dictionary of models our script can use, where the key
# to the dictionary is the name of the model (supplied via command
# line argument) and the value is the model itself
models = {
	"knn": KNeighborsClassifier(n_neighbors=1),
	"naive_bayes": GaussianNB(),
	"logit": LogisticRegression(solver="lbfgs", multi_class="auto"),
	"svm": SVC(kernel="linear"),
	"decision_tree": DecisionTreeClassifier(),
	"random_forest": RandomForestClassifier(n_estimators=100),
	"mlp": MLPClassifier()
# 	"xgb": XGBClassifier()    
}

In [11]:
# encode the labels, converting them from strings to integers
# vector format for each class (0,1,2) 0 for clean, 1 for dirty and 2 for excuse
le = LabelEncoder()
labels = le.fit_transform(labels)

In [15]:
labels

array([0, 0, 1, ..., 1, 0, 0], dtype=int64)

### XGBoost

In [73]:
# train the model
print("[INFO] using '{}' model".format("XGBoost"))
model = XGBClassifier()
model.fit(trainX, trainY)

[INFO] using 'XGBoost' model


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='multi:softprob', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [74]:
# make predictions on our data and show a classification report
print("[INFO] evaluating XGBoostClassifier")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
	target_names=le.classes_))

[INFO] evaluating XGBoostClassifier
              precision    recall  f1-score   support

  Bad_Excuse       0.87      0.91      0.89       160
  Clean_Room       0.74      0.76      0.75       236
  Messy_Room       0.70      0.64      0.67       162

    accuracy                           0.77       558
   macro avg       0.77      0.77      0.77       558
weighted avg       0.76      0.77      0.76       558



In [81]:
# make prediction

# import the necessary packages
import cv2
# load the input image and resize it to the target spatial dimensions
width = 32 # remember the input dimension used to resize the train data
height = 32 # remember the input dimension used to resize the train data
image = cv2.imread("images\SGStove.jpg")
output = image.copy()
image = cv2.resize(image, (width, height)).flatten() #the train data were flattened so predict image must be flatten too

# scale the pixel values to [0, 1]
# data = np.array(data, dtype="float") / 255.0
image = np.array(image, dtype="float") / 255.0
image = image.reshape((1, image.shape[0]))

# # make a prediction on the image
# preds = model.predict(image)
# proba = model.predict_proba(image)

# print (preds)
# print (proba)
# # zip(model.classes_, model.predict_proba(image))
# # # find the class label index with the largest corresponding probability
# # # i = preds.argmax(axis=1)[0]
# # # label = le.classes_[i]

# # draw the class label + probability on the output image
# text = "{}: {:.2f}%".format(label, preds[0][i] * 100)
# cv2.putText(output, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

# # show the output image
# cv2.imshow("Image", output)
# cv2.waitKey(0)   # Delay in milliseconds. 0 is the special value that means “forever”, until you close the image window

In [86]:
#print(image)
# make a prediction on the image
preds = model.predict(image)
preds
proba = model.predict_proba(image)
proba

array([[0.0037402, 0.6213763, 0.3748835]], dtype=float32)

In [None]:
import pandas as pd
test = [[0,1,1,0],[1,1,1,0]]
pd.DataFrame(clf.predict_proba(test), columns=clf.classes_)

### KNN Model

In [42]:
# train the model
print("[INFO] using '{}' model".format("knn"))
model = models["knn"]
model.fit(trainX, trainY)

[INFO] using 'knn' model


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [43]:
# make predictions on our data and show a classification report
print("[INFO] evaluating KNeighborsClassifier")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
	target_names=le.classes_))

[INFO] evaluating KNeighborsClassifier
              precision    recall  f1-score   support

  Bad_Excuse       0.36      0.99      0.53       160
  Clean_Room       0.57      0.17      0.26       236
  Messy_Room       0.91      0.26      0.40       162

    accuracy                           0.43       558
   macro avg       0.61      0.47      0.40       558
weighted avg       0.61      0.43      0.38       558



### GaussianNB

In [44]:
# train the model
print("[INFO] using '{}' model".format("naive_bayes"))
model = models["naive_bayes"]
model.fit(trainX, trainY)

[INFO] using 'naive_bayes' model


GaussianNB(priors=None, var_smoothing=1e-09)

In [45]:
# make predictions on our data and show a classification report
print("[INFO] evaluating GaussianNB")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
	target_names=le.classes_))

[INFO] evaluating GaussianNB
              precision    recall  f1-score   support

  Bad_Excuse       0.74      0.78      0.76       160
  Clean_Room       0.64      0.53      0.58       236
  Messy_Room       0.55      0.66      0.60       162

    accuracy                           0.64       558
   macro avg       0.64      0.66      0.65       558
weighted avg       0.64      0.64      0.64       558



### LogisticRegression

In [46]:
# train the model
print("[INFO] using '{}' model".format("logit"))
model = models["logit"]
model.fit(trainX, trainY)

[INFO] using 'logit' model




LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [47]:
# make predictions on our data and show a classification report
print("[INFO] evaluating LogisticRegression")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
	target_names=le.classes_))

[INFO] evaluating LogisticRegression
              precision    recall  f1-score   support

  Bad_Excuse       0.61      0.75      0.67       160
  Clean_Room       0.59      0.60      0.60       236
  Messy_Room       0.62      0.46      0.53       162

    accuracy                           0.60       558
   macro avg       0.61      0.60      0.60       558
weighted avg       0.61      0.60      0.60       558



### SVC

In [48]:
# train the model
print("[INFO] using '{}' model".format("svm"))
model = models["svm"]
model.fit(trainX, trainY)

[INFO] using 'svm' model


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [52]:
# make predictions on our data and show a classification report
print("[INFO] evaluating SVC")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
	target_names=le.classes_))

[INFO] evaluating SVC
              precision    recall  f1-score   support

  Bad_Excuse       0.76      0.74      0.75       160
  Clean_Room       0.59      0.56      0.57       236
  Messy_Room       0.53      0.59      0.56       162

    accuracy                           0.62       558
   macro avg       0.63      0.63      0.63       558
weighted avg       0.62      0.62      0.62       558



### DecisionTree

In [50]:
# train the model
print("[INFO] using '{}' model".format("decision_tree"))
model = models["decision_tree"]
model.fit(trainX, trainY)

[INFO] using 'decision_tree' model


DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [51]:
# make predictions on our data and show a classification report
print("[INFO] evaluating DecisionTree")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
	target_names=le.classes_))

[INFO] evaluating DecisionTree
              precision    recall  f1-score   support

  Bad_Excuse       0.76      0.74      0.75       160
  Clean_Room       0.59      0.56      0.57       236
  Messy_Room       0.53      0.59      0.56       162

    accuracy                           0.62       558
   macro avg       0.63      0.63      0.63       558
weighted avg       0.62      0.62      0.62       558



### RandomForest

In [53]:
# train the model
print("[INFO] using '{}' model".format("random_forest"))
model = models["random_forest"]
model.fit(trainX, trainY)

[INFO] using 'random_forest' model


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [54]:
# make predictions on our data and show a classification report
print("[INFO] evaluating RandomForest")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
	target_names=le.classes_))

[INFO] evaluating RandomForest
              precision    recall  f1-score   support

  Bad_Excuse       0.89      0.88      0.88       160
  Clean_Room       0.69      0.78      0.74       236
  Messy_Room       0.70      0.57      0.63       162

    accuracy                           0.75       558
   macro avg       0.76      0.74      0.75       558
weighted avg       0.75      0.75      0.75       558



### MLPClassifier

In [55]:
# train the model
print("[INFO] using '{}' model".format("mlp"))
model = models["mlp"]
model.fit(trainX, trainY)

[INFO] using 'mlp' model




MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=200, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [56]:
# make predictions on our data and show a classification report
print("[INFO] evaluating MLPClassifier")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
	target_names=le.classes_))

[INFO] evaluating MLPClassifier
              precision    recall  f1-score   support

  Bad_Excuse       0.60      0.91      0.73       160
  Clean_Room       0.68      0.47      0.56       236
  Messy_Room       0.66      0.62      0.64       162

    accuracy                           0.64       558
   macro avg       0.65      0.67      0.64       558
weighted avg       0.65      0.64      0.63       558

