In [4]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from imutils import paths
import cv2
import glob

In [5]:
class SimplePreprocessor:
	def __init__(self, width, height, inter=cv2.INTER_AREA):
		# store the target image width, height, and interpolation
		# method used when resizing
		self.width = width
		self.height = height
		self.inter = inter

	def preprocess(self, image):
		# resize the image to a fixed size, ignoring the aspect
		# ratio
		return cv2.resize(image, (self.width, self.height),
			interpolation=self.inter)

In [6]:
import numpy as np
import cv2
import os

class SimpleDatasetLoader:
	def __init__(self, preprocessors=None):
		# store the image preprocessor
		self.preprocessors = preprocessors

		# if the preprocessors are None, initialize them as an
		# empty list
		if self.preprocessors is None:
			self.preprocessors = []

	def load(self, imagePaths, verbose=-1):
		# initialize the list of features and labels
		data = []
		labels = []

		# loop over the input images
		for (i, imagePath) in enumerate(imagePaths):
			# load the image and extract the class label assuming
			# that our path has the following format:
			# /path/to/dataset/{class}/{image}.jpg
			image = cv2.imread(imagePath)
			label = imagePath.split(os.path.sep)[-2]

			# check to see if our preprocessors are not None
			if self.preprocessors is not None:
				# loop over the preprocessors and apply each to
				# the image
				for p in self.preprocessors:
					image = p.preprocess(image)

			# treat our processed image as a "feature vector"
			# by updating the data list followed by the labels
			data.append(image)
			labels.append(label)

			# show an update every `verbose` images
			if verbose > 0 and i > 0 and (i + 1) % verbose == 0:
				print("[INFO] processed {}/{}".format(i + 1,
					len(imagePaths)))

		# return a tuple of the data and labels
		return (np.array(data), np.array(labels))

In [21]:
cat = [file for file in glob.glob("animals/cats/*")]
dog = [file for file in glob.glob("animals/dogs/*")]
panda = [file for file in glob.glob("animals/panda/*")]
imagePaths = np.concatenate((np.array(cat),np.array(dog)))
imagePaths = np.concatenate((np.array(imagePaths),np.array(panda)))
print(len(imagePaths))

catLabel = np.zeros(len(cat))
dogLabel = np.ones(len(dog))
pandaLabel = 2*np.zeros(len(panda))
labels = np.concatenate((np.array(catLabel),np.array(dogLabel)))
labels = np.concatenate((np.array(labels),np.array(pandaLabel)))
print(len(labels))

3000
3000


In [22]:
#initializing the image preprocessor
sp = SimplePreprocessor(32, 32)
sdl = SimpleDatasetLoader(preprocessors = [sp])
(data, label) = sdl.load(imagePaths, verbose = 500)
data = data.reshape((data.shape[0], 3072))

[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000


In [23]:
#encoding the labels
le = LabelEncoder()
labels = le.fit_transform(labels)

#splitting the data into training and testing set
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size = 0.25, random_state = 42)

In [24]:
for r in (None, 'l1', 'l2'):
#Training a SGD Classifier using a softmax loss function and specified
#regularization function for 10 epochs
    model = SGDClassifier(loss = 'log', penalty = r, max_iter = 10, 
                         learning_rate = 'constant', tol = 1e-3, eta0 = 0.01, random_state = 12)
    
    model.fit(trainX, trainY)
    
    acc = model.score(testX, testY)
    print('[INFO] "{}" penalty accuracy: {: .2f}%'.format(r, acc*100))



[INFO] "None" penalty accuracy:  47.47%




[INFO] "l1" penalty accuracy:  55.47%
[INFO] "l2" penalty accuracy:  57.87%


