In [6]:
# import the necessary packages
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from ndl.preprocessing import SimplePreprocessor
from ndl.datasets import SimpleDatasetLoader
from imutils import paths

In [7]:
# directory holding the image dataset, relative to this notebook
dataset = '../datasets/animals'

# spatial size handed to the preprocessor and the channel count used to
# flatten each image; the feature-vector length below is derived from
# these values instead of a hard-coded 3072, so the two cannot drift apart
# NOTE(review): assumes the loader yields 3-channel images
# (3072 = 32 * 32 * 3) -- confirm against SimpleDatasetLoader
imageWidth, imageHeight, imageChannels = 32, 32, 3

# gather the image paths found under the dataset directory
imagePaths = list(paths.list_images(dataset))

# initialize the image preprocessor and load the dataset from disk,
# printing a progress message every 500 images
sp = SimplePreprocessor(imageWidth, imageHeight)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(imagePaths, verbose=500)

# reshape the data matrix so every image becomes one flat row of
# width * height * channels values
data = data.reshape((data.shape[0],
    imageWidth * imageHeight * imageChannels))

[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000


In [13]:
# fit an encoder on the class labels, then replace each label with its
# integer id; `le` keeps the fitted encoder
le = LabelEncoder()
le.fit(labels)
labels = le.transform(labels)

# hold out 25% of the samples for testing and train on the remaining
# 75%; the fixed random_state keeps the split reproducible
trainX, testX, trainY, testY = train_test_split(
    data, labels, random_state=42, test_size=0.25)

### Now, let's try a few regularization techniques.  

Regularization adds a penalty term to the loss function; the penalty is computed as a sum over the values in the weight matrix W.
The penalty is multiplied by a 'lambda' parameter that determines the strength of regularization: $L = L_{data} + \lambda R(W)$.

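- L1 - sums the absolute values of the weights: $R(W) = \sum_{i,j} |W_{i,j}|$
- L2 - aka weight decay - sums the squared values of the weights: $R(W) = \sum_{i,j} W_{i,j}^2$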

In [19]:
# hyperparameters shared by every run: logistic ("log") loss, at most
# 10 passes over the training data, a constant learning rate of 0.01,
# and a fixed seed so the runs are comparable
# NOTE(review): recent scikit-learn releases spell this loss "log_loss"
# -- confirm against the installed version before upgrading
sgdParams = dict(loss="log", max_iter=10, learning_rate="constant",
    eta0=0.01, random_state=1)

# train and score one SGD classifier per regularizer (None = no penalty)
for penalty in (None, "l1", "l2"):
    print("[INFO] training model with `{}` penalty".format(penalty))
    model = SGDClassifier(penalty=penalty, **sgdParams)
    model.fit(trainX, trainY)

    # report accuracy on the held-out test split as a percentage
    acc = model.score(testX, testY)
    print("[INFO] `{}` penalty accuracy: {:.2f}%".format(penalty,
        acc * 100))

[INFO] training model with `None` penalty
[INFO] `None` penalty accuracy: 38.13%
[INFO] training model with `l1` penalty
[INFO] `l1` penalty accuracy: 41.20%
[INFO] training model with `l2` penalty
[INFO] `l2` penalty accuracy: 44.80%


This example is too small to show the real effect of regularization, but regularization can certainly provide a boost to testing accuracy and reduce overfitting, provided we can tune the hyperparameters correctly.

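**In gradient descent algorithms the important parameters to tune are the learning rate 'alpha' and the regularization 'lambda' parameter (as well as determining the best regularization method).** Note that scikit-learn's `SGDClassifier` names these differently: the learning rate is `eta0`, while its `alpha` argument is the regularization strength (our 'lambda').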