<a href="https://colab.research.google.com/github/Sindhuhar/deep_learning/blob/main/understanding_regularization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Understanding regularization for image classification and machine learning


### Import Packages

In [2]:
# import the necessary packages
import sklearn
from imutils import paths
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from pyimagesearch.datasets import SimpleDatasetLoader
from pyimagesearch.preprocessing import SimplePreprocessor

### Image classification using regularization with Python and scikit-learn

In [3]:
# The command-line form of this script reads a required -d/--dataset
# option ("path to input dataset") through argparse. A notebook has no
# command line, so the same key is *hard coded* in a plain dictionary
# and the rest of the code can keep indexing args["dataset"].
args = dict(dataset="dataset/animals")

In [4]:
# grab the list of image paths
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))

# fail early with a clear message when the dataset path is wrong or
# holds no images, instead of an unrelated error further down
if not imagePaths:
	raise ValueError(
		"no images found under '{}'".format(args["dataset"]))

# initialize the image preprocessor, load the dataset from disk,
# and flatten every image into a single row of the data matrix
sp = SimplePreprocessor(32, 32)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(imagePaths, verbose=500)

# -1 lets the array infer the per-image feature count; the previous
# hard-coded 3072 (= 32 * 32 * 3) had to be kept in sync by hand with
# the preprocessor size above
# NOTE(review): assumes `data` is a NumPy array with one image per
# entry along axis 0 -- confirm against SimpleDatasetLoader.load
data = data.reshape((data.shape[0], -1))

[INFO] loading images...
[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000


In [5]:
# turn the class labels into integer ids; `le` is kept so the ids can
# be mapped back to the original labels later
le = LabelEncoder()
labels = le.fit_transform(labels)

# hold out 25% of the samples for testing and train on the other 75%;
# the fixed random_state keeps the partition the same on every run
trainX, testX, trainY, testY = train_test_split(
	data,
	labels,
	test_size=0.25,
	random_state=42,
)

In [6]:
# scikit-learn 1.1 renamed the logistic loss from "log" to "log_loss"
# and 1.3 dropped the old spelling, so pick the name this install
# accepts (only the major.minor part of the version is inspected)
skVersion = tuple(int(part) for part in sklearn.__version__.split(".")[:2])
logLoss = "log_loss" if skVersion >= (1, 1) else "log"

# loop over our set of regularizers (None means no penalty term)
for r in (None, "l1", "l2"):
	# train a SGD classifier using the logistic (log) loss and the
	# specified regularization penalty for up to 10 epochs (max_iter)
	# with a constant learning rate of eta0
	print("[INFO] training model with '{}' penalty".format(r))
	model = SGDClassifier(loss=logLoss, penalty=r, max_iter=10,
		learning_rate="constant", tol=1e-3, eta0=0.01, random_state=12)
	model.fit(trainX, trainY)

	# evaluate the classifier on the held-out test split
	acc = model.score(testX, testY)
	print("[INFO] {} penalty accuracy: {:.2f}%".format(r,
		acc * 100))

[INFO] training model with 'None' penalty




[INFO] None penalty accuracy: 56.13%
[INFO] training model with 'l1' penalty




[INFO] l1 penalty accuracy: 49.20%
[INFO] training model with 'l2' penalty
[INFO] l2 penalty accuracy: 55.73%


