In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from imutils import paths
import pandas as pd
import numpy as np 
import imutils
import cv2
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import io
import zipfile
from pathlib import Path
# Path.cwd() returns the current working directory; the value is neither
# assigned nor the last expression of the cell, so it is discarded here.
Path.cwd()
from google.colab import drive
# Mount Google Drive at /content/gdrive; the dataset directories used by the
# following cells are located under this mount point.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Google Drive folders holding the training images and the test images.
train_dir, test_dir = (
	'/content/gdrive/MyDrive/NTI/Week 4/week4 task/train',
	'/content/gdrive/MyDrive/NTI/Week 4/week4 task/test1',
)

In [None]:
def image_to_feature_vector(image, size=(32, 32)):
	"""Turn an image into a fixed-length vector of raw pixel values.

	Args:
		image: image array accepted by ``cv2.resize``.
		size: target ``(width, height)`` handed to ``cv2.resize``.

	Returns:
		The resized image flattened into a 1-D NumPy array (for example
		32 * 32 * 3 = 3072 values for a 3-channel image at the default size).
	"""
	resized = cv2.resize(image, size)
	return resized.flatten()

In [None]:
def extract_color_histogram(image, bins=(8, 8, 8)):
	"""Describe an image by a normalized 3D colour histogram in HSV space.

	Args:
		image: BGR image array (it is converted with ``cv2.COLOR_BGR2HSV``).
		bins: number of histogram bins for the H, S and V channels.

	Returns:
		The normalized histogram flattened into a 1-D NumPy array.
	"""
	hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

	# value range per channel: hue spans [0, 180), saturation and value [0, 256)
	channel_ranges = [0, 180, 0, 256, 0, 256]
	histogram = cv2.calcHist([hsv_image], [0, 1, 2], None, bins, channel_ranges)

	if not imutils.is_cv2():
		# newer OpenCV releases: normalize into the same array
		cv2.normalize(histogram, histogram)
	else:
		# OpenCV 2.4.X: normalize returns the result instead
		histogram = cv2.normalize(histogram)

	return histogram.flatten()

In [None]:
# grab the list of training images that we'll be describing
imagePaths = list(paths.list_images(train_dir))

# An empty list means the folder is wrong or not reachable (for example the
# drive is not mounted); fail here with a clear message instead of letting a
# later cell fail on empty arrays.
if not imagePaths:
	raise FileNotFoundError("no images found under {}".format(train_dir))

# number of training images found (displayed as the cell output)
len(imagePaths)

25000

In [None]:
# per-image accumulators; re-created on every run of this cell so that
# re-executing it starts from a clean state
rawImages = []
features = []
labels = []
# paths of files OpenCV could not decode, kept for inspection after the loop
unreadable = []

# number of neighbours used by the k-NN classifiers trained further down
# NOTE(review): with two classes an even k can produce tied votes — confirm 2 is intended
neighbors = 2


for (i, imagePath) in enumerate(imagePaths):
	# cv2.imread does not raise on a missing or corrupt file, it returns None
	image = cv2.imread(imagePath)
	if image is None:
		# skip the file rather than crash inside cv2.resize; nothing is
		# appended for it, so the three lists stay aligned
		unreadable.append(imagePath)
		print("[WARN] could not read {}, skipping".format(imagePath))
		continue

	# extract the class label, assuming that the path has the format
	# /path/to/dataset/{class}.{image_num}.jpg
	label = imagePath.split(os.path.sep)[-1].split(".")[0]

	# extract raw pixel intensity "features", followed by a color
	# histogram to characterize the color distribution of the pixels
	# in the image
	pixels = image_to_feature_vector(image)
	hist = extract_color_histogram(image)

	# update the raw images, features, and labels lists, respectively
	rawImages.append(pixels)
	features.append(hist)
	labels.append(label)

	# show an update every 1,000 images
	if i > 0 and i % 1000 == 0:
		print("Processed {}/{}".format(i, len(imagePaths)))

Processed 1000/25000
Processed 2000/25000
Processed 3000/25000
Processed 4000/25000
Processed 5000/25000
Processed 6000/25000
Processed 7000/25000
Processed 8000/25000
Processed 9000/25000
Processed 10000/25000
Processed 11000/25000
Processed 12000/25000
Processed 13000/25000
Processed 14000/25000
Processed 15000/25000
Processed 16000/25000
Processed 17000/25000
Processed 18000/25000
Processed 19000/25000
Processed 20000/25000
Processed 21000/25000
Processed 22000/25000
Processed 23000/25000
Processed 24000/25000


In [None]:
# convert the accumulated Python lists into NumPy arrays
rawImages = np.array(rawImages)
features = np.array(features)
labels = np.array(labels)

# show the memory consumed by the raw images matrix and the features matrix;
# one megabyte is taken as 1024 * 1024 bytes so the unit is consistent
BYTES_PER_MB = 1024 * 1024.0
print("[INFO] pixels matrix: {:.2f}MB".format(
	rawImages.nbytes / BYTES_PER_MB))
print("[INFO] features matrix: {:.2f}MB".format(
	features.nbytes / BYTES_PER_MB))

[INFO] pixels matrix: 75.00MB
[INFO] features matrix: 50.00MB


In [None]:
# fixed seed so the split, and therefore the accuracies reported below, are
# reproducible from one run to the next
SPLIT_SEED = 42

# split the raw pixels, the histograms and the labels in a single call so that
# both feature sets are trained and evaluated on exactly the same images
(trainRI, testRI, trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
	rawImages, features, labels, test_size=0.2, random_state=SPLIT_SEED)

# the raw-pixel cells refer to the labels under their own names
trainRL, testRL = trainLabels, testLabels

In [None]:
# k-NN on the flattened raw pixel intensities: fit on the training split,
# then report the accuracy on the held-out split
model = KNeighborsClassifier(n_jobs=-1, n_neighbors=neighbors).fit(trainRI, trainRL)
raw_acc = model.score(testRI, testRL)
print("Raw pixel accuracy: {:.2f}%".format(100 * raw_acc))

Raw pixel accuracy: 53.26%


In [None]:
# k-NN on the color histogram features: fit on the training split, then
# report the accuracy on the held-out split; `model` stays bound to this
# classifier afterwards
model = KNeighborsClassifier(n_jobs=-1, n_neighbors=neighbors).fit(trainFeat, trainLabels)
feat_acc = model.score(testFeat, testLabels)
print("Feature pixel accuracy: {:.2f}%".format(100 * feat_acc))

Feature pixel accuracy: 58.98%


In [None]:
# collect the paths of the test images and start with empty containers for
# their raw-pixel vectors and color histograms
testPaths = [path for path in paths.list_images(test_dir)]
testImages, testFeatures = [], []

In [None]:
# start from empty lists on every run so that re-executing this cell cannot
# append the same images a second time
testImages = []
testFeatures = []

for (i, imagePath) in enumerate(testPaths):
	# load the image; no class label is extracted for the test images
	image = cv2.imread(imagePath)
	if image is None:
		# cv2.imread returns None when a file cannot be read. Stop with the
		# offending path instead of skipping it: dropping an entry would
		# shift every later prediction relative to testPaths.
		raise ValueError("could not read test image: {}".format(imagePath))

	# extract raw pixel intensity "features", followed by a color
	# histogram to characterize the color distribution of the pixels
	# in the image
	pixels = image_to_feature_vector(image)
	hist = extract_color_histogram(image)

	# update the raw images and features lists, respectively
	testImages.append(pixels)
	testFeatures.append(hist)

	# show an update every 1,000 images
	if i > 0 and i % 1000 == 0:
		print("[INFO] processed {}/{}".format(i, len(testPaths)))

[INFO] processed 1000/12500
[INFO] processed 2000/12500
[INFO] processed 3000/12500
[INFO] processed 4000/12500
[INFO] processed 5000/12500
[INFO] processed 6000/12500
[INFO] processed 7000/12500
[INFO] processed 8000/12500
[INFO] processed 9000/12500
[INFO] processed 10000/12500
[INFO] processed 11000/12500
[INFO] processed 12000/12500


In [None]:
# classify the test histograms with the classifier currently bound to `model`
predicted_labels = model.predict(testFeatures)

# encode the string labels as integers: "dog" becomes 0, anything else 1
# NOTE(review): confirm this 0/1 assignment matches the label convention
# expected by whatever consumes `pred`
pred = np.where(predicted_labels == "dog", 0, 1)

In [None]:
# display the integer-encoded predictions (0 for "dog", 1 for any other label)
pred

array([1, 1, 1, ..., 1, 1, 0])