In [1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import cv2

In [2]:
#importing the data
data_dir = '/home/kalyan/DataSets/DogsandCats/'
train_dir = os.path.join(data_dir, 'training_set/training_set/')
test_dir = os.path.join(data_dir, 'test_set/test_set')
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')

#checking the number of images in each folder
print('Total training cat images:', len(os.listdir(train_cats_dir)))
print('Total training dog images:', len(os.listdir(train_dogs_dir)))
print('Total test cat images:', len(os.listdir(test_cats_dir)))
print('Total test dog images:', len(os.listdir(test_dogs_dir)))


Total training cat images: 4000
Total training dog images: 4000
Total test cat images: 1000
Total test dog images: 1000


In [3]:
#taking svd of the images and to truncatate first 10 features of the images as it is most contributing to the images

def svd_truncate(img, n):
    #converting images to grayscale
    img = img.mean(axis=2)
    U, s, V = np.linalg.svd(img)
    U = U[:, :n]
    s = s[:n]
    us = np.dot(U, np.diag(s))
    return us

In [4]:
#getting svd of all images
def get_svd_images(img_dir, n):
    img_files = os.listdir(img_dir)
    img_files = [os.path.join(img_dir, f) for f in img_files]
    #img_files = [plt.imread(f) for f in img_files]
    # read images from file, resize them into 100x100, store in single array
    img_files = [cv2.resize(plt.imread(f), (100, 100)) for f in img_files]
    svd_images = [svd_truncate(img, n) for img in img_files]
    return svd_images

In [5]:
cat_images = get_svd_images(train_cats_dir, 5)
cat_images = np.array(cat_images)

In [6]:
cat_images[0].shape

(100, 5)

In [7]:
dog_images = get_svd_images(train_dogs_dir, 5)
dog_images = np.array(dog_images)

In [8]:
#concatenating the images
images = np.concatenate((cat_images, dog_images), axis=0)
print(images.shape)

(8000, 100, 5)


In [9]:
#creating labels for the images
labels = np.concatenate((np.zeros(cat_images.shape[0]), np.ones(dog_images.shape[0])), axis=0)
print(labels.shape)

(8000,)


In [10]:
#to solve this only size-1 arrays can be converted to Python scalars
labels = labels.astype(int)
print(labels.shape)

(8000,)


In [14]:
test_cat_images = get_svd_images(test_cats_dir, 5)
test_cat_images = np.array(test_cat_images)
#test_cat_images = test_cat_images.reshape(test_cat_images.shape[0])
print(test_cat_images.shape)

(1000, 100, 5)


In [15]:
#test for dogs
test_dog_images = get_svd_images(test_dogs_dir, 5)
test_dog_images = np.array(test_dog_images)
#test_dog_images = test_dog_images.reshape(test_dog_images.shape[0], -1)
print(test_dog_images.shape)


(1000, 100, 5)


In [16]:
#concatenating the test images
test_images = np.concatenate((test_cat_images, test_dog_images), axis=0)
print(test_images.shape)

(2000, 100, 5)


In [17]:
labels

array([0, 0, 0, ..., 1, 1, 1])

In [18]:
images.shape

(8000, 100, 5)

array([[-5.29549600e+02],
       [ 3.86837197e+00],
       [ 1.90808671e+01],
       [ 5.83913154e+01],
       [ 4.21477638e+01],
       [-5.33229614e+02],
       [-2.68250376e+00],
       [ 1.21026953e+01],
       [ 5.69986941e+01],
       [ 2.65860748e+01],
       [-5.33121446e+02],
       [-1.69054529e+01],
       [ 9.91779418e+00],
       [ 5.06062240e+01],
       [ 3.77445302e+01],
       [-5.37611881e+02],
       [-1.42606084e+01],
       [ 9.74183523e+00],
       [ 4.16418599e+01],
       [ 2.66550124e+01],
       [-5.29219357e+02],
       [-2.08882444e+01],
       [ 6.01507976e+00],
       [ 3.96861996e+01],
       [ 2.17370240e+01],
       [-5.20790642e+02],
       [-2.40269818e+01],
       [ 8.69824652e+00],
       [ 3.40812479e+01],
       [ 1.45612484e+01],
       [-5.21436139e+02],
       [-3.60398903e+01],
       [ 4.85693720e+00],
       [ 1.94946625e+01],
       [ 4.03872853e+00],
       [-5.83770900e+02],
       [-1.07894135e+02],
       [ 2.29361822e+01],
       [-4.1

In [20]:
for i in range(len(images)):
    if i % 100 == 0:
        print(images[i].shape)

(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)
(100, 5)


In [24]:
images.shape

(8000, 100, 5)

In [36]:
image_1d = np.array([img.flatten() for img in images])
test_images_1d = np.array([img.flatten() for img in test_images])

In [28]:
image_1d.shape

(8000, 500)

In [32]:
labels.shape

(8000,)

In [37]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(image_1d, labels)
ypred = knn.predict(test_images_1d)

In [35]:
knn.score(image_1d, labels)

0.711

In [12]:
#creating a dataframe
df = pd.DataFrame(images)
df['label'] = labels

(1000, 1)


  This is separate from the ipykernel package so we can avoid doing imports until


In [13]:
test_cat_images = test_images

(1000, 1)


  This is separate from the ipykernel package so we can avoid doing imports until


(2000, 1)


In [16]:
#model building
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression


X_train = df.drop('label', axis=1)
y_train = df['label']
X_test = test_images
y_test = np.concatenate((np.zeros(test_cat_images.shape[0]), np.ones(test_dog_images.shape[0])), axis=0)
y_test = y_test.astype(int)


pandas.core.frame.DataFrame

In [25]:
#knn model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print('Accuracy of KNN model is:', accuracy_score(y_test, y_pred))

TypeError: unhashable type: 'numpy.ndarray'