In [1]:
import os 
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot as plta
import numpy as np
from tqdm import tqdm
import cv2

In [2]:
#importing the data
data_dir = 'DogsandCats/'
train_dir = os.path.join(data_dir, 'training_set/training_set/')
test_dir = os.path.join(data_dir, 'test_set/test_set')
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')


#checking the number of images in each folder
print('Total training cat images:', len(os.listdir(train_cats_dir)))
print('Total training dog images:', len(os.listdir(train_dogs_dir)))
print('Total test cat images:', len(os.listdir(test_cats_dir)))
print('Total test dog images:', len(os.listdir(test_dogs_dir)))


Total training cat images: 4000
Total training dog images: 4000
Total test cat images: 1000
Total test dog images: 1000


In [3]:
#loading the images
train_images = []
train_labels = []
test_images = []
test_labels = []

for i in tqdm(range(len(os.listdir(train_cats_dir)))):
    img = cv2.imread(train_cats_dir + '/' + os.listdir(train_cats_dir)[i])
    img = cv2.resize(img, (64,64))
    train_images.append(img)
    train_labels.append(0)


100%|██████████| 4000/4000 [00:21<00:00, 187.45it/s]


In [4]:
for i in tqdm(range(len(os.listdir(train_dogs_dir)))):
    img = cv2.imread(train_dogs_dir + '/' + os.listdir(train_dogs_dir)[i])
    img = cv2.resize(img, (64,64))
    train_images.append(img)
    train_labels.append(1)

100%|██████████| 4000/4000 [00:21<00:00, 189.63it/s]


In [5]:
train_images = np.array(train_images)
train_labels = np.array(train_labels)

In [6]:
def DFT(input):
    N = input.shape[0]
    output = np.zeros(input.shape, dtype=complex)
    B = np.zeros(input.shape, dtype=complex)
    for k in range(N):
        for n in range(N):
            B[n] = np.exp(-1j*2*np.pi*k*n/N)
        output[k] = np.sum(input*B)
    return output

In [7]:
dft_images = []
for i in tqdm(range(train_images.shape[0])):
    dft_images.append(DFT(train_images[i]))

100%|██████████| 8000/8000 [01:44<00:00, 76.28it/s] 


In [8]:
dft_images = np.array(dft_images)
dft_images.shape

(8000, 64, 64, 3)

In [9]:
dft_images = dft_images.reshape(dft_images.shape[0], -1)


In [10]:
dft_images.shape

(8000, 12288)

In [11]:
X = np.save('dft_images.npy', dft_images)
Y = np.save('train_labels.npy', train_labels)

In [12]:
dft_images = dft_images.real


In [17]:
#first and last 500 of dft images
dft_images1 = np.concatenate((dft_images[:500], dft_images[-500:]), axis=0)
train_labels1 = np.concatenate((train_labels[:500], train_labels[-500:]), axis=0)


In [19]:
dft_images1.shape, train_labels1.shape

((1000, 12288), (1000,))

In [20]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(dft_images1, train_labels1, test_size=0.2, random_state=42)

In [22]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
clf = LogisticRegression(random_state=0).fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.515

In [23]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.515