In [10]:
import os
from tqdm import tqdm
from keras.utils import load_img, img_to_array, to_categorical
from keras.applications import VGG16
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
from keras.models import Sequential
import numpy as np
import joblib

In [2]:
images_base_path = r'/Users/praneethkumarpalepu/Downloads/animals/images'

In [3]:
def image_dataset_loader(base_path, image_size=(224, 224), test_size=0.2, random_state=0, batch_size= 64):
    images, labels = [], []
    image_file_location, act_label = [], []
    for folder in os.listdir(base_path):
        if not folder.startswith('.'):
            folder_path = os.path.join(base_path, folder)
            for file in os.listdir(folder_path):
                _, ext = os.path.splitext(file)
                if ext.lower() in ['.jpg', '.jpeg', '.png']:
                    file_path = os.path.join(folder_path, file)
                    image_file_location.append(file_path)
                    act_label.append(folder)

    total_files = len(image_file_location)
    quotient = total_files//batch_size
    reminder = total_files % batch_size
    total_batches = quotient+1 if reminder > 0 else quotient

    for batch in tqdm(range(total_batches)):
        start_index = batch * batch_size
        end_index = (batch * batch_size)+batch_size
        if end_index > total_files:
            end_index = end_index - total_files
        image_batch = image_file_location[start_index:end_index]
        label = act_label[start_index:end_index]
        counter= 0
        tmp_image, tmp_labels = [], []
        for file in image_batch:           
            image = load_img(file, target_size=image_size)
            image_arr = img_to_array(image)
            tmp_image.append(image_arr)
            tmp_labels.append(label[counter])
            counter+=1

        images.extend(np.array(tmp_image))
        labels.extend(np.array(tmp_labels))

    images = np.array(images)
    labels = np.array(labels)

    xtr, xte, ytr, yte = train_test_split(images, labels, test_size= test_size, random_state= random_state)

    return (xtr, ytr), (xte, yte)

In [4]:
(xtr, ytr), (xte, yte) = image_dataset_loader(images_base_path, batch_size=128)

100%|███████████████████████████████████████████| 24/24 [00:04<00:00,  5.68it/s]


In [6]:
vgg_model = VGG16(include_top=False, input_shape=(224, 224, 3))

In [7]:
model = Sequential([
    vgg_model,
    Flatten()
])

In [8]:
xtr_features = model.predict(xtr)

[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 3s/step


In [9]:
xte_features = model.predict(xte)

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 3s/step


In [11]:
lr = LogisticRegression(solver='liblinear')

In [12]:
lr.fit(xtr_features, ytr)

In [13]:
lr_pred = lr.predict(xte_features)

In [14]:
accuracy_score(yte, lr_pred)

0.969439728353141

In [15]:
dcp_features = joblib.dump(xtr_features, r'animals_dcp_features.joblib')

In [None]:
features = joblib.load(r'animals_dcp_features.joblib')