# Create our own dataset
https://github.com/RiddlerQ/simple_image_download/tree/master

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [None]:
!pip install simple_image_download

Collecting simple_image_download
  Downloading simple_image_download-0.5-py3-none-any.whl.metadata (6.2 kB)
INFO: pip is looking at multiple versions of simple-image-download to determine which version is compatible with other requirements. This could take a while.
  Downloading simple_image_download-0.4-py3-none-any.whl.metadata (2.4 kB)
  Downloading simple_image_download-0.2-py3-none-any.whl.metadata (494 bytes)
Downloading simple_image_download-0.2-py3-none-any.whl (3.8 kB)
Installing collected packages: simple_image_download
Successfully installed simple_image_download-0.2


In [None]:
from simple_image_download import simple_image_download
# Downloader object shared by the cells below; `download_images` calls its
# `download(query, limit)` method once per search term.
response = simple_image_download.simple_image_download()

In [None]:
# Search terms handed to the image downloader, one per person to recognise.
# Each term ends up as its own class folder under /content/simple_images
# (see the file paths printed by the clean-up cell further down).
queries = [
    'Morgan Freeman',
    'Robin Williams',
    'Keanu Reeves',
    'Jackie Chan',
    'Hugh Jackman'
]

In [None]:
def download_images(queries, limit, downloader=None):
  """Download up to `limit` images for every search term in `queries`.

  Args:
    queries: Iterable of search strings. A single string is treated as one
      query instead of being iterated character by character.
    limit: Number of images to request per search string.
    downloader: Object exposing `download(query, limit)`. When omitted, the
      notebook-level `response` downloader is used, so existing two-argument
      calls behave exactly as before.
  """
  if downloader is None:
    # Looked up at call time so the function can be defined before `response`.
    downloader = response
  if isinstance(queries, str):
    queries = [queries]
  for query in queries:
    downloader.download(query, limit)

In [None]:
# Request 54 images per query. Unreadable downloads are deleted by the clean-up
# cell later in the notebook, so fewer usable images may remain per class.
download_images(queries, 54)

# base_model: The typical transfer-learning workflow
A typical transfer-learning workflow in Keras looks like this:

* Instantiate a base model and load pre-trained weights into it.
* Freeze all layers in the base model by setting trainable = False.
* Create a new model on top of the output of one (or several) layers from the base model.
* Train your new model on your new dataset.

In [None]:
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input, decode_predictions

In [None]:
# Load VGG16 with ImageNet weights but without its classifier head
# (`include_top=False`), for 224x224 RGB inputs. `feature_extractor` below
# stores this model's output as 7x7x512 arrays.
model = VGG16(weights='imagenet',
              include_top=False,
              input_shape=(224, 224, 3))

In [None]:
# Print the layer-by-layer summary of the loaded base model.
model.summary()

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

# VGG16's ImageNet weights expect inputs prepared by `preprocess_input`
# (RGB -> BGR and per-channel ImageNet mean subtraction, no 0-1 scaling).
# Scaling pixels with `rescale=1./255` feeds the network values it was never
# trained on and degrades the extracted features.
datagenerator = ImageDataGenerator(preprocessing_function=preprocess_input)

def feature_extractor(sample_size, batch_size, dir):
    """Run images from `dir` through the global `model` and collect its outputs.

    Args:
        sample_size: Maximum number of images to process. It is capped at the
            number of images the generator finds, so no image is returned twice.
        batch_size: Number of images read from disk per batch.
        dir: Root directory containing one sub-folder per class.

    Returns:
        A tuple `(features, labels, class_indices)` where
        `features` has shape `(n, *model.output_shape[1:])`,
        `labels` has shape `(n,)` and holds the class id of each sample, and
        `class_indices` maps class-folder names to class ids.
    """
    datagen = datagenerator.flow_from_directory(dir,
                                                target_size=(224, 224),
                                                batch_size=batch_size,
                                                class_mode='categorical')

    # The directory iterator never stops on its own: asking for more samples
    # than exist would wrap into a second epoch and collect duplicate images.
    sample_size = max(0, min(sample_size, datagen.samples))

    features = np.zeros((sample_size,) + tuple(model.output_shape[1:]))
    labels = np.zeros(sample_size)

    if sample_size == 0:  # nothing to extract; do not touch the empty iterator
        return features, labels, datagen.class_indices

    collected = 0
    for input_pxl, lbl in datagen:
        # verbose=0 keeps one progress bar per batch out of the cell output.
        features_batch = model.predict(input_pxl, verbose=0)

        # The last batch may hold more images than are still needed.
        take = min(features_batch.shape[0], sample_size - collected)
        stop = collected + take

        features[collected:stop] = features_batch[:take]
        # One-hot rows -> integer class ids.
        labels[collected:stop] = np.argmax(lbl[:take], axis=1)
        collected = stop

        if collected >= sample_size:
            break

    return features[:collected], labels[:collected], datagen.class_indices

In [None]:
import os
from PIL import Image, UnidentifiedImageError

dir_path = "/content/simple_images"

# Delete every downloaded file that Pillow cannot fully decode, so the Keras
# directory iterator never hits an unreadable image during feature extraction.
for folder in os.listdir(dir_path):
    folder_path = os.path.join(dir_path, folder)
    if not os.path.isdir(folder_path):  # only class folders are scanned
        continue
    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)
        if not os.path.isfile(file_path):
            # Nested directories cannot be opened as images and os.remove()
            # would raise on them, so leave them untouched.
            continue
        try:
            with Image.open(file_path) as img:
                img.verify()  # structural check only, pixel data is not decoded
            # verify() leaves the image object unusable, so reopen the file and
            # decode it fully to catch truncated downloads as well.
            with Image.open(file_path) as img:
                img.load()
        except (UnidentifiedImageError, IOError, SyntaxError):
            # Pillow reports some broken files (e.g. damaged PNGs) as SyntaxError.
            print(f"Corrupt or non-image file detected: {file_path}")
            os.remove(file_path)  # Delete the corrupt file
            print(f"Deleted: {file_path}")


Corrupt or non-image file detected: /content/simple_images/Robin Williams/Robin Williams_35.jpg
Deleted: /content/simple_images/Robin Williams/Robin Williams_35.jpg
Corrupt or non-image file detected: /content/simple_images/Robin Williams/Robin Williams_45.jpg
Deleted: /content/simple_images/Robin Williams/Robin Williams_45.jpg
Corrupt or non-image file detected: /content/simple_images/Robin Williams/Robin Williams_47.jpg
Deleted: /content/simple_images/Robin Williams/Robin Williams_47.jpg
Corrupt or non-image file detected: /content/simple_images/Robin Williams/Robin Williams_53.jpg
Deleted: /content/simple_images/Robin Williams/Robin Williams_53.jpg
Corrupt or non-image file detected: /content/simple_images/Robin Williams/Robin Williams_49.jpg
Deleted: /content/simple_images/Robin Williams/Robin Williams_49.jpg
Corrupt or non-image file detected: /content/simple_images/Jackie Chan/Jackie Chan_43.jpg
Deleted: /content/simple_images/Jackie Chan/Jackie Chan_43.jpg
Corrupt or non-image f

In [None]:
# Extract VGG16 features for the downloaded images in batches of 50; the third
# return value (the class-name -> id mapping) is discarded here.
# NOTE(review): 238 samples are requested, but the recorded run below found only
# 237 images - confirm the extractor does not fill the gap with a repeated image.
train_features, train_labels, _ = feature_extractor(238, 50, "/content/simple_images")
# Flatten each 7x7x512 feature map into one 25088-value row for the classifier.
train_features = train_features.reshape(-1, 7*7*512)

Found 237 images belonging to 5 classes.
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 493ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 287ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step


# train our classifier

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation. `stratify` keeps the class
# proportions the same in both splits, which matters with this few images per
# class (and unequal counts after corrupt files were removed).
# NOTE: this cell rebinds `train_features`/`train_labels`; re-run the feature
# extraction cell before re-running it, otherwise the training set is split again.
train_features, val_features, train_labels, val_labels = train_test_split(
    train_features, train_labels, test_size=0.2, random_state=42,
    stratify=train_labels,
)

In [None]:
from sklearn.svm import SVC
# RBF-kernel support vector classifier (C=4) fitted on the flattened VGG16
# features of the training split.
classifier = SVC(C=4, kernel='rbf')
classifier.fit(train_features, train_labels)

In [None]:
# Accuracy on the data the classifier was fitted on - an optimistic figure;
# the validation accuracy computed below is the one that matters.
classifier.score(train_features, train_labels)

0.9842105263157894

In [None]:
# Predicted class ids for the held-out validation features.
y_pred = classifier.predict(val_features)
print(y_pred)

[0. 0. 0. 3. 0. 2. 0. 4. 0. 3. 3. 4. 2. 4. 2. 4. 4. 2. 2. 3. 0. 0. 1. 0.
 3. 2. 4. 1. 4. 4. 0. 3. 4. 3. 4. 3. 4. 4. 2. 3. 2. 4. 0. 3. 4. 3. 0. 3.]


In [None]:
# True class ids of the validation split, for comparison with the predictions.
print(val_labels)

[0. 0. 1. 3. 0. 1. 0. 0. 0. 3. 3. 2. 2. 2. 2. 0. 4. 4. 3. 3. 0. 2. 4. 0.
 3. 2. 2. 1. 4. 2. 3. 0. 4. 3. 1. 2. 1. 4. 2. 3. 2. 4. 1. 3. 1. 3. 3. 3.]


In [None]:
from sklearn.metrics import accuracy_score

# y_pred: predicted labels for the validation split
# val_labels: true labels for the validation split
# Accuracy is then reported as a percentage with two decimals.
accuracy = accuracy_score(val_labels, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 58.33%
