#### Step 1

TensorFlow and Keras are essential for building the CNN model.
Requests and BeautifulSoup4 are for scraping images

In [1]:
pip install tensorflow keras requests beautifulsoup4






#### Step 2: Scrape Images of Nicholas Cage and Another person

Will use Web scaper to collect images.

In [2]:
pip install selenium

Note: you may need to restart the kernel to use updated packages.




In [3]:
pip install webdriver-manager

Note: you may need to restart the kernel to use updated packages.




In [71]:
import requests
from bs4 import BeautifulSoup
import os

In [75]:
def create_directory(path):
    if not os.path.exists(path):
        os.makedirs(path)

def scrape_images(query, num_images=500, save_dir="images"):
    # Creating the directory if it doesn't exist
    create_directory(save_dir)

    image_urls = []
    start = 0
    while len(image_urls) < num_images:
        # Google Images URL with pagination
        url = f"https://www.google.com/search?q={query}&tbm=isch&start={start}"
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers)
        
        soup = BeautifulSoup(response.text, "html.parser")

        # Finding all image elements
        for img_tag in soup.find_all("img"):
            src = img_tag.get("src")
            if src and "http" in src:
                image_urls.append(src)
                if len(image_urls) >= num_images:
                    break

        # Breaking the loop if the desired number of images is reached
        if len(image_urls) >= num_images:
            break

        # Increment the start value for the next page
        start += 20  # Google usually returns 20 results per page

    # Downloading images
    for i, img_url in enumerate(image_urls):
        try:
            img_data = requests.get(img_url).content
            with open(os.path.join(save_dir, f"{query.replace(' ', '_')}_{i}.jpg"), "wb") as handler:
                handler.write(img_data)
        except Exception as e:
            print(f"Could not download {img_url}. Error: {e}")

# Scrape images for Nicolas Cage
scrape_images("Nicolas Cage", save_dir="images/Nicolas_Cage")

In [76]:
#Here l want to split my dataset into train and testing set
import shutil
import random

def create_directory(path):
    if not os.path.exists(path):
        os.makedirs(path)

def split_images(source_dir, train_dir, test_dir, split_ratio=0.8):
    create_directory(train_dir)
    create_directory(test_dir)

    all_images = [f for f in os.listdir(source_dir) if f.endswith(('.jpg', '.png', '.jpeg'))]
    
    # Shuffling the images to ensure random splitting
    random.shuffle(all_images)
    
    # Calculating the number of images for training and testing
    split_index = int(len(all_images) * split_ratio)
    
    # Splitting the images into training and testing sets
    train_images = all_images[:split_index]
    test_images = all_images[split_index:]
    
    # Moving the images to their respective directories
    for img in train_images:
        shutil.copy(os.path.join(source_dir, img), os.path.join(train_dir, img))
    
    for img in test_images:
        shutil.copy(os.path.join(source_dir, img), os.path.join(test_dir, img))

# Defining paths
source_directory = "images/Nicolas_Cage"
train_directory = "images/Nicolas_Cage/train"
test_directory = "images/Nicolas_Cage/test"

# Splitting the images
split_images(source_directory, train_directory, test_directory)


#### Step 3: Build CNN Model

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator




In [5]:
# Fitting the CNN to the images
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

test_datagen = ImageDataGenerator(
        rescale=1./255)

training_set = train_datagen.flow_from_directory(
        r'C:\Users\0022491\Desktop\IIE VARSITY COLLEGE\2024\Semester 2\ICE TASK\images\Nicolas_Cage\train',
        target_size=(128,128),
        batch_size=32,
        class_mode='categorical')

test_set = test_datagen.flow_from_directory(
        r'C:\Users\0022491\Desktop\IIE VARSITY COLLEGE\2024\Semester 2\ICE TASK\images\Nicolas_Cage\test',
        target_size=(128,128),
        batch_size=32,
        class_mode='categorical')

# Print counts to verify
print(f'Training Set: {len(training_set.filenames)} images')
print(f'Test Set: {len(test_set.filenames)} images')


Found 416 images belonging to 2 classes.
Found 104 images belonging to 2 classes.
Training Set: 416 images
Test Set: 104 images


In [6]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=2, activation='softmax'))  # 2 classes

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])






#### Step 4: Train model

In [7]:
history = model.fit(
    training_set,
    epochs=10,
    validation_data=test_set,
    verbose=1
)

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


#### Evaluate

In [8]:
test_loss, test_accuracy = model.evaluate(test_set, verbose=1)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

Test Loss: 0.24091988801956177
Test Accuracy: 0.9615384340286255


#####  Make Predictions

In [12]:
import numpy as np
from keras.preprocessing import image

def predict_image(img_path):
    img = image.load_img(img_path, target_size=(128, 128))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0) 
    img_array /= 255.0  
    predictions = model.predict(img_array)
    class_labels = ['Nicolas Cage', 'Tom Cruise']
    predicted_class = class_labels[np.argmax(predictions)]
    return predicted_class

predicted = predict_image(r'C:\Users\0022491\Desktop\IIE VARSITY COLLEGE\2024\Semester 2\ICE TASK\images\predict\Nicolas_Cage_20.jpg')
print(f'Predicted Class: {predicted}')

Predicted Class: Nicolas Cage


In [15]:
#Another person
another_person = predict_image(r'C:\Users\0022491\Desktop\IIE VARSITY COLLEGE\2024\Semester 2\ICE TASK\images\Tom_Cruise\Tom Cruise_17.jpg')
print(f'Predicted Class: {another_person}')

Predicted Class: Nicolas Cage


In [20]:
import os
import random
def predict_image(img_path):
    img = image.load_img(img_path, target_size=(128, 128))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0) 
    img_array /= 255.0  
    predictions = model.predict(img_array)
    class_labels = ['Nicolas Cage']
    predicted_class = class_labels[np.argmax(predictions)]
    return predicted_class

def predict_random_image(folder_path):
    # List all files in the folder
    images = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
    
    # Select a random image
    random_image = random.choice(images)
    img_path = os.path.join(folder_path, random_image)
    
    # Predict the class of the random image
    predicted_class = predict_image(img_path)
    return img_path, predicted_class

# Example usage
folder_path = r'C:\Users\0022491\Desktop\IIE VARSITY COLLEGE\2024\Semester 2\ICE TASK\images\predict'
img_path, predicted = predict_random_image(folder_path)
print(f'Random Image Path: {img_path}')
print(f'Predicted Class: {predicted}')


Random Image Path: C:\Users\0022491\Desktop\IIE VARSITY COLLEGE\2024\Semester 2\ICE TASK\images\predict\Nicolas_Cage_127.jpg
Predicted Class: Nicolas Cage
