In [10]:
pip install selenium opencv-python

Collecting seleniumNote: you may need to restart the kernel to use updated packages.

  Using cached selenium-4.27.1-py3-none-any.whl.metadata (7.1 kB)
Collecting opencv-python
  Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting trio~=0.17 (from selenium)
  Using cached trio-0.28.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Using cached trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting websocket-client~=1.8 (from selenium)
  Using cached websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)
Collecting attrs>=23.2.0 (from trio~=0.17->selenium)
  Using cached attrs-24.3.0-py3-none-any.whl.metadata (11 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Using cached sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Using cached outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting sniffio>=1.3.0 (from tr

In [23]:
import os
import shutil
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np

## **2. Dataset Setup**
# Define the pathways and categories for bear categorization.
base_dir = "datasets"
categories = ["black_bear", "grizzly_bear", "panda_bear", "polar_bear"]
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "val")
test_dir = os.path.join(base_dir, "test")

In [24]:
base_dir = "C:/Users/adamd/Desktop/BEAR/BEAR/datasets"

raw_dir = os.path.join(base_dir, "raw")
for category in categories:
    category_dir = os.path.join(raw_dir, category)
    print(f"Looking for directory: {category_dir}")  # Debugging output

Looking for directory: C:/Users/adamd/Desktop/BEAR/BEAR/datasets\raw\black_bear
Looking for directory: C:/Users/adamd/Desktop/BEAR/BEAR/datasets\raw\grizzly_bear
Looking for directory: C:/Users/adamd/Desktop/BEAR/BEAR/datasets\raw\panda_bear
Looking for directory: C:/Users/adamd/Desktop/BEAR/BEAR/datasets\raw\polar_bear


In [25]:
import os
print(os.getcwd())  # Displays the current working directory.

c:\Users\adamd\Desktop\BEAR\BEAR


In [26]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import os
import time
import requests
from PIL import Image
from io import BytesIO

# Initialize Selenium WebDriver
driver = webdriver.Chrome()

# Define search criteria and save the path for bear photos.
keywords = ["Black Bear", "Grizzly Bear", "Panda Bear", "Polar Bear"]
output_directory = "datasets/raw"
os.makedirs(output_directory, exist_ok=True)

# Number of photos to download for each keyword.
num_images = 2500

# Function for downloading and validating pictures.
def download_images(keyword, folder, num_images):
    search_url = f"https://www.google.com/search?q={keyword.replace(' ', '+')}&tbm=isch"
    driver.get(search_url)
    time.sleep(2)  # Allow the webpage to load.

    # Scroll to load further photos.
    for _ in range(5):  # Adjust the number of scrolls as required.
        driver.execute_script("window.scrollBy(0, 1000);")
        time.sleep(2)

    # Locate image components.
    images = driver.find_elements(By.CSS_SELECTOR, "img")
    print(f"Found {len(images)} images for {keyword}. Starting download...")

    count = 0
    for img in images:
        if count >= num_images:
            break
        try:
            src = img.get_attribute("src")
            if src and "http" in src:  # Ensure it is a legitimate URL.
                response = requests.get(src, stream=True)
                if response.status_code == 200:
                    # Save the picture in the folder.
                    filepath = os.path.join(folder, f"{keyword.replace(' ', '_')}_{count + 1}.jpg")
                    with open(filepath, "wb") as file:
                        for chunk in response.iter_content(1024):
                            file.write(chunk)
                    
                    # Validate the downloaded image
                    try:
                        with Image.open(filepath) as im:
                            im.verify()  # Verify if the image is valid
                        print(f"Downloaded and validated: {filepath}")
                        count += 1
                    except (IOError, SyntaxError) as e:
                        print(f"Corrupted image detected, deleting: {filepath}")
                        os.remove(filepath)  # Remove corrupted image
        except Exception as e:
            print(f"Error downloading image {count + 1}: {e}")

# Loop over each term and download photos.
for keyword in keywords:
    folder_path = os.path.join(output_directory, keyword.replace(' ', '_'))
    os.makedirs(folder_path, exist_ok=True)
    download_images(keyword, folder_path, num_images)

# Close the WebDriver
driver.quit()

Found 252 images for Black Bear. Starting download...
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_1.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_2.jpg
Corrupted image detected, deleting: datasets/raw\Black_Bear\Black_Bear_3.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_3.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_4.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_5.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_6.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_7.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_8.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_9.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_10.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_11.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_12.jpg
Downloaded and validated: datasets/raw\Black_Bear\Black_Bear_1

In [28]:

import os
import cv2
import numpy as np

# Define paths
input_path = r"C:/Users/adamd/Desktop/BEAR/BEAR/datasets/raw"
output_path = r"C:/Users/adamd/Desktop/BEAR/BEAR/datasets/processed"
os.makedirs(output_path, exist_ok=True)

# Define image size
IMG_SIZE = 224

# Define categories
categories = ['black_bear', 'grizzly_bear', 'panda_bear', 'polar_bear']

# List the folders in the raw directory to confirm the right path.
print("Folders in raw directory:", os.listdir(input_path))

# Loop through every category.
for category in categories:
    input_folder = os.path.join(input_path, category)
    output_folder = os.path.join(output_path, category)

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Ensure the input folder exists before processing
    if not os.path.exists(input_folder):
        print(f"Error: {input_folder} does not exist.")
        continue

    # Process each image in the folder
    for img_name in os.listdir(input_folder):
        try:
            # Read image
            img_path = os.path.join(input_folder, img_name)
            img = cv2.imread(img_path)

            # Resize image
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

            # Save processed image
            output_file = os.path.join(output_folder, img_name)
            cv2.imwrite(output_file, img)
            print(f"Processed and saved: {output_file}")
        except Exception as e:
            print(f"Error processing {img_name}: {e}")

Folders in raw directory: ['black_bear', 'grizzly_bear', 'panda_bear', 'polar_bear']
Processed and saved: C:/Users/adamd/Desktop/BEAR/BEAR/datasets/processed\black_bear\Black_Bear_1.jpg
Processed and saved: C:/Users/adamd/Desktop/BEAR/BEAR/datasets/processed\black_bear\Black_Bear_10.jpg
Processed and saved: C:/Users/adamd/Desktop/BEAR/BEAR/datasets/processed\black_bear\Black_Bear_11.jpg
Processed and saved: C:/Users/adamd/Desktop/BEAR/BEAR/datasets/processed\black_bear\Black_Bear_12.jpg
Processed and saved: C:/Users/adamd/Desktop/BEAR/BEAR/datasets/processed\black_bear\Black_Bear_13.jpg
Processed and saved: C:/Users/adamd/Desktop/BEAR/BEAR/datasets/processed\black_bear\Black_Bear_14.jpg
Processed and saved: C:/Users/adamd/Desktop/BEAR/BEAR/datasets/processed\black_bear\Black_Bear_15.jpg
Processed and saved: C:/Users/adamd/Desktop/BEAR/BEAR/datasets/processed\black_bear\Black_Bear_16.jpg
Processed and saved: C:/Users/adamd/Desktop/BEAR/BEAR/datasets/processed\black_bear\Black_Bear_17.jp

In [29]:

import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Define paths
input_path = r"C:/Users/adamd/Desktop/BEAR/BEAR/datasets/processed"
output_path = r"C:/Users/adamd/Desktop/BEAR/BEAR/datasets/split_data"
os.makedirs(output_path, exist_ok=True)

# Define image size
IMG_SIZE = 224

# Define categories
categories = ['black_bear', 'grizzly_bear', 'panda_bear', 'polar_bear']

# Initialize lists to hold image data and labels
data = []
labels = []

# Loop through each category
for category in categories:
    input_folder = os.path.join(input_path, category)

    # Process each image in the folder
    for img_name in os.listdir(input_folder):
        try:
            # Read image
            img_path = os.path.join(input_folder, img_name)
            img = cv2.imread(img_path)

            # Resize image
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

            # Append image and label to data and labels lists
            data.append(img)
            labels.append(categories.index(category))
        except Exception as e:
            print(f"Error processing {img_name}: {e}")

# Convert lists to numpy arrays
data = np.array(data)
labels = np.array(labels)

# Normalize pixel values to range [0, 1]
data = data / 255.0

# Split data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(data, labels, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Save the processed data to .npy files for later use
np.save(os.path.join(output_path, 'X_train.npy'), X_train)
np.save(os.path.join(output_path, 'y_train.npy'), y_train)
np.save(os.path.join(output_path, 'X_val.npy'), X_val)
np.save(os.path.join(output_path, 'y_val.npy'), y_val)
np.save(os.path.join(output_path, 'X_test.npy'), X_test)
np.save(os.path.join(output_path, 'y_test.npy'), y_test)

print("Dataset split and saved successfully!")

Dataset split and saved successfully!


In [30]:

import os
import shutil
from sklearn.model_selection import train_test_split

# Define paths to your raw dataset
raw_data_dir = r"C:/Users/adamd/Desktop/BEAR/BEAR/datasets/raw"
output_data_dir = r"C:/Users/adamd/Desktop/BEAR/BEAR/datasets/split_data"

# Create output directories for train, validation, and test sets
train_dir = os.path.join(output_data_dir, "train")
val_dir = os.path.join(output_data_dir, "val")
test_dir = os.path.join(output_data_dir, "test")

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Define categories (subspecies)
categories = ['black_bear', 'grizzly_bear', 'panda_bear', 'polar_bear']

# Loop through categories and split data
for category in categories:
    category_path = os.path.join(raw_data_dir, category)
    files = os.listdir(category_path)

    # Split the files into train, val, and test
    train_files, temp_files = train_test_split(files, test_size=0.4, random_state=42)
    val_files, test_files = train_test_split(temp_files, test_size=0.5, random_state=42)

    # Create class folders inside train, val, and test directories if they don't exist
    os.makedirs(os.path.join(train_dir, category), exist_ok=True)
    os.makedirs(os.path.join(val_dir, category), exist_ok=True)
    os.makedirs(os.path.join(test_dir, category), exist_ok=True)

    # Move the files to respective directories
    for file in train_files:
        shutil.move(os.path.join(category_path, file), os.path.join(train_dir, category, file))
    for file in val_files:
        shutil.move(os.path.join(category_path, file), os.path.join(val_dir, category, file))
    for file in test_files:
        shutil.move(os.path.join(category_path, file), os.path.join(test_dir, category, file))

print("Dataset split into train, val, and test directories.")

Dataset split into train, val, and test directories.


In [31]:

import os
import shutil
import random

# Paths to the raw data and the split data directories
raw_data_dir = r"C:/Users/adamd/Desktop/BEAR/BEAR/datasets/raw"
split_data_dir = r"C:/Users/adamd/Desktop/BEAR/BEAR/datasets/split_data"

# Define categories (use the actual categories in your dataset)
categories = ['black_bear', 'grizzly_bear', 'panda_bear', 'polar_bear']

# Split percentage (80% train, 10% validation, 10% test)
train_split = 0.8
val_split = 0.1
test_split = 0.1

# Create necessary directories
os.makedirs(os.path.join(split_data_dir, 'train'), exist_ok=True)
os.makedirs(os.path.join(split_data_dir, 'val'), exist_ok=True)
os.makedirs(os.path.join(split_data_dir, 'test'), exist_ok=True)

for category in categories:
    os.makedirs(os.path.join(split_data_dir, 'train', category), exist_ok=True)
    os.makedirs(os.path.join(split_data_dir, 'val', category), exist_ok=True)
    os.makedirs(os.path.join(split_data_dir, 'test', category), exist_ok=True)

# Move images to respective directories
for category in categories:
    category_path = os.path.join(raw_data_dir, category)

    # List files and shuffle for randomness
    files = os.listdir(category_path)
    random.shuffle(files)

    # Calculate split indexes
    total_files = len(files)
    train_end = int(train_split * total_files)
    val_end = train_end + int(val_split * total_files)

    # Split files into train, validation, and test sets
    train_files = files[:train_end]
    val_files = files[train_end:val_end]
    test_files = files[val_end:]

    # Move the files to respective directories
    for file in train_files:
        shutil.move(os.path.join(category_path, file), os.path.join(split_data_dir, 'train', category, file))
    for file in val_files:
        shutil.move(os.path.join(category_path, file), os.path.join(split_data_dir, 'val', category, file))
    for file in test_files:
        shutil.move(os.path.join(category_path, file), os.path.join(split_data_dir, 'test', category, file))

    print(f"Moved {len(train_files)} files to train, {len(val_files)} files to validation, {len(test_files)} files to test for {category}.")

print("Dataset has been split into train, validation, and test directories.")


Moved 0 files to train, 0 files to validation, 0 files to test for black_bear.
Moved 0 files to train, 0 files to validation, 0 files to test for grizzly_bear.
Moved 0 files to train, 0 files to validation, 0 files to test for panda_bear.
Moved 0 files to train, 0 files to validation, 0 files to test for polar_bear.
Dataset has been split into train, validation, and test directories.


In [None]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Paths to dataset
dataset_dir = r"C:/Users/adamd/Desktop/BEAR/BEAR/datasets/split_data"
train_dir = os.path.join(dataset_dir, "train")
val_dir = os.path.join(dataset_dir, "val")
test_dir = os.path.join(dataset_dir, "test")

# Parameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 20
NUM_CLASSES = len(os.listdir(train_dir))  # Assuming one folder per class

# Check dataset structure
print("Train classes:", os.listdir(train_dir))
print("Validation classes:", os.listdir(val_dir))
print("Test classes:", os.listdir(test_dir))

# Data augmentation and preprocessing
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

val_test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Load datasets
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

val_generator = val_test_datagen.flow_from_directory(
    val_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

test_generator = val_test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False,  # Don't shuffle for evaluation
)

# Transfer Learning with VGG16
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze the base model

# Add custom layers on top
x = Flatten()(base_model.output)
x = Dense(256, activation="relu")(x)
x = Dropout(0.5)(x)
output_layer = Dense(NUM_CLASSES, activation="softmax")(x)

# Define model
model = Model(inputs=base_model.input, outputs=output_layer)

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Train the model
early_stopping = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)

history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=[early_stopping],
)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Classification Report
y_true = test_generator.classes
y_pred = model.predict(test_generator)
y_pred_classes = y_pred.argmax(axis=-1)

print("Classification Report:")
print(classification_report(y_true, y_pred_classes, target_names=test_generator.class_indices.keys()))

# Confusion Matrix
conf_matrix = confusion_matrix(y_true, y_pred_classes)
print("Confusion Matrix:")
print(conf_matrix)

# Plot Training and Validation Accuracy
plt.plot(history.history["accuracy"], label="Train Accuracy")
plt.plot(history.history["val_accuracy"], label="Validation Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Training and Validation Accuracy")
plt.show()

# Save the model
model.save("bear_classifier_model.h5")
print("Model saved as bear_classifier_model.h5")


Train classes: ['black_bear', 'grizzly_bear', 'panda_bear', 'polar_bear']
Validation classes: ['black_bear', 'grizzly_bear', 'panda_bear', 'polar_bear']
Test classes: ['black_bear', 'grizzly_bear', 'panda_bear', 'polar_bear']
Found 361 images belonging to 4 classes.
Found 150 images belonging to 4 classes.
Found 154 images belonging to 4 classes.
Epoch 1/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 6s/step - accuracy: 0.3755 - loss: 5.1856 - val_accuracy: 0.4733 - val_loss: 1.4602
Epoch 2/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 6s/step - accuracy: 0.5472 - loss: 1.8066 - val_accuracy: 0.6667 - val_loss: 0.8163
Epoch 3/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 6s/step - accuracy: 0.6027 - loss: 0.9678 - val_accuracy: 0.7067 - val_loss: 0.8104
Epoch 4/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 6s/step - accuracy: 0.6680 - loss: 0.8059 - val_accuracy: 0.7667 - val_loss: 0.7406
Epoch 5/20
