# Real-Time Dangerous Animal Detection Using Big Data and Deep Learning with YOLO

- This project aims to develop a real-time dangerous animal detection system using YOLO (You Only Look Once) with deep learning and big data processing techniques. The system is designed to enhance public safety by detecting potentially hazardous animals such as snakes, scorpions, and wolves through camera feeds. The dataset, consisting of both real-world and augmented images, is preprocessed using PySpark to ensure consistency in format, resolution, and quality. Various augmentation techniques, including motion blur, are applied to improve performance in dynamic environments. The model is trained with YOLOv8 to enhance detection accuracy under different conditions.

### `Step-1` : Gathering Dataset
- The dataset includes eight selected dangerous animal classes that are commonly seen in public areas: bear, crocodile, hawk, lion, scorpion, snake, spider, and wolf. Images were sourced from `Google Images`, `Pexels`, and `Unsplash`. For Google Images, the `GoogleImageCrawler` class from the `icrawler` library was used to automate the downloading process. For Pexels and Unsplash, developer accounts were created to obtain the API keys needed to download images. Approximately 3,000 images were collected in total.

In [9]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import os
import requests
import time
from icrawler.builtin import GoogleImageCrawler

In [3]:
# pip install icrawler

In [5]:
# pip install pytesseract

In [7]:
# pip install opencv-python

In [5]:
# pip install requests


### The Images from Google Images

In [None]:
# Output location and target classes for the Google Images crawl
download_folder = 'google_image'  # root directory that receives the downloaded images
animal_classes = [  # search terms, one per dangerous-animal class
    'snake', 'scorpion', 'spider', 'crocodile',
    'wolf', 'bear', 'lion', 'hawk',
]

# Function to download images
def download_images(images_per_class=100):
    """Crawl Google Images for every class in ``animal_classes``.

    For each class a subfolder ``<download_folder>/<class>`` is created and
    a ``GoogleImageCrawler`` is pointed at it, searching for
    "<class> in natural habitat".

    Args:
        images_per_class: Upper bound on the number of images requested per
            class (passed to the crawler as ``max_num``). The original cell
            read this name without ever defining it, which raised
            ``NameError`` on the first crawl; it is now a parameter.
            NOTE(review): the default of 100 is a placeholder -- confirm the
            value that was actually used to build the dataset.
    """
    # Root folder for all classes; harmless if it already exists.
    os.makedirs(download_folder, exist_ok=True)

    for animal in animal_classes:
        print(f"🔍 Downloading images for: {animal}")

        # Each class gets its own subfolder under the root folder.
        animal_folder = os.path.join(download_folder, animal)
        os.makedirs(animal_folder, exist_ok=True)

        # One crawler per class so that its storage root is the class folder.
        google_crawler = GoogleImageCrawler(storage={'root_dir': animal_folder})
        google_crawler.crawl(
            keyword=f"{animal} in natural habitat",
            max_num=images_per_class,
            # 'auto' lets the crawler choose the starting file index itself
            # instead of a fixed offset.
            file_idx_offset='auto',
        )

        # Pause between classes; per the original author this gives the
        # system time to finish file operations and release file locks.
        time.sleep(5)

        print(f"✅ Download complete for: {animal}")

# Kick off the Google Images crawl for every class in animal_classes
download_images()

# Final status line. The emoji was stored as mojibake ("‚úÖ", i.e. the UTF-8
# bytes of a check mark decoded with the wrong codec); restored here.
print("✅ All image downloads complete!")


### The Images from Pexels

In [None]:
# Search terms: one Pexels query is issued per class
animal_classes = [
    'snake', 'scorpion', 'spider', 'crocodile',
    'wolf', 'bear', 'lion', 'hawk',
]

# Placeholder credential -- substitute your own Pexels API key before running
PEXELS_API_KEY = '**************'

# Function to download images from Pexels
def download_pexels_images(query, num_images=500, parent_dir="pexels_images"):
    """Download up to ``num_images`` photos for ``query`` from the Pexels search API.

    Results are fetched page by page and each photo's ``src.original`` URL is
    saved to ``<parent_dir>/<query>/`` under the URL's basename. Paging stops
    when the target count is reached, when a page comes back empty, or when
    the search request fails.

    Args:
        query: Search term; also used as the name of the output subfolder.
        num_images: Maximum number of images to save.
        parent_dir: Directory under which the per-query folder is created.

    Returns:
        int: Number of images actually written to disk. (The original
        returned nothing; existing callers that ignore the result are
        unaffected.)
    """
    headers = {'Authorization': PEXELS_API_KEY}  # API authorization header

    # makedirs creates parent_dir as an intermediate directory when needed.
    animal_folder = os.path.join(parent_dir, query)
    os.makedirs(animal_folder, exist_ok=True)

    search_url = 'https://api.pexels.com/v1/search'
    per_page = 80  # Pexels API limit per page
    total_images_downloaded = 0
    page = 1

    while total_images_downloaded < num_images:
        # Passing params (instead of formatting them into the URL) lets
        # requests URL-encode queries containing spaces or special characters.
        params = {'query': query, 'per_page': per_page, 'page': page}
        try:
            # A timeout keeps a stalled connection from hanging the run.
            response = requests.get(
                search_url, headers=headers, params=params, timeout=30
            )
        except requests.RequestException as e:
            print(f"Error fetching data for {query} from Pexels API: {e}")
            break

        if response.status_code != 200:
            print(f"Error fetching data for {query} from Pexels API")
            break

        photos = response.json().get('photos')
        if not photos:  # Stop if no more images found
            print("No more images found!")
            break

        for photo in photos:
            img_url = photo['src']['original']
            img_path = os.path.join(animal_folder, os.path.basename(img_url))

            try:
                img_response = requests.get(img_url, timeout=60)
                # Without this check an HTTP error body would be written to
                # disk as if it were an image.
                img_response.raise_for_status()
                with open(img_path, 'wb') as f:
                    f.write(img_response.content)
            except (requests.RequestException, OSError) as e:
                # Best effort: report the failure and move on to the next photo.
                print(f"Error downloading {img_path}: {e}")
                continue

            total_images_downloaded += 1
            print(f"Downloaded {img_path}")

            # Stop once target number of images is reached
            if total_images_downloaded >= num_images:
                break

        # Move to the next page if needed
        page += 1

    return total_images_downloaded

# Fetch up to 500 Pexels photos per class into pexels_images/<class>/
for animal in animal_classes:
    download_pexels_images(
        query=animal,
        num_images=500,
        parent_dir="pexels_images",
    )

# Reached only after every class has been processed
print("Download completed for all animal classes!")


### The Images from Unsplash

In [None]:
# Dangerous-animal classes; each one is used as an Unsplash search query
animal_classes = [
    'snake', 'scorpion', 'spider', 'crocodile',
    'wolf', 'bear', 'lion', 'hawk',
]

# Function to download images from Unsplash
def download_images(query, client_id, download_path, num_images=500):
    """Download up to ``num_images`` photos for ``query`` from the Unsplash search API.

    Search results are fetched page by page and each photo's ``urls.regular``
    image is saved as ``<download_path>/<query>_<n>.jpg``, where ``n`` counts
    successfully saved images starting at 1. Paging stops when the target
    count is reached, when a page has no results, or when the search request
    fails.

    Args:
        query: Search term; also used as the file-name prefix.
        client_id: Unsplash API access key, sent as the ``client_id``
            query parameter.
        download_path: Directory to write images into (created if missing).
        num_images: Maximum number of images to save.

    Returns:
        int: Number of images actually written to disk. (The original
        returned nothing; existing callers that ignore the result are
        unaffected.)
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(download_path, exist_ok=True)

    url = "https://api.unsplash.com/search/photos"
    params = {
        "query": query,
        "client_id": client_id,
        "per_page": 30,  # Unsplash API limit per request
    }

    total_downloaded = 0
    page = 1

    # Continue downloading until reaching desired image count
    while total_downloaded < num_images:
        params["page"] = page
        try:
            # A timeout keeps a stalled connection from hanging the run.
            response = requests.get(url, params=params, timeout=30)
        except requests.RequestException as e:
            print(f"Error fetching data from Unsplash API: {e}")
            break

        if response.status_code != 200:
            print(f"Error fetching data from Unsplash API: {response.status_code}")
            break

        results = response.json().get('results')
        if not results:
            print(f"No more images found for {query}")
            break

        for photo in results:
            if total_downloaded >= num_images:
                break

            file_name = f"{query}_{total_downloaded + 1}.jpg"
            try:
                image_response = requests.get(photo['urls']['regular'], timeout=60)
            except requests.RequestException as e:
                # One unreachable image should not abort the whole class.
                print(f"Failed to download image {total_downloaded + 1}: {e}")
                continue

            if image_response.status_code != 200:
                print(f"Failed to download image {total_downloaded + 1}")
                continue

            with open(os.path.join(download_path, file_name), 'wb') as file:
                file.write(image_response.content)
            print(f"Downloaded {file_name}")
            total_downloaded += 1

        page += 1

    return total_downloaded

# Entry point: fetch up to 500 Unsplash photos per class into ./unsplash/<class>/
if __name__ == "__main__":
    base_path = "./unsplash"
    CLIENT_ID = "**********"  # Placeholder -- put your Unsplash API key here

    for animal in animal_classes:
        download_images(
            query=animal,
            client_id=CLIENT_ID,
            download_path=os.path.join(base_path, animal),
            num_images=500,
        )
