In [2]:
import requests
from bs4 import BeautifulSoup
import urllib.request
import os
import time
from urllib.error import HTTPError, URLError

# Browser-style User-Agent string sent with the search-page request below.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'

# HTTP headers passed to requests.get().
headers = {'User-Agent': _USER_AGENT}

def _fetch_page_source(url, retry_count):
    """Return the HTML at ``url``, or None once ``retry_count`` attempts have failed."""
    backoff_factor = 1  # Initial backoff in seconds
    for attempt in range(1, retry_count + 1):
        try:
            response = requests.get(url, headers=headers, timeout=10)
            # Treat HTTP error statuses (4xx/5xx) as failures so they are
            # retried instead of being parsed as if they were a results page.
            response.raise_for_status()
            return response.text
        except requests.exceptions.RequestException as e:
            if attempt == retry_count:
                # Last attempt: there is nothing left to wait for.
                print(f"Attempt {attempt} failed: {e}.")
                break
            print(f"Attempt {attempt} failed: {e}. Retrying in {backoff_factor} seconds...")
            time.sleep(backoff_factor)
            backoff_factor *= 2  # Exponential backoff
    return None


def _save_image(image_url, path):
    """Download ``image_url`` to ``path``; return True only if the file was written."""
    try:
        # Send the same headers as the page request and bound the wait with a
        # timeout, so one stalled connection cannot hang the whole run.
        request = urllib.request.Request(image_url, headers=headers)
        with urllib.request.urlopen(request, timeout=10) as response:
            data = response.read()
        # Write only after the full body arrived, so a failed transfer does
        # not leave a truncated file behind.
        with open(path, 'wb') as image_file:
            image_file.write(data)
    except HTTPError as e:
        print(f"HTTPError {e.code}: {e.reason} for {image_url}")
    except URLError as e:
        print(f"URLError: {e.reason} for {image_url}")
    except Exception as e:
        print(f"Error downloading {image_url}: {e}")
    else:
        print(f"Downloaded {path}")
        return True
    return False


# Function to download images from a specific page with retry mechanism
def download_images_from_page(page_number, start_index, limit=2000):
    """Download the images listed on one freeimages.com "lily" search page.

    Args:
        page_number: Page number placed at the end of the search URL.
        start_index: Number used for the first file saved from this page.
            Files are named ``lily<N>.jpg`` and only a successful download
            consumes a number, so the numbering has no gaps.
        limit: Maximum number of images to save from this page. Defaults to
            2000, the per-page cap this function has always applied.

    Returns:
        The number of images actually written to disk. 0 means the page could
        not be fetched, contained no matching images, or every download failed.
    """
    url = f'https://www.freeimages.com/search/lily/{page_number}'
    retry_count = 5

    source = _fetch_page_source(url, retry_count)
    if source is None:
        print(f"Failed to fetch page {page_number} after {retry_count} attempts.")
        return 0  # No images downloaded

    soup = BeautifulSoup(source, 'lxml')

    # Only <img> tags whose src points at the site's image host are kept.
    img_links = soup.select('img[src^="https://images.freeimages.com/images"]')
    image_urls = [img['src'] for img in img_links]

    # NOTE: hard-coded, machine-specific output folder.
    save_dir = 'C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset/lily/'
    os.makedirs(save_dir, exist_ok=True)

    saved = 0
    for image_url in image_urls:
        if saved >= limit:
            break
        name = f"{save_dir}lily{start_index + saved}.jpg"
        if _save_image(image_url, name):
            saved += 1

    return saved

# Download images from multiple pages without resuming
def download_images(max_images):
    """Download up to ``max_images`` images, walking the search pages from page 1.

    Stops as soon as ``max_images`` files have been saved, or when a page
    yields no saved image (fetch failure, no matching images, or all
    downloads failed).

    Args:
        max_images: Upper bound on the total number of images to save.
    """
    total_images_downloaded = 0
    current_page = 1

    while total_images_downloaded < max_images:
        # Cap the page at what is still missing so the total never overshoots.
        remaining = max_images - total_images_downloaded
        new_images_downloaded = download_images_from_page(
            current_page, total_images_downloaded, limit=remaining
        )
        if new_images_downloaded == 0:
            print(f"No more images found or failed to download on page {current_page}.")
            break  # Stop the loop if no images were downloaded
        total_images_downloaded += new_images_downloaded
        current_page += 1

    print("Download completed.")

# Start a fresh download run, targeting 2000 images
download_images(max_images=2000)


Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDataset/lily/lily0.jpg
Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDataset/lily/lily1.jpg
Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDataset/lily/lily2.jpg
Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDataset/lily/lily3.jpg
Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDataset/lily/lily4.jpg
Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDataset/lily/lily5.jpg
Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDataset/lily/lily6.jpg
Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDataset/lily/lily7.jpg
Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDataset/lily/lily8.jpg
Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDataset/lily/lily9.jpg
Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDataset/lily/lily10.jpg
Downloaded C:/Users/LENOVO/Documents/AI - Mini Project/FlowersDa

In [1]:
import os
import shutil
import random

# Root folder that holds one sub-folder of images per flower class
base_dir = 'C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset'

# Flower classes; each name is joined onto base_dir to locate its images.
# NOTE(review): the scraper cell in this notebook saves into a 'lily/' folder,
# while this list uses 'lilies' - confirm the folder names on disk match.
flower_types = [
    'dahlias',
    'daisies',
    'roses',
    'lilies',
    'sunflowers',
]

# Fractions of each class assigned to the train / validation / test subsets
train_ratio, val_ratio, test_ratio = 0.7, 0.15, 0.15

# Function to split dataset
def split_dataset(base_dir, classes, train_ratio, val_ratio, test_ratio, seed=None):
    """Move each class's images into train/val/test sub-folders of ``base_dir``.

    For every class, the files in ``<base_dir>/<class>`` are shuffled and moved
    to ``<base_dir>/train/<class>``, ``<base_dir>/val/<class>`` and
    ``<base_dir>/test/<class>``. The test subset receives whatever remains
    after the train and validation slices are taken.

    Args:
        base_dir: Folder containing one sub-folder per class.
        classes: Iterable of class (sub-folder) names.
        train_ratio: Fraction of each class moved to ``train``.
        val_ratio: Fraction of each class moved to ``val``.
        test_ratio: Fraction of each class moved to ``test``; used to check
            that the three ratios add up to 1.
        seed: Optional seed for a reproducible shuffle. When None (default)
            the module-level ``random`` generator is used, as before.

    Raises:
        ValueError: If a ratio is negative or the ratios do not sum to 1.
        FileNotFoundError: If any class folder is missing. This is checked
            before any file is moved, so the dataset is never left half-split.
    """
    ratios = (train_ratio, val_ratio, test_ratio)
    if any(ratio < 0 for ratio in ratios) or abs(sum(ratios) - 1.0) > 1e-6:
        raise ValueError(
            f"train/val/test ratios must be non-negative and sum to 1, got {ratios}"
        )

    classes = list(classes)  # iterated twice below

    missing = [
        class_name for class_name in classes
        if not os.path.isdir(os.path.join(base_dir, class_name))
    ]
    if missing:
        raise FileNotFoundError(
            f"Class directories not found under {base_dir}: {missing}"
        )

    # A private generator keeps a seeded run from disturbing global random state.
    rng = random.Random(seed) if seed is not None else random

    for class_name in classes:
        class_dir = os.path.join(base_dir, class_name)
        # Keep regular files only (sub-folders are not images) and sort them so
        # the shuffle starts from a stable order regardless of the filesystem.
        images = sorted(
            entry for entry in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, entry))
        )
        rng.shuffle(images)  # Shuffle the images

        total_images = len(images)
        # The small epsilon stops float error from dropping an item,
        # e.g. 0.29 * 100 evaluates to 28.999999999999996.
        train_size = int(train_ratio * total_images + 1e-9)
        val_size = int(val_ratio * total_images + 1e-9)

        subsets = {
            'train': images[:train_size],
            'val': images[train_size:train_size + val_size],
            'test': images[train_size + val_size:],
        }

        # Create the subset folders if needed, then move the images into them
        for subset, subset_images in subsets.items():
            subset_dir = os.path.join(base_dir, subset, class_name)
            os.makedirs(subset_dir, exist_ok=True)
            for img in subset_images:
                src_path = os.path.join(class_dir, img)
                dst_path = os.path.join(subset_dir, img)
                shutil.move(src_path, dst_path)
                print(f"Moved {img} to {subset_dir}")

# Run the split over every configured flower class
split_dataset(
    base_dir=base_dir,
    classes=flower_types,
    train_ratio=train_ratio,
    val_ratio=val_ratio,
    test_ratio=test_ratio,
)

print("Dataset split completed.")

Moved dahlia1649.jpg to C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset\train\dahlias
Moved dahlia1710.jpg to C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset\train\dahlias
Moved dahlia987.jpg to C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset\train\dahlias
Moved dahlia1009.jpg to C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset\train\dahlias
Moved dahlia385.jpg to C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset\train\dahlias
Moved dahlia1929.jpg to C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset\train\dahlias
Moved dahlia1041.jpg to C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset\train\dahlias
Moved dahlia1469.jpg to C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset\train\dahlias
Moved dahlia481.jpg to C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset\train\dahlias
Moved dahlia955.jpg to C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset\train\dahlias
Moved dahlia844.jpg to C:/Users/LENOVO/Documents/AI - PROJECT/FlowersDataset\train\d