In [2]:
from pinscrape import scraper, Pinterest

def using_search_engine(keyword, output_folder, images_to_download, proxies=None, number_of_workers=10):
    """Run ``scraper.scrape`` for ``keyword`` and print a summary of the result.

    Args:
        keyword (str): Search phrase forwarded to ``scraper.scrape``.
        output_folder (str): Output folder forwarded to ``scraper.scrape``.
        images_to_download (int): Image count forwarded to ``scraper.scrape``.
        proxies (dict | None): Proxy mapping forwarded to ``scraper.scrape``.
            ``None`` (the default) is replaced by a fresh empty dict.
        number_of_workers (int): Worker count forwarded to ``scraper.scrape``.
    """
    # A literal ``{}`` default is created once and shared by every call, so
    # anything written into it would leak into later calls. Build a new dict
    # per call instead.
    if proxies is None:
        proxies = {}

    details = scraper.scrape(keyword, output_folder, proxies, number_of_workers, images_to_download)
    if details["isDownloaded"]:
        print("\nDownloading completed !!")
        print(f"\nTotal urls found: {len(details['extracted_urls'])}")
        print(f"\nTotal images downloaded (including duplicate images): {len(details['urls_list'])}")
        print(details)
    else:
        print("\nNothing to download !!", details)



In [3]:

def using_pinterest_apis(keyword, output_folder, images_to_download, proxies=None, number_of_workers=20):
    """Search with a ``Pinterest`` client and download the returned URLs.

    Args:
        keyword (str): Search phrase passed to ``Pinterest.search``.
        output_folder (str): Passed to ``Pinterest.download`` as ``output_folder``.
        images_to_download (int): Second argument passed to ``Pinterest.search``.
        proxies (dict | None): Proxy mapping passed to the ``Pinterest``
            constructor. ``None`` (the default) is replaced by a fresh empty dict.
        number_of_workers (int): Passed to ``Pinterest.download`` as
            ``number_of_workers``.
    """
    # A literal ``{}`` default is a single dict shared by every call (and by
    # every client built from it); create a new one per call instead.
    if proxies is None:
        proxies = {}

    client = Pinterest(proxies=proxies)
    images_url = client.search(keyword, images_to_download)
    client.download(url_list=images_url, number_of_workers=number_of_workers, output_folder=output_folder)


In [8]:
import os

# Season/year labels; each one is used both as a search-keyword suffix and as
# the name of the sub-folder that receives the downloads.
season_tuple = (
    'Spring 2020', 'Summer 2020', 'Fall 2020', 'Autumn 2020', 'Winter 2020',
    'Spring 2021', 'Summer 2021', 'Fall 2021', 'Autumn 2021', 'Winter 2021',
    'Spring 2022', 'Summer 2022', 'Fall 2022', 'Autumn 2022', 'Winter 2022',
    'Spring 2023', 'Summer 2023', 'Fall 2023', 'Autumn 2023', 'Winter 2023',
    'Spring 2024', 'Summer 2024', 'Fall 2024', 'Autumn 2024', 'Winter 2024',
    'Spring 2025', 'Summer 2025', 'Fall 2025', 'Autumn 2025', 'Winter 2025',
)

# Number of images to download
images_to_download = 150

# Loop through the season labels, downloading each into images/<label>
for season in season_tuple:
    keyword = f"casual dresses women {season}"
    # Build the path with os.path.join rather than a hard-coded backslash:
    # "\{" is an invalid escape sequence, and outside Windows a backslash is an
    # ordinary file-name character, not a directory separator.
    output_folder = os.path.join("images", season)
    using_pinterest_apis(keyword, output_folder, images_to_download)


In [13]:
import os
import cv2
from pathlib import Path

def process_folder(folder_path, output_path_1, output_path_2, min_face_ratio=0.1):
    """
    Sorts the images of one folder by whether they show a single, large face.

    Every ``.jpg``/``.jpeg``/``.png`` file in ``folder_path`` that
    ``cv2.imread`` can decode is run through OpenCV's frontal-face Haar cascade
    and written with ``cv2.imwrite``, under its original file name, into one of
    the two output folders. Files that cannot be decoded are skipped.

    Args:
        folder_path (str): Path to the folder containing images.
        output_path_1 (str): Destination for images with exactly one detected
            face whose bounding box covers more than ``min_face_ratio`` of the
            image area.
        output_path_2 (str): Destination for every other decoded image (no
            face, several faces, or one face at or below the ratio).
        min_face_ratio (float): Threshold on face-box area divided by image
            area. Defaults to 0.1.

    Raises:
        RuntimeError: If the Haar cascade file could not be loaded.
    """
    # Create output folders if they don't exist
    os.makedirs(output_path_1, exist_ok=True)
    os.makedirs(output_path_2, exist_ok=True)

    # Load OpenCV's pre-trained face detection model (Haar Cascade)
    cascade_file = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    face_cascade = cv2.CascadeClassifier(cascade_file)
    # Fail here with a clear message rather than inside the first
    # detectMultiScale call if the cascade did not load.
    if face_cascade.empty():
        raise RuntimeError(f"Could not load Haar cascade: {cascade_file}")

    for file_name in os.listdir(folder_path):
        # Only consider files with an image extension
        if not file_name.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        image = cv2.imread(os.path.join(folder_path, file_name))
        if image is None:
            continue

        # Convert image to grayscale for face detection
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        # Default to the second folder; promote to the first only for exactly
        # one face that occupies a large enough share of the image.
        destination = output_path_2
        if len(faces) == 1:
            _, _, face_w, face_h = faces[0]
            image_area = image.shape[0] * image.shape[1]
            if (face_w * face_h) / image_area > min_face_ratio:
                destination = output_path_1

        output_file_path = os.path.join(destination, file_name)
        # Report a failed write instead of silently dropping the image.
        if not cv2.imwrite(output_file_path, image):
            print(f"Warning: could not write {output_file_path}")

def process_all_folders(base_path, season_tuple, images_1_path, images_2_path):
    """
    Runs ``process_folder`` on each named sub-folder of ``base_path`` that exists.

    Sub-folders listed in ``season_tuple`` but missing on disk are skipped
    without any message. Each processed sub-folder gets output folders of the
    same name under ``images_1_path`` and ``images_2_path``.

    Args:
        base_path (str): Directory that contains the sub-folders.
        season_tuple (tuple): Names of the sub-folders to process, in order.
        images_1_path (str): Root of the first output tree.
        images_2_path (str): Root of the second output tree.
    """
    for season in season_tuple:
        source_dir = os.path.join(base_path, season)

        # Nothing to do for labels without a matching folder on disk.
        if not os.path.exists(source_dir):
            continue

        print(f"Processing folder: {season}")
        process_folder(
            source_dir,
            os.path.join(images_1_path, season),
            os.path.join(images_2_path, season),
        )

if __name__ == "__main__":
    # Input root (holds one sub-folder per season label) and the two output roots.
    base_directory = "images"
    images_1_directory = "images_1"
    images_2_directory = "images_2"

    # Sub-folder names: for each year 2020-2025, the labels Spring, Summer,
    # Fall, Autumn and Winter, in that order.
    season_tuple = tuple(
        f"{season_name} {year}"
        for year in range(2020, 2026)
        for season_name in ("Spring", "Summer", "Fall", "Autumn", "Winter")
    )

    # Run the face filter over every listed sub-folder that exists on disk.
    process_all_folders(
        base_path=base_directory,
        season_tuple=season_tuple,
        images_1_path=images_1_directory,
        images_2_path=images_2_directory,
    )
    print("Filtering complete!")


Processing folder: Spring 2020
Processing folder: Summer 2020
Processing folder: Fall 2020
Processing folder: Autumn 2020
Processing folder: Winter 2020
Processing folder: Spring 2021
Processing folder: Summer 2021
Processing folder: Fall 2021
Processing folder: Autumn 2021
Processing folder: Winter 2021
Processing folder: Spring 2022


KeyboardInterrupt: 