# ImageScrape

In [4]:
import requests
from bs4 import BeautifulSoup
import os
from urllib.parse import urljoin
from datetime import datetime

The following two cells contain two versions of the same code, which downloads the images found on a specified web page URL and saves them to a local folder. The second version improves the file naming so that previously saved files are not overwritten. Both versions follow a widely used approach and are useful tools for quick data gathering.

In [1]:
def download_images(url, folder_path):
    """Download the images referenced by ``<img>`` tags on a web page.

    The page at ``url`` is fetched and parsed; each ``<img>`` tag's ``src``
    is resolved against ``url`` and the image bytes are written to
    ``folder_path`` as ``image_<position>.jpg``, where ``<position>`` is the
    tag's position among all ``<img>`` tags on the page.

    Tags without a ``src``, sources that are not http(s) URLs (for example
    inline ``data:`` URIs) and images whose download fails are skipped with a
    message instead of aborting the whole run.

    Args:
        url: Address of the web page to scrape.
        folder_path: Directory that receives the files; created if missing.

    Returns:
        None. Progress is reported with ``print``.

    Errors raised by the request for the page itself are not caught here.
    """
    # A timeout keeps an unresponsive server from blocking the call forever.
    response = requests.get(url, timeout=30)

    # Anything other than 200 is reported and nothing is downloaded.
    if response.status_code != 200:
        print(f'Failed to retrieve the web page. Status Code: {response.status_code}')
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    img_tags = soup.find_all('img')

    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(folder_path, exist_ok=True)

    # enumerate() gives each tag its own position. list.index() compares tags
    # by markup, so identical <img> tags would share one index (and one file).
    for position, img_tag in enumerate(img_tags):
        img_src = img_tag.get('src')
        if not img_src:
            # urljoin(url, None) would return the page URL itself, which
            # would then be saved as if it were an image.
            print(f'Skipped image {position}: no src attribute')
            continue

        # If the image URL is relative, make it absolute.
        img_url = urljoin(url, img_src)
        if not img_url.lower().startswith(('http://', 'https://')):
            # e.g. inline "data:" URIs, which requests cannot fetch.
            print(f'Skipped image {position}: unsupported URL scheme')
            continue

        try:
            img_response = requests.get(img_url, timeout=30)
        except requests.RequestException as exc:
            print(f'Skipped image {position}: {exc}')
            continue

        if img_response.status_code != 200:
            # Do not store an error page under an image file name.
            print(f'Skipped image {position}: Status Code: {img_response.status_code}')
            continue

        img_name = os.path.join(folder_path, f'image_{position}.jpg')
        with open(img_name, 'wb') as img_file:
            img_file.write(img_response.content)
        print(f'Saved: {img_name}')

# Web page whose <img> tags will be scraped
url_to_scrape = 'https://www.google.com/search?client=safari&sca_esv=589042230&sxsrf=AM9HkKkdg3jRbXOmbcIsCxk4DIxStjVCHw:1702031156725&q=paintings+of+medieval+period&tbm=isch&source=lnms&sa=X&ved=2ahUKEwjP0_iT0P-CAxVVcfEDHVKKALAQ0pQJegQIDRAB&biw=1440&bih=820&dpr=2'

# Local directory that receives the downloaded files
output_folder = '/Users/liliamarzougui/Desktop/Folder'

# Run the scraper for the page and folder configured above
download_images(url=url_to_scrape, folder_path=output_folder)

Saved: /Users/liliamarzougui/Desktop/Folder/image_0.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_1.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_2.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_3.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_4.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_5.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_6.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_7.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_8.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_9.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_10.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_11.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_12.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_13.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_14.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_15.jpg
Saved: /Users/liliamarzougui/Desktop/Folder/image_16.jpg
Saved: /Users/liliamarzougui/Desktop/Fold

In [5]:
def download_images(url, folder_path):
    """Download the images referenced by ``<img>`` tags on a web page.

    The page at ``url`` is fetched and parsed; each ``<img>`` tag's ``src``
    is resolved against ``url`` and the image bytes are written to
    ``folder_path`` as ``image_<timestamp>.jpg``. The timestamp is taken from
    the local clock with microsecond resolution at the moment of saving, so
    files from earlier runs are not overwritten.

    Tags without a ``src``, sources that are not http(s) URLs (for example
    inline ``data:`` URIs) and images whose download fails are skipped with a
    message instead of aborting the whole run.

    Args:
        url: Address of the web page to scrape.
        folder_path: Directory that receives the files; created if missing.

    Returns:
        None. Progress is reported with ``print``.

    Errors raised by the request for the page itself are not caught here.
    """
    # A timeout keeps an unresponsive server from blocking the call forever.
    response = requests.get(url, timeout=30)

    # Anything other than 200 is reported and nothing is downloaded.
    if response.status_code != 200:
        print(f'Failed to retrieve the web page. Status Code: {response.status_code}')
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    img_tags = soup.find_all('img')

    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(folder_path, exist_ok=True)

    for img_tag in img_tags:
        img_src = img_tag.get('src')
        if not img_src:
            # urljoin(url, None) would return the page URL itself, which
            # would then be saved as if it were an image.
            print('Skipped image: no src attribute')
            continue

        # If the image URL is relative, make it absolute.
        img_url = urljoin(url, img_src)
        if not img_url.lower().startswith(('http://', 'https://')):
            # e.g. inline "data:" URIs, which requests cannot fetch.
            print('Skipped image: unsupported URL scheme')
            continue

        try:
            img_response = requests.get(img_url, timeout=30)
        except requests.RequestException as exc:
            print(f'Skipped {img_url}: {exc}')
            continue

        if img_response.status_code != 200:
            # Do not store an error page under an image file name.
            print(f'Skipped {img_url}: Status Code: {img_response.status_code}')
            continue

        # Generate a unique filename based on the current timestamp.
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
        img_name = os.path.join(folder_path, f'image_{timestamp}.jpg')

        with open(img_name, 'wb') as img_file:
            img_file.write(img_response.content)
        print(f'Saved: {img_name}')

# Web page whose <img> tags will be scraped
url_to_scrape = 'https://www.google.com/search?client=safari&sca_esv=589042230&sxsrf=AM9HkKkdg3jRbXOmbcIsCxk4DIxStjVCHw:1702031156725&q=paintings+of+medieval+period&tbm=isch&source=lnms&sa=X&ved=2ahUKEwjP0_iT0P-CAxVVcfEDHVKKALAQ0pQJegQIDRAB&biw=1440&bih=820&dpr=2'

# Local directory that receives the downloaded files
output_folder = '/Users/liliamarzougui/Desktop/CrackInsta'

# Run the scraper for the page and folder configured above
download_images(url=url_to_scrape, folder_path=output_folder)

Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000502680615.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000502839802.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000502972914.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000503110738.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000503265935.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000503396285.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000503524651.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000503647156.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000503782125.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000503908602.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000504037668.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/image_20231210000504182775.jpg
Saved: /Users/liliamarzougui/Desktop/CrackInsta/imag