In [8]:
import os
import shutil
import pathlib
import zipfile

def extract_images_from_excel(path, output_folder_name='extracted_images',
                              preserve_extension=False):
    """
    Extracts the embedded images of an Excel workbook into a single folder.

    An .xlsx file is a zip archive, and this function copies every member
    stored under ``xl/media/``. The members are streamed directly out of the
    archive into the output folder, so the rest of the workbook is never
    unpacked and no scratch directory is created (or deleted) next to it.

    Args:
        path (pathlib.Path or str): Excel file path. The extension must be
            '.xlsx' (compared case-insensitively).
        output_folder_name (str): Name of the folder, created next to the
            workbook, that receives the extracted images.
            Defaults to 'extracted_images'.
        preserve_extension (bool): When False (the default) every output file
            is named '<workbook stem>-<n>.png', whatever the embedded file's
            own extension is. When True the embedded file's own extension is
            used instead (e.g. '<workbook stem>-2.jpeg').

    Returns:
        new_paths (list[pathlib.Path]): List of paths to the extracted images,
            numbered from 1 in sorted order of their archive member names.
            Empty when the workbook has no 'xl/media/' members.

    Raises:
        ValueError: If ``path`` does not have an '.xlsx' extension.
    """
    # pathlib.Path() accepts both str and Path, so no isinstance check is needed
    path = pathlib.Path(path)

    # Compare case-insensitively so 'BOOK.XLSX' is accepted as well
    if path.suffix.lower() != '.xlsx':
        raise ValueError('Path must be an xlsx file')

    # Workbook filename without the extension; used as the image name prefix
    name = path.stem

    # Create the destination folder (and any missing parents) if needed
    output_folder = path.parent / output_folder_name
    output_folder.mkdir(parents=True, exist_ok=True)

    media_prefix = 'xl/media/'
    new_paths = []  # Paths of the extracted images, in numbering order

    with zipfile.ZipFile(path, 'r') as zip_ref:
        # Sort the member names so the numbering is reproducible between runs;
        # names ending in '/' are directory entries, not files
        media_members = sorted(
            member for member in zip_ref.namelist()
            if member.startswith(media_prefix) and not member.endswith('/')
        )

        for image_index, member in enumerate(media_members, start=1):
            if preserve_extension:
                # Zip member names always use '/' separators
                suffix = pathlib.PurePosixPath(member).suffix
            else:
                suffix = '.png'
            new_path = output_folder / f'{name}-{image_index}{suffix}'

            # Stream the member's bytes straight from the archive to disk
            with zip_ref.open(member) as source, open(new_path, 'wb') as target:
                shutil.copyfileobj(source, target)

            new_paths.append(new_path)

    # Return the list of paths to the extracted images
    return new_paths
# Workbook whose embedded pictures should be pulled out.
path = r"C:\Users\Subhadeep\Downloads\multi table oxford sheet.xlsx"

# The images go into a 'bartrack_mns_images' folder beside the workbook.
# The bare call is kept as the last expression so the notebook displays
# the returned list of paths.
extract_images_from_excel(path, 'bartrack_mns_images')




[WindowsPath('C:/Users/Subhadeep/Downloads/bartrack_mns_images/multi table oxford sheet-1.png'),
 WindowsPath('C:/Users/Subhadeep/Downloads/bartrack_mns_images/multi table oxford sheet-2.png'),
 WindowsPath('C:/Users/Subhadeep/Downloads/bartrack_mns_images/multi table oxford sheet-3.png'),
 WindowsPath('C:/Users/Subhadeep/Downloads/bartrack_mns_images/multi table oxford sheet-4.png'),
 WindowsPath('C:/Users/Subhadeep/Downloads/bartrack_mns_images/multi table oxford sheet-5.png')]

In [20]:
import os
import zipfile
from openpyxl import load_workbook

# Workbook to unpack (an .xlsx file is a zip archive)
excel_file_path = r"C:\Users\Subhadeep\Downloads\multi table oxford sheet.xlsx"

# Folder that receives the unpacked archive members
extracted_dir = 'C:/Users/Subhadeep/Downloads/extracted_contents'

# Make sure the target folder exists; re-running the cell is harmless
os.makedirs(extracted_dir, exist_ok=True)

# Unpack every member of the workbook archive into the target folder
# (ZipFile opens in read mode by default)
with zipfile.ZipFile(excel_file_path) as zip_ref:
    zip_ref.extractall(path=extracted_dir)

