In [70]:
import os
import shutil
import pathlib
import zipfile

def extract_images_from_excel(path, output_folder_name='extracted_images'):
    """
    Extracts the embedded media files of an Excel workbook into a single folder.

    An ``.xlsx`` file is a ZIP archive; its embedded pictures are the members
    stored under ``xl/media/``. They are streamed straight out of the archive,
    so no temporary folder is created (or removed) next to the workbook.

    Args:
        path (pathlib.Path or str): Excel file path.
        output_folder_name (str): Name of the folder, created next to the
            workbook, that receives the extracted images.
            Defaults to 'extracted_images'.

    Returns:
        new_paths (list[pathlib.Path]): Paths of the extracted images, named
            ``<workbook stem>-<index><original extension>`` with a 1-based index.
            Empty when the workbook has no ``xl/media/`` members.

    Raises:
        ValueError: If ``path`` does not have an ``.xlsx`` extension.
    """
    path = pathlib.Path(path)

    # Accept '.XLSX' as well; the extension check is not case-sensitive
    if path.suffix.lower() != '.xlsx':
        raise ValueError('Path must be an xlsx file')

    # Workbook filename without the extension, used as prefix for the image names
    name = path.stem

    # Folder that receives the images, created next to the workbook
    output_folder = path.parent / output_folder_name
    output_folder.mkdir(parents=True, exist_ok=True)

    new_paths = []  # Paths of the extracted images

    with zipfile.ZipFile(path, 'r') as zip_ref:
        media_members = [
            info for info in zip_ref.infolist()
            if info.filename.startswith('xl/media/') and not info.is_dir()
        ]

        # Deterministic order in which 'image2' sorts before 'image10'
        def _member_order(info):
            stem = pathlib.PurePosixPath(info.filename).stem
            return (len(stem), stem)

        media_members.sort(key=_member_order)

        for image_index, info in enumerate(media_members, start=1):
            # Keep the real extension (.jpeg, .emf, ...) rather than labelling
            # everything as PNG; fall back to '.png' only when there is none
            suffix = pathlib.PurePosixPath(info.filename).suffix or '.png'
            new_path = output_folder / f'{name}-{image_index}{suffix}'

            # Stream the member from the archive into its destination file
            with zip_ref.open(info) as source, open(new_path, 'wb') as target:
                shutil.copyfileobj(source, target)

            new_paths.append(new_path)

    return new_paths
# Run the extraction on a local sample workbook; the images go to a
# 'bartrack_mns_images' folder next to it and the returned paths are displayed.
# NOTE: machine-specific absolute path; adjust before running elsewhere.
path=r"C:\Users\Subhadeep\Downloads\multi table oxford sheet.xlsx"
extract_images_from_excel(path, output_folder_name='bartrack_mns_images')




[WindowsPath('C:/Users/Subhadeep/Downloads/bartrack_mns_images/multi table oxford sheet-1.png'),
 WindowsPath('C:/Users/Subhadeep/Downloads/bartrack_mns_images/multi table oxford sheet-2.png'),
 WindowsPath('C:/Users/Subhadeep/Downloads/bartrack_mns_images/multi table oxford sheet-3.png'),
 WindowsPath('C:/Users/Subhadeep/Downloads/bartrack_mns_images/multi table oxford sheet-4.png'),
 WindowsPath('C:/Users/Subhadeep/Downloads/bartrack_mns_images/multi table oxford sheet-5.png')]

In [20]:
import os
import zipfile
from openpyxl import load_workbook

# Source workbook and the folder that receives its raw archive members
# (an .xlsx file can be opened as a ZIP archive)
excel_file_path = r"C:\Users\Subhadeep\Downloads\multi table oxford sheet.xlsx"
extracted_dir = 'C:/Users/Subhadeep/Downloads/extracted_contents'

# Ensure the destination exists; re-running the cell reuses the folder
os.makedirs(extracted_dir, exist_ok=True)

# Unpack every member of the workbook archive into the destination
with zipfile.ZipFile(excel_file_path) as zip_ref:
    zip_ref.extractall(path=extracted_dir)



In [21]:
# %pip installs into the environment of the running kernel, whereas !pip uses
# whichever pip happens to be first on the shell PATH
%pip install xlwings


DEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063






## Split workbook

In [65]:
import xlwings as xw
import os

def split_workbook(excel_file_path):
    """
    Saves every sheet of an Excel workbook as its own ``.xlsx`` file.

    The files are written to a "Split_Worksheets" folder next to the source
    workbook and are named after their sheet. Each sheet is copied in front of
    the first sheet of a freshly created workbook, so that initial sheet is
    still present in the saved file. The source workbook is left open.

    NOTE(review): the copy goes through ``sheet.api.Copy(Before=...)``, i.e. the
    native Excel object; presumably this only works where xlwings exposes the
    COM API (Windows) - confirm.

    Args:
        excel_file_path (str): Excel file path.

    Returns:
        list[str]: Paths of the workbooks that were written, in sheet order.
    """
    wb = xw.Book(excel_file_path)
    output_dir = os.path.join(os.path.dirname(excel_file_path), "Split_Worksheets")
    os.makedirs(output_dir, exist_ok=True)

    output_paths = []
    for sheet in wb.sheets:
        # Create a new workbook to receive the copy of this sheet
        new_wb = xw.Book()
        try:
            new_ws = new_wb.sheets[0]
            sheet.api.Copy(Before=new_ws.api)
            output_file_path = os.path.join(output_dir, f"{sheet.name}.xlsx")
            new_wb.save(output_file_path)
            output_paths.append(output_file_path)
        finally:
            # Close the scratch workbook even when the copy or the save fails,
            # so a failure does not leave a stray workbook open in Excel
            new_wb.close()

    return output_paths
    
    

    
    

In [67]:
# Write each sheet of the workbook to its own file under "Split_Worksheets".
# NOTE: machine-specific absolute path; adjust before running elsewhere.
split_workbook(r"C:\Users\Subhadeep\Downloads\Bartrack Files\mns bartrack.xlsx")

## Extract Specific Sheet

In [68]:
def split_specific_sheet(excel_file_path, sheet_name):
    """
    Saves one sheet of an Excel workbook as its own ``.xlsx`` file.

    The file is written to a "Split_Specific_Sheet" folder next to the source
    workbook and is named after the sheet. The sheet is copied in front of the
    first sheet of a freshly created workbook, so that initial sheet is still
    present in the saved file. The source workbook is left open.

    Args:
        excel_file_path (str): Excel file path.
        sheet_name (str): Name of the sheet to copy out.

    Returns:
        str: Path of the workbook that was written.
    """
    wb = xw.Book(excel_file_path)
    output_dir = os.path.join(os.path.dirname(excel_file_path), "Split_Specific_Sheet")
    os.makedirs(output_dir, exist_ok=True)

    sheet = wb.sheets[sheet_name]

    # Create a new workbook to receive the copy of the sheet
    new_wb = xw.Book()
    try:
        new_ws = new_wb.sheets[0]
        sheet.api.Copy(Before=new_ws.api)

        output_file_path = os.path.join(output_dir, f"{sheet_name}.xlsx")
        new_wb.save(output_file_path)
    finally:
        # Close the scratch workbook even when the copy or the save fails,
        # so a failure does not leave a stray workbook open in Excel
        new_wb.close()

    return output_file_path

In [69]:
# Copy the 'MNS0001' sheet into its own workbook.
# NOTE: split_specific_sheet is defined twice in this notebook with different
# output locations; which one runs depends on the cell executed last.
split_specific_sheet(r"C:\Users\Subhadeep\Downloads\Bartrack Files\mns bartrack.xlsx", 'MNS0001')

In [None]:
# Extract the pictures from the single-sheet workbook written by the
# split_specific_sheet call above; the function requires a workbook path.
# NOTE(review): path assumed from that call's "Split_Specific_Sheet" output - confirm.
extract_images_from_excel(r"C:\Users\Subhadeep\Downloads\Bartrack Files\Split_Specific_Sheet\MNS0001.xlsx")

## Extract images from a specific sheet

In [76]:
import os
import shutil
import zipfile
import pathlib
import xlwings as xw

def split_specific_sheet(excel_file_path, sheet_name):
    """
    Splits a specific sheet of an Excel file into a separate workbook.

    The new workbook is saved next to the source file as ``<sheet_name>.xlsx``.
    The sheet is copied in front of the first sheet of a freshly created
    workbook, so that initial sheet is still present in the saved file.
    Failures to find or copy the sheet are reported with ``print`` rather than
    raised; in that case nothing is saved. The source workbook is left open.

    Args:
        excel_file_path (str): Excel file path.
        sheet_name (str): Name of the specific sheet to split.

    Returns:
        str or None: Path of the saved workbook, or None when the sheet was not
            found or could not be copied.
    """
    # Open the Excel workbook
    wb = xw.Book(excel_file_path)

    # Find the specific sheet
    # NOTE(review): a missing sheet may surface as a different exception type
    # depending on platform / xlwings version - confirm KeyError is what is raised.
    try:
        sheet = wb.sheets[sheet_name]
    except KeyError:
        print(f"Sheet '{sheet_name}' not found in the workbook.")
        return None

    # Create a new workbook to receive the copy
    new_wb = xw.Book()
    try:
        try:
            # Copy the specific sheet to the new workbook
            sheet.api.Copy(Before=new_wb.sheets[0].api)
        except Exception as e:
            # Do not save after a failed copy: the result would be a blank
            # workbook under the sheet's name, hiding the failure from callers
            print(f"Failed to copy sheet: {e}")
            return None

        output_file_path = os.path.join(os.path.dirname(excel_file_path), f"{sheet_name}.xlsx")
        new_wb.save(output_file_path)
        return output_file_path
    finally:
        # Always close the scratch workbook, saved or not
        new_wb.close()

def extract_images_from_excel(path, output_folder_name='extracted_images'):
    """
    Extracts the embedded media files of an Excel workbook into a single folder.

    An ``.xlsx`` file is a ZIP archive; its embedded pictures are the members
    stored under ``xl/media/``. They are streamed straight out of the archive,
    so no temporary folder is created (or removed) next to the workbook.

    Args:
        path (pathlib.Path or str): Excel file path.
        output_folder_name (str): Name of the folder, created next to the
            workbook, that receives the extracted images.
            Defaults to 'extracted_images'.

    Returns:
        new_paths (list[pathlib.Path]): Paths of the extracted images, named
            ``<workbook stem>-<index><original extension>`` with a 1-based index.
            Empty when the workbook has no ``xl/media/`` members.

    Raises:
        ValueError: If ``path`` does not have an ``.xlsx`` extension.
    """
    path = pathlib.Path(path)

    # Accept '.XLSX' as well; the extension check is not case-sensitive
    if path.suffix.lower() != '.xlsx':
        raise ValueError('Path must be an xlsx file')

    # Workbook filename without the extension, used as prefix for the image names
    name = path.stem

    # Folder that receives the images, created next to the workbook
    output_folder = path.parent / output_folder_name
    output_folder.mkdir(parents=True, exist_ok=True)

    new_paths = []  # Paths of the extracted images

    with zipfile.ZipFile(path, 'r') as zip_ref:
        media_members = [
            info for info in zip_ref.infolist()
            if info.filename.startswith('xl/media/') and not info.is_dir()
        ]

        # Deterministic order in which 'image2' sorts before 'image10'
        def _member_order(info):
            stem = pathlib.PurePosixPath(info.filename).stem
            return (len(stem), stem)

        media_members.sort(key=_member_order)

        for image_index, info in enumerate(media_members, start=1):
            # Keep the real extension (.jpeg, .emf, ...) rather than labelling
            # everything as PNG; fall back to '.png' only when there is none
            suffix = pathlib.PurePosixPath(info.filename).suffix or '.png'
            new_path = output_folder / f'{name}-{image_index}{suffix}'

            # Stream the member from the archive into its destination file
            with zip_ref.open(info) as source, open(new_path, 'wb') as target:
                shutil.copyfileobj(source, target)

            new_paths.append(new_path)

    return new_paths

def split_and_extract_images_from_specific_sheet(excel_file_path, sheet_name):
    """
    Splits a specific sheet of an Excel file into a separate workbook and extracts images from that workbook.

    The single-sheet workbook is expected next to the source file as
    ``<sheet_name>.xlsx``; ``sheet_name`` is also passed on as the name of the
    folder that receives the images.

    Args:
        excel_file_path (str): Excel file path.
        sheet_name (str): Name of the specific sheet to split and extract images from.

    Returns:
        The value returned by ``extract_images_from_excel`` for the split
        workbook (the paths of the extracted images).
    """
    # Split the specific sheet into a separate workbook
    split_specific_sheet(excel_file_path, sheet_name)

    # Path where the split workbook is expected to have been saved
    split_workbook_path = os.path.join(os.path.dirname(excel_file_path), f"{sheet_name}.xlsx")

    # Extract images from the newly created workbook and hand the result back
    # to the caller instead of discarding it
    return extract_images_from_excel(split_workbook_path, sheet_name)

# Example usage: split the 'MNS0001' sheet out of the workbook, then extract
# the images of the resulting single-sheet workbook.
# NOTE: machine-specific absolute path; adjust before running elsewhere.
excel_file_path = r"C:\Users\Subhadeep\Downloads\Bartrack Files\mns bartrack.xlsx"
sheet_name = "MNS0001"  # Specify the name of the sheet containing the images
split_and_extract_images_from_specific_sheet(excel_file_path, sheet_name)
