In [1]:
import os
import tempfile
import csv
import win32com.client as win32

def extract_embedded_csv_files(file_path, save_directory):
    """
    Extracts embedded CSV files from an Excel document. It creates a new directory,
    extracts all the contents of the Excel file there, and then saves the embedded
    CSV files to the created directory.

    Parameters:
    ----------
    file_path : str, 
        The path to the Excel file to extract embedded CSV files from.
    
    save_directory : str,
        Directory to save the extracted CSV files.
    """

    # Create a new directory to save the extracted CSV files
    os.makedirs(save_directory, exist_ok=True)

    # Make a temporary directory 
    temp_dir = tempfile.mkdtemp()

    # Initialize Excel
    excel = win32.gencache.EnsureDispatch('Excel.Application')    

    try:
        # Suppress alerts to avoid interference
        excel.DisplayAlerts = False

        # Open the Excel file
        workbook = excel.Workbooks.Open(file_path)
        workbook.SaveAs(temp_dir, FileFormat=51)  # Save as Excel 2007-2019 format (xlsx)
        workbook.Close()

        # Iterate through all sheets and OLE objects to locate embedded CSV files
        for sheet in excel.Sheets:
            for ole_object in sheet.OLEObjects():
                try:
                    if ole_object.progID == 'CSV':
                        # Extract embedded CSV data
                        csv_data = ole_object.Object.Text
                        # Save CSV data to a file
                        csv_file_path = os.path.join(save_directory, f"{ole_object.Name}.csv")
                        with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
                            csv_file.write(csv_data)
                except Exception as e:
                    print(f"Error extracting OLE object: {str(e)}")

    except Exception as ex:
        print(f"Error: {str(ex)}")

    finally:
        # Clean up
        excel.Quit()
        shutil.rmtree(temp_dir)

# Replace 'your_excel_file.xlsx' with the actual path to your Excel workbook
excel_file_path = "C:/Users/Shreshtha/Downloads/Sample MNS Bartrack Report.xlsx"

# Replace 'your_save_directory' with the directory where you want to save the extracted CSV files
save_directory = "C:/Users/Shreshtha/Downloads/oyeee/"

# Extract embedded CSV files to the specified directory
extract_embedded_csv_files(excel_file_path, save_directory)


TypeError: This COM object can not automate the makepy process - please run makepy manually for this object

In [2]:
import tempfile
import os
import shutil
import win32com.client as win32

def extract_embedded_csv_files(file_path, save_directory):
    """
    Extracts embedded CSV files from an Excel document. It creates a new directory,
    extracts all the contents of the Excel file there, and then moves the embedded
    CSV files to the created directory.

    Parameters:
    ----------
    file_path : str, 
        The path to the Excel file to extract embedded CSV files from.
    
    save_directory : str,
        Directory to save the extracted CSV files.
    """

    # Create a new directory to save the extracted CSV files
    os.makedirs(save_directory, exist_ok=True)

    # Make a temporary directory 
    temp_dir = tempfile.mkdtemp()

    # Extract contents of the Excel file to the temporary dir
    excel = win32.gencache.EnsureDispatch('Excel.Application')    
    workbook = excel.Workbooks.Open(file_path)
    workbook.SaveAs(temp_dir, FileFormat=51)  # Save as Excel 2007-2019 format (xlsx)
    workbook.Close()

    # Iterate through all sheets and OLE objects to locate embedded CSV files
    for sheet in excel.Sheets:
        for ole_object in sheet.OLEObjects():
            try:
                if ole_object.progID == 'Excel.Sheet':
                    # Extract linked Excel sheet (not CSV)
                    ole_object.Copy()
                    new_workbook = excel.Workbooks.Add()
                    new_workbook.Paste()
                    new_workbook.SaveAs(os.path.join(save_directory, f"{sheet.Name}.xlsx"), FileFormat=51)
                    new_workbook.Close()
                    excel.CutCopyMode = False
                elif ole_object.progID == 'CSV':
                    # Extract embedded CSV file
                    ole_object.Copy()
                    new_workbook = excel.Workbooks.Add()
                    new_workbook.Paste()
                    new_workbook.SaveAs(os.path.join(save_directory, f"{ole_object.Name}.csv"), FileFormat=6)  # FileFormat 6 represents CSV
                    new_workbook.Close()
                    excel.CutCopyMode = False
            except Exception as e:
                print(f"Error extracting OLE object: {str(e)}")

    # Clean up
    excel.Quit()
    shutil.rmtree(temp_dir)

# Replace 'your_excel_file.xlsx' with the actual path to your Excel workbook
excel_file_path = "C:/Users/Shreshtha/Downloads/Sample MNS Bartrack Report.xlsx"

# Replace 'your_save_directory' with the directory where you want to save the extracted CSV files
save_directory = "C:/Users/Shreshtha/Downloads/oiiiiii/"

# Extract embedded CSV files to the specified directory
extract_embedded_csv_files(excel_file_path, save_directory)


TypeError: This COM object can not automate the makepy process - please run makepy manually for this object

In [3]:
import win32com.client as win32
import os
from tkinter import messagebox

# (0) Setup
dir_path = os.path.dirname(os.path.abspath(__file__))
print(dir_path)

try:
    # (1) Open Excel and access the OLE objects
    excel = win32.gencache.EnsureDispatch('Excel.Application')    
    wb = excel.Workbooks.Open(os.path.join(dir_path, "C:/Users/Shreshtha/Downloads/28D04D20.xlsx"))
    ws = wb.Worksheets(1)
    objs = ws.OLEObjects()

    # (2) Access embedded CSV file
    embedded_csv_name = "test_csv_ole.csv"
    ole_object = objs.Add(ClassType="MSComCtlLib.ListViewCtrl.2")
    ole_object.Name = 'Test'
    ole_object.Object.ListItems.Add(1, 1, embedded_csv_name)

    # (3) Extract content from the embedded CSV
    content = ole_object.Object.ListItems(1).Text

    # (4) Output message with content
    messagebox.showinfo(title=f"Embedded CSV Content", message=content)

except Exception as e:
    print(e)

finally:
    # (5) Clean up and close Excel
    wb.Close(True)
    excel.Quit()

    # (6) Release COM resources
    ws = None
    wb = None
    objs = None
    ole_object = None
    excel = None


NameError: name '__file__' is not defined

In [4]:
import win32com.client as win32
import os
from tkinter import messagebox

# (0) Setup
dir_path = os.path.dirname(os.path.abspath(__file__))
print(dir_path)

try:
    # (1) Open Excel and access the OLE objects
    excel = win32.gencache.EnsureDispatch('Excel.Application')    
    wb = excel.Workbooks.Open(os.path.join(dir_path, "C:/Users/Shreshtha/Downloads/28D04D20.xlsx"))
    ws = wb.Worksheets(1)
    objs = ws.OLEObjects()

    # (2) Loop through all OLE objects
    for ole_object in objs:
        # Check if the OLE object contains CSV data
        if ole_object.progID == 'MSComCtlLib.ListViewCtrl.2':
            # Extract content from the embedded CSV
            content = ole_object.Object.ListItems(1).Text

            # (3) Output message with content
            messagebox.showinfo(title=f"Embedded CSV Content", message=content)

except Exception as e:
    print(e)

finally:
    # (4) Clean up and close Excel
    wb.Close(True)
    excel.Quit()

    # (5) Release COM resources
    ws = None
    wb = None
    objs = None
    ole_object = None
    excel = None


NameError: name '__file__' is not defined

In [5]:
import win32com.client as win32
import os
from tkinter import messagebox

# (0) Setup
dir_path = os.getcwd()
print(dir_path)

try:
    # (1) Open Excel and access the OLE objects
    excel = win32.gencache.EnsureDispatch('Excel.Application')    
    wb = excel.Workbooks.Open(os.path.join(dir_path, "C:/Users/Shreshtha/Downloads/28D04D20.xlsx"))
    ws = wb.Worksheets(1)
    objs = ws.OLEObjects()

    # (2) Loop through all OLE objects
    for ole_object in objs:
        # Check if the OLE object contains CSV data
        if ole_object.progID == 'MSComCtlLib.ListViewCtrl.2':
            # Extract content from the embedded CSV
            content = ole_object.Object.ListItems(1).Text

            # (3) Output message with content
            messagebox.showinfo(title=f"Embedded CSV Content", message=content)

except Exception as e:
    print(e)

finally:
    # (4) Clean up and close Excel
    wb.Close(True)
    excel.Quit()

    # (5) Release COM resources
    ws = None
    wb = None
    objs = None
    ole_object = None
    excel = None


C:\Users\Shreshtha
This COM object can not automate the makepy process - please run makepy manually for this object


NameError: name 'wb' is not defined

In [6]:
import win32com.client as win32
import os
from tkinter import messagebox

# (0) Setup
dir_path = os.getcwd()
print(dir_path)

try:
    # (1) Open Excel and access the OLE objects
    excel = win32.gencache.EnsureDispatch('Excel.Application')    
    wb = excel.Workbooks.Open(os.path.join(dir_path, "C:/Users/Shreshtha/Downloads/28D04D20.xlsx"))
    ws = wb.Worksheets(1)
    objs = ws.OLEObjects()

    # (2) Loop through all OLE objects
    for ole_object in objs:
        try:
            # Attempt to extract content from the OLE object
            content = ole_object.Object.Text

            # (3) Output message with content
            messagebox.showinfo(title=f"Embedded Content", message=content)
        except Exception as e:
            print(f"Error extracting content: {e}")

except Exception as e:
    print(e)

finally:
    # (4) Clean up and close Excel
    try:
        wb.Close(True)
        excel.Quit()
    except:
        pass

    # (5) Release COM resources
    ws = None
    wb = None
    objs = None
    ole_object = None
    excel = None


C:\Users\Shreshtha
This COM object can not automate the makepy process - please run makepy manually for this object


In [7]:
import zipfile
import tempfile
import os
import glob
import shutil

def extract_embedded_files(file_path, save_path, sub_dir='xl'):
    """
    Extracts embedded files from Excel documents, including CSV files.
    
    Parameters:
    ----------
    file_path : str
        The path to the Excel file to extract embedded files from.
    
    save_path : str
        Path to save the extracted files to.

    sub_dir : str
        One of 'xl' (for Excel), 'word', or 'ppt'.
    """
    # Make a temporary directory
    temp_dir = tempfile.mkdtemp()

    try:
        # Extract contents of the Excel file to the temporary directory
        with zipfile.ZipFile(file_path, 'r') as zip_file:
            zip_file.extractall(temp_dir)

        # Find all embedded files and copy to the save_path
        embeddings_dir = os.path.join(temp_dir, sub_dir, 'embeddings')
        embedded_files = glob.glob(os.path.join(embeddings_dir, '*'))

        for file in embedded_files:
            # Check if the file is a CSV (you can modify this check based on your requirements)
            if file.lower().endswith('.csv'):
                shutil.copy(file, save_path)

    except Exception as e:
        print(f"Error extracting embedded files: {e}")

    finally:
        # Cleanup: Remove the temporary directory
        shutil.rmtree(temp_dir)

# Example usage:
excel_file_path = "C:/Users/Shreshtha/Downloads/Sample MNS Bartrack Report.xlsx"
save_directory = "C:/Users/Shreshtha/Downloads/extracted_files/"

extract_embedded_files(excel_file_path, save_directory)


In [8]:
import zipfile
import tempfile
import os
import glob
import shutil

def extract_embedded_files(file_path, save_path, sub_dir='xl'):
    """
    Extracts embedded files from Excel documents, including CSV files.
    
    Parameters:
    ----------
    file_path : str
        The path to the Excel file to extract embedded files from.
    
    save_path : str
        Path to save the extracted files to.

    sub_dir : str
        One of 'xl' (for Excel), 'word', or 'ppt'.
    """
    # Make sure the save_path directory exists
    os.makedirs(save_path, exist_ok=True)

    # Make a temporary directory
    temp_dir = tempfile.mkdtemp()

    try:
        # Extract contents of the Excel file to the temporary directory
        with zipfile.ZipFile(file_path, 'r') as zip_file:
            zip_file.extractall(temp_dir)

        # Find all embedded files and copy to the save_path
        embeddings_dir = os.path.join(temp_dir, sub_dir, 'embeddings')
        embedded_files = glob.glob(os.path.join(embeddings_dir, '*'))

        for file in embedded_files:
            # Check if the file is a CSV (you can modify this check based on your requirements)
            if file.lower().endswith('.csv'):
                shutil.copy(file, save_path)

    except Exception as e:
        print(f"Error extracting embedded files: {e}")

    finally:
        # Cleanup: Remove the temporary directory
        shutil.rmtree(temp_dir)

# Example usage:
excel_file_path = "C:/Users/Shreshtha/Downloads/Sample MNS Bartrack Report.xlsx"
save_directory = "C:/Users/Shreshtha/Downloads/extracted_files/"

extract_embedded_files(excel_file_path, save_directory)


In [9]:
import zipfile
import tempfile
import os
import glob
import shutil

def extract_embedded_files(file_path, save_path, sub_dir='xl'):
    """
    Extracts embedded files from Excel documents, including CSV files.
    
    Parameters:
    ----------
    file_path : str
        The path to the Excel file to extract embedded files from.
    
    save_path : str
        Path to save the extracted files to.

    sub_dir : str
        One of 'xl' (for Excel), 'word', or 'ppt'.
    """
    # Make sure the save_path directory exists
    os.makedirs(save_path, exist_ok=True)

    # Make a temporary directory
    temp_dir = tempfile.mkdtemp()

    try:
        # Extract contents of the Excel file to the temporary directory
        with zipfile.ZipFile(file_path, 'r') as zip_file:
            zip_file.extractall(temp_dir)

        # Print contents of the temporary directory for debugging
        print(f"Contents of {temp_dir}:")
        for root, dirs, files in os.walk(temp_dir):
            for file in files:
                print(os.path.relpath(os.path.join(root, file), temp_dir))

        # Find all embedded files and copy to the save_path
        embeddings_dir = os.path.join(temp_dir, sub_dir, 'embeddings')
        embedded_files = glob.glob(os.path.join(embeddings_dir, '*'))

        for file in embedded_files:
            # Check if the file is a CSV (you can modify this check based on your requirements)
            if file.lower().endswith('.csv'):
                shutil.copy(file, save_path)

    except Exception as e:
        print(f"Error extracting embedded files: {e}")

    finally:
        # Cleanup: Remove the temporary directory
        shutil.rmtree(temp_dir)

# Example usage:
excel_file_path = "C:/Users/Shreshtha/Downloads/Sample MNS Bartrack Report.xlsx"
save_directory = "C:/Users/Shreshtha/Downloads/extracted_files/"

extract_embedded_files(excel_file_path, save_directory)


Contents of C:\Users\SHRESH~1\AppData\Local\Temp\tmpv9nrixlt:
[Content_Types].xml
docProps\app.xml
docProps\core.xml
xl\sharedStrings.xml
xl\styles.xml
xl\workbook.xml
xl\drawings\drawing1.xml
xl\drawings\drawing2.xml
xl\drawings\drawing3.xml
xl\drawings\drawing4.xml
xl\drawings\drawing5.xml
xl\drawings\drawing6.xml
xl\drawings\vmlDrawing1.vml
xl\drawings\vmlDrawing2.vml
xl\drawings\vmlDrawing3.vml
xl\drawings\_rels\drawing1.xml.rels
xl\drawings\_rels\drawing5.xml.rels
xl\drawings\_rels\drawing6.xml.rels
xl\drawings\_rels\vmlDrawing1.vml.rels
xl\drawings\_rels\vmlDrawing2.vml.rels
xl\drawings\_rels\vmlDrawing3.vml.rels
xl\embeddings\oleObject1.bin
xl\externalLinks\externalLink1.xml
xl\externalLinks\externalLink2.xml
xl\externalLinks\externalLink3.xml
xl\externalLinks\externalLink4.xml
xl\externalLinks\_rels\externalLink1.xml.rels
xl\externalLinks\_rels\externalLink2.xml.rels
xl\externalLinks\_rels\externalLink3.xml.rels
xl\externalLinks\_rels\externalLink4.xml.rels
xl\media\image1.jpeg

In [11]:
import zipfile
import tempfile
import os
import shutil

def extract_embedded_files(file_path, save_path, sub_dir='xl'):
    """
    Extracts embedded files from Excel documents, including CSV files.
    
    Parameters:
    ----------
    file_path : str
        The path to the Excel file to extract embedded files from.
    
    save_path : str
        Path to save the extracted files to.

    sub_dir : str
        One of 'xl' (for Excel), 'word', or 'ppt'.
    """
    # Make sure the save_path directory exists
    os.makedirs(save_path, exist_ok=True)

    # Make a temporary directory
    temp_dir = tempfile.mkdtemp()

    try:
        # Extract contents of the Excel file to the temporary directory
        with zipfile.ZipFile(file_path, 'r') as zip_file:
            zip_file.extractall(temp_dir)

        # Find all CSV files and copy to the save_path
        for root, dirs, files in os.walk(temp_dir):
            for file in files:
                # Check if the file is a CSV (you can modify this check based on your requirements)
                if file.lower().endswith('.csv'):
                    source_file_path = os.path.join(root, file)
                    destination_file_path = os.path.join(save_path, file)
                    shutil.copy(source_file_path, destination_file_path)

    except Exception as e:
        print(f"Error extracting embedded files: {e}")

    finally:
        # Cleanup: Remove the temporary directory
        shutil.rmtree(temp_dir)

# Example usage:
excel_file_path = "C:/Users/Shreshtha/Downloads/Sample MNS Bartrack Report.xlsx"
save_directory = "C:/Users/Shreshtha/Downloads/extracted_files1/"

extract_embedded_files(excel_file_path, save_directory)


In [12]:
import zipfile
import tempfile
import os
import shutil

def extract_embedded_files(file_path, save_path, sub_dir=''):
    """
    Extracts embedded files from Excel documents, including CSV files.
    
    Parameters:
    ----------
    file_path : str
        The path to the Excel file to extract embedded files from.
    
    save_path : str
        Path to save the extracted files to.

    sub_dir : str
        Subdirectory where embedded files are expected to be found.
    """
    # Make sure the save_path directory exists
    os.makedirs(save_path, exist_ok=True)

    # Make a temporary directory
    temp_dir = tempfile.mkdtemp()

    try:
        # Extract contents of the Excel file to the temporary directory
        with zipfile.ZipFile(file_path, 'r') as zip_file:
            zip_file.extractall(temp_dir)

        # Find all CSV files and copy to the save_path
        for root, dirs, files in os.walk(temp_dir):
            for file in files:
                # Check if the file is a CSV (you can modify this check based on your requirements)
                if file.lower().endswith('.csv'):
                    source_file_path = os.path.join(root, file)
                    destination_file_path = os.path.join(save_path, file)
                    shutil.copy(source_file_path, destination_file_path)

    except Exception as e:
        print(f"Error extracting embedded files: {e}")

    finally:
        # Cleanup: Remove the temporary directory
        shutil.rmtree(temp_dir)

# Example usage:
excel_file_path = "C:/Users/Shreshtha/Downloads/Sample MNS Bartrack Report.xlsx"
save_directory = "C:/Users/Shreshtha/Downloads/extracted_files2/"

extract_embedded_files(excel_file_path, save_directory)


In [13]:
import zipfile

def find_embedded_csv_files(excel_file_path):
    with zipfile.ZipFile(excel_file_path, 'r') as zip_file:
        embedded_files = [info.filename for info in zip_file.infolist() if info.filename.startswith('xl/embeddings/') and info.filename.lower().endswith('.csv')]
    return embedded_files

# Replace 'your_excel_file.xlsx' with the actual path to your Excel file
excel_file_path = "C:/Users/Shreshtha/Downloads/28D04D20.xlsx"

# Check for embedded CSV files
embedded_csv_files = find_embedded_csv_files(excel_file_path)

if embedded_csv_files:
    print("Embedded CSV files found:")
    for file_path in embedded_csv_files:
        print(file_path)
else:
    print("No embedded CSV files found.")


No embedded CSV files found.


In [None]:
import win32com.client as win32
from tkinter import messagebox

def check_embedded_ole_objects(excel_file_path):
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    wb = excel.Workbooks.Open(excel_file_path)
    ws = wb.Worksheets(1)
    objs = ws.OLEObjects()

    if objs.Count > 0:
        messagebox.showinfo(title="OLE Objects Found", message=f"{objs.Count} OLE objects found in the Excel file.")
    else:
        messagebox.showinfo(title="No OLE Objects", message="No OLE objects found in the Excel file.")

    wb.Close(True)
    excel.Quit()

# Example usage:
excel_file_path = "C:/Users/Shreshtha/Downloads/28D04D20.xlsx"

check_embedded_ole_objects(excel_file_path)


In [17]:
import win32com.client as win32

def get_excel_instance():
    try:
        # Try to get the Excel instance without generating makepy files
        excel = win32.gencache.EnsureDispatch('Excel.Application')
    except Exception as e:
        print(f"Error: {e}")
        print("Manually run 'makepy.py -i Excel.Application' to generate makepy files.")
        excel = None

    return excel

# Example usage:
excel_instance = get_excel_instance()

if excel_instance:
    # Your code using excel_instance
    print("Excel instance obtained successfully.")
else:
    print("Unable to obtain Excel instance.")


Excel instance obtained successfully.


In [16]:
import win32com.client
win32com.client.gencache.EnsureModule('{00020813-0000-0000-C000-000000000046}', 0, 1, 9)


<module 'win32com.gen_py.00020813-0000-0000-C000-000000000046x0x1x9' from 'C:\\Users\\SHRESH~1\\AppData\\Local\\Temp\\gen_py\\3.11\\00020813-0000-0000-C000-000000000046x0x1x9\\__init__.py'>