In [4]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
import re
import tkinter as tk
from tkinter import filedialog
from datetime import datetime
import platform
import time

def extract_sample_info(filepath):
    """Extract sample information from the folders of a workbook path.

    The expected layout is
    ``.../<N>% gelatin/<particle type>/<M> cycles/<replicate>/<file>.xlsx``.
    Only directory components are inspected; the file name itself never
    supplies metadata, so a workbook stored directly inside the
    "% gelatin" folder is not mistaken for a particle type and a file name
    that happens to contain "cycles" is not mistaken for the cycles folder.

    Args:
        filepath: Path (str or Path) to an Excel workbook.

    Returns:
        dict with keys 'Gelatin_Concentration', 'Particle_Type', 'Cycles'
        and 'Replicate'. Each value is the matching folder name, or None
        when no folder matched.
    """
    # Drop the last component (the file name); metadata lives in folders only.
    folders = Path(filepath).parts[:-1]

    gelatin_concentration = None
    particle_type = None
    cycles = None

    for i, part in enumerate(folders):
        # Folder naming the gelatin concentration, e.g. "4% gelatin"
        if "% gelatin" in part:
            gelatin_concentration = part

        # The particle type is the folder directly below the gelatin folder
        if i > 0 and "% gelatin" in folders[i - 1]:
            particle_type = part

        # Cycles folder, e.g. "28 cycles" (later matches override earlier ones)
        if "cycles" in part and "cycles_pore_analysis" not in part:
            cycles = part

    # The replicate is the folder that directly contains the file
    replicate = folders[-1] if folders else None

    return {
        'Gelatin_Concentration': gelatin_concentration,
        'Particle_Type': particle_type,
        'Cycles': cycles,
        'Replicate': replicate
    }

def abbreviate_particle_type(particle_type):
    """Return a short code for a particle type, for use in sheet names.

    A missing/empty value gives "UNK". Names listed in the preset table use
    their fixed code. Any other multi-word name is reduced to the upper-cased
    initials of its words; anything else falls back to the upper-cased first
    three characters.
    """
    if not particle_type:
        return "UNK"

    # Fixed codes for the particle types we know about (extend as needed)
    known_codes = {
        "DBPC HMSN": "D",
        "Control": "C",
        "Blank": "B",
    }
    preset = known_codes.get(particle_type)
    if preset is not None:
        return preset

    tokens = particle_type.split()
    if len(tokens) <= 1:
        # Single word (or whitespace only): keep the leading three characters
        return particle_type[:3].upper()

    # Several words: build an acronym from the first letter of each
    initials = [token[0].upper() for token in tokens]
    return ''.join(initials)

def create_sheet_name(sample_info, sheet_type):
    """Create an abbreviated sheet name within Excel's 31-character limit.

    The name has the form ``<particle>-<cycles>C-R<replicate>-<type>``,
    e.g. ``D-0C-R1-S``.

    Args:
        sample_info: dict with at least the keys 'Particle_Type', 'Cycles'
            and 'Replicate' (as returned by extract_sample_info).
        sheet_type: Original sheet title; 'Summary', 'Pore Data',
            'Distribution Data' and 'Cumulative Data' map to S/P/D/C,
            anything else maps to 'X'.

    Returns:
        A sheet title of at most 31 characters. When the name would be too
        long, the sample part is shortened and the sheet-type suffix is kept,
        so the different sheets of one sample stay distinguishable.
        Characters that Excel does not allow in sheet titles are replaced
        with '_'.
    """
    max_length = 31  # Excel's limit for worksheet titles

    particle_abbrev = abbreviate_particle_type(sample_info['Particle_Type'])

    # Pull the number out of a folder name such as "28 cycles"
    cycles = sample_info['Cycles']
    cycle_match = (
        re.search(r'(\d+)\s*cycles?', cycles, re.IGNORECASE) if cycles else None
    )
    cycle_num = cycle_match.group(1) if cycle_match else "X"

    replicate = sample_info['Replicate'] or "X"

    sheet_abbrev = {
        'Summary': 'S',
        'Pore Data': 'P',
        'Distribution Data': 'D',
        'Cumulative Data': 'C'
    }
    suffix = f"-{sheet_abbrev.get(sheet_type, 'X')}"

    prefix = f"{particle_abbrev}-{cycle_num}C-R{replicate}"
    # Folder names may contain characters Excel forbids in titles: \ / * ? : [ ]
    prefix = re.sub(r'[\\/*?:\[\]]', '_', prefix)

    # Shorten only the sample part so the sheet-type suffix always survives
    return prefix[:max_length - len(suffix)] + suffix

def check_excel_structure(excel_file):
    """Return the sheet names available in an Excel file.

    Args:
        excel_file: Path to the workbook.

    Returns:
        List of sheet names, or an empty list when the file cannot be read
        (the error is printed rather than raised so that one bad file does
        not stop a batch).
    """
    try:
        # Use the context manager so the workbook handle is closed again;
        # an open handle would otherwise linger after the names are read.
        with pd.ExcelFile(excel_file) as xl_file:
            return xl_file.sheet_names
    except Exception as e:
        print(f"  Error reading {os.path.basename(excel_file)}: {str(e)}")
        return []

def find_excel_files(base_dir, pattern="*_pore_analysis_*.xlsx",
                     exclude_prefixes=("MERGED_", "~$")):
    """Recursively find all Excel files matching the pattern.

    Args:
        base_dir: Folder to search (searched recursively).
        pattern: Glob pattern for the file name.
        exclude_prefixes: File-name prefixes to skip. The defaults skip the
            merged workbooks this script writes (their names also match the
            default pattern) and Excel's "~$" lock files. Pass ``()`` to
            disable the filter.

    Returns:
        Sorted list of matching file paths as strings. If nothing matches
        ``pattern``, the more general "*pore*.xlsx" is tried instead.
    """
    base = Path(base_dir)
    skipped = tuple(exclude_prefixes or ())

    def _search(glob_pattern):
        # Sorted so the listing (and the file numbers shown to the user)
        # does not depend on filesystem enumeration order.
        return sorted(
            str(path)
            for path in base.glob(f"**/{glob_pattern}")
            if path.is_file() and not path.name.startswith(skipped)
        )

    excel_files = _search(pattern)

    # If no files found with the default pattern, try a more general pattern
    if not excel_files:
        print(f"No files found with pattern '{pattern}', trying more general pattern...")
        excel_files = _search("*pore*.xlsx")

    return excel_files

def _unique_sheet_name(base_name, used_names, max_length=31):
    """Return base_name, or base_name plus a numeric suffix not yet in used_names."""
    candidate = base_name
    counter = 1
    while candidate in used_names:
        suffix = f"_{counter}"
        # Trim the base so that base + suffix always fits the Excel limit
        candidate = f"{base_name[:max_length - len(suffix)]}{suffix}"
        counter += 1
    return candidate


def _autofit_column_widths(worksheet, min_width=8, max_width=50):
    """Set each column's width from its longest cell text, clamped to the given range."""
    for column in worksheet.columns:
        # Header and data cells are treated alike; empty cells are ignored
        longest = max(
            (len(str(cell.value)) for cell in column if cell.value),
            default=0,
        )
        column_letter = column[0].column_letter
        worksheet.column_dimensions[column_letter].width = max(
            min_width, min(longest + 2, max_width)
        )


def copy_sheets_to_merged_file(excel_files, output_file):
    """Copy sheets from multiple Excel files to a single merged file.

    For every input workbook the 'Summary', 'Pore Data', 'Distribution Data'
    and 'Cumulative Data' sheets (where present) are copied under abbreviated
    names, followed by an 'INDEX' sheet that maps each abbreviated name back
    to its sample and source file. Column widths are then adjusted on all
    sheets.

    Args:
        excel_files: Iterable of workbook paths to merge.
        output_file: Path of the workbook to create.

    Returns:
        Tuple ``(sheets_copied, sheet_names_used)`` where ``sheet_names_used``
        maps each written sheet name to a dict with the keys 'file',
        'original_sheet' and 'sample_info'.
    """
    sheet_types = ('Summary', 'Pore Data', 'Distribution Data', 'Cumulative Data')
    index_columns = [
        'Sheet_Name', 'Original_Sheet_Type', 'Particle_Type',
        'Cycles', 'Replicate', 'Source_File',
    ]

    # Track what sheets we've created so naming conflicts can be resolved
    sheet_names_used = {}
    sheets_copied = 0

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        for file_idx, excel_file in enumerate(excel_files):
            print(f"\n  Processing file {file_idx + 1}/{len(excel_files)}: {os.path.basename(excel_file)}")

            sample_info = extract_sample_info(excel_file)

            sheets = check_excel_structure(excel_file)
            if not sheets:
                print(f"    No sheets found, skipping...")
                continue

            for sheet_type in sheet_types:
                if sheet_type not in sheets:
                    print(f"    - '{sheet_type}' not found")
                    continue

                try:
                    df = pd.read_excel(excel_file, sheet_name=sheet_type)
                    sheet_name = _unique_sheet_name(
                        create_sheet_name(sample_info, sheet_type),
                        sheet_names_used,
                    )
                    df.to_excel(writer, sheet_name=sheet_name, index=False)
                except Exception as e:
                    # One unreadable sheet must not abort the whole merge
                    print(f"    ✗ Error copying '{sheet_type}': {str(e)}")
                    continue

                sheet_names_used[sheet_name] = {
                    'file': excel_file,
                    'original_sheet': sheet_type,
                    'sample_info': sample_info
                }
                sheets_copied += 1
                print(f"    ✓ Copied '{sheet_type}' → '{sheet_name}'")

        # Build the INDEX sheet with the mapping information
        summary_rows = [
            {
                'Sheet_Name': sheet_name,
                'Original_Sheet_Type': info['original_sheet'],
                'Particle_Type': info['sample_info']['Particle_Type'],
                'Cycles': info['sample_info']['Cycles'],
                'Replicate': info['sample_info']['Replicate'],
                'Source_File': os.path.basename(info['file'])
            }
            for sheet_name, info in sheet_names_used.items()
        ]
        summary_df = pd.DataFrame(summary_rows, columns=index_columns)

        if summary_rows:
            summary_df = summary_df.sort_values(['Particle_Type', 'Cycles', 'Replicate', 'Original_Sheet_Type'])
            summary_df.to_excel(writer, sheet_name='INDEX', index=False)
            print(f"\n  ✓ Created INDEX sheet with mapping information")
        else:
            # A workbook needs at least one sheet to be saved; write a
            # header-only INDEX so an empty merge ends cleanly, not in an error.
            summary_df.to_excel(writer, sheet_name='INDEX', index=False)
            print("\n  ! No sheets were copied; wrote an empty INDEX sheet")

        # Auto-adjust column widths for ALL sheets
        print("\n  Adjusting column widths...")
        for worksheet in writer.sheets.values():
            _autofit_column_widths(worksheet)

    return sheets_copied, sheet_names_used

def _prompt_for_base_dir():
    """Ask the user for the folder to scan; return its path, or None to abort."""
    try:
        # Imported here so a missing/broken Tk falls through to manual entry
        import tkinter as tk
        from tkinter import filedialog

        # Create root window and hide it
        root = tk.Tk()
        try:
            root.withdraw()

            # Make the dialog appear on top
            root.lift()
            root.attributes('-topmost', True)

            # Platform-specific methods to ensure window is on top
            if platform.system() == 'Windows':
                root.wm_attributes('-topmost', True)
            elif platform.system() == 'Darwin':  # macOS
                root.call('wm', 'attributes', '.', '-topmost', True)

            root.focus_force()

            # Small delay to ensure window comes to front
            root.update()
            time.sleep(0.1)

            print("Please select the folder containing your pore analysis Excel files...")
            print("(The file dialog should appear on top of other windows)")

            base_dir = filedialog.askdirectory(
                title="Select folder containing pore analysis Excel files",
                initialdir=os.path.expanduser("~"),
                parent=root
            )
        finally:
            # Always release the hidden root window, even if the dialog failed
            root.destroy()
    except Exception as e:
        print(f"Error opening folder dialog: {e}")
        print("\nPlease enter the folder path manually:")
        base_dir = input("Folder path: ").strip()
        # Must be a directory: a path to a file cannot be searched
        if not os.path.isdir(base_dir):
            print(f"Folder not found: {base_dir}")
            return None
        return base_dir

    if not base_dir:
        print("No folder selected. Exiting...")
        return None
    return base_dir


def _parse_file_selection(selection, total):
    """Convert text such as '1,3,5-8' into unique zero-based indices below total."""
    indices = []
    seen = set()
    for token in selection.split(','):
        token = token.strip()
        if not token:
            continue
        try:
            if '-' in token:
                start_text, end_text = token.split('-', 1)
                start, end = int(start_text), int(end_text)
            else:
                start = end = int(token)
        except ValueError:
            print(f"  Ignoring invalid entry: '{token}'")
            continue
        for index in range(start - 1, end):
            # Drop out-of-range numbers and repeats, keep the order entered
            if 0 <= index < total and index not in seen:
                seen.add(index)
                indices.append(index)
    return indices


def main():
    """Interactively merge pore-analysis workbooks found under a chosen folder.

    Steps: pick a folder (file dialog, with manual entry as fallback), list
    the matching workbooks with the sample information derived from their
    paths, let the user merge all or a subset, then write a timestamped
    ``MERGED_pore_analysis_*.xlsx`` into the chosen folder and print a
    summary of the sheet naming convention.
    """
    base_dir = _prompt_for_base_dir()
    if not base_dir:
        return

    print(f"\nSearching for Excel files in: {base_dir}")

    # Find all Excel files matching the pattern
    excel_files = find_excel_files(base_dir)

    if not excel_files:
        print("No Excel files found matching pattern '*_pore_analysis_*.xlsx'")
        print("Make sure your files follow the naming convention: [name]_pore_analysis_[method].xlsx")
        return

    print(f"\nFound {len(excel_files)} Excel files:")

    # Show all files with sample info
    for i, file in enumerate(excel_files, 1):
        rel_path = os.path.relpath(file, base_dir)
        sample_info = extract_sample_info(file)
        print(f"{i:3d}. {rel_path}")
        print(f"      → {sample_info['Particle_Type']} | {sample_info['Cycles']} | Replicate {sample_info['Replicate']}")

    # Ask user to confirm or filter
    print("\nOptions:")
    print("  1. Merge all files")
    print("  2. Select specific files to merge")
    print("  3. Cancel")

    # Strip so that stray whitespace around the answer does not change its meaning
    choice = input("\nEnter your choice (1-3): ").strip()

    if choice == '3':
        print("Merge cancelled.")
        return
    elif choice == '2':
        print("\nEnter the numbers of files to merge (comma-separated, e.g., 1,3,5-8):")
        selection = input("File numbers: ")

        selected_indices = _parse_file_selection(selection, len(excel_files))
        excel_files = [excel_files[i] for i in selected_indices]
        print(f"\nSelected {len(excel_files)} files for merging.")
    elif choice != '1':
        # Anything unrecognised must not silently start a full merge
        print(f"Invalid choice '{choice}'. Merge cancelled.")
        return

    if not excel_files:
        print("No files selected. Exiting...")
        return

    print(f"\nProcessing {len(excel_files)} Excel files...")

    # Create output filename in the same directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = os.path.join(base_dir, f"MERGED_pore_analysis_{timestamp}.xlsx")

    print(f"\nMerging sheets to: {output_file}")

    # Copy all sheets with abbreviated names
    sheets_copied, sheet_names_used = copy_sheets_to_merged_file(excel_files, output_file)

    print("\n" + "="*60)
    print("MERGE COMPLETE!")
    print("="*60)
    print(f"Output file: {os.path.basename(output_file)}")
    print(f"Location: {os.path.dirname(output_file)}")
    print(f"\nTotal sheets copied: {sheets_copied}")
    print("\nSheet naming convention:")
    print("  - Particle Type: First letter(s) (e.g., 'D' for DBPC HMSN)")
    print("  - Cycles: Number + 'C' (e.g., '0C' for 0 cycles)")
    print("  - Replicate: 'R' + number (e.g., 'R1' for replicate 1)")
    print("  - Sheet Type: S=Summary, P=Pore Data, D=Distribution, C=Cumulative")
    print("\nExample: 'D-0C-R1-S' = DBPC HMSN, 0 cycles, Replicate 1, Summary sheet")
    print("\nThe INDEX sheet contains the full mapping of abbreviated names to samples.")
    print("\nDone! You can open the merged Excel file to view all results.")

# Start the interactive merge only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Please select the folder containing your pore analysis Excel files...
(The file dialog should appear on top of other windows)

Searching for Excel files in: C:/Users/Talaial Alina/OneDrive - UCB-O365/Courses/Year 1/Fall Semester Aug-Dec 2020/CHEN 5840 - Independent Study/Hydrogels/SEM/04JUL25/4% gelatin/1X PBS

Found 6 Excel files:
  1. 0 cycles\10\0 cycles_pore_analysis_Custom_82.xlsx
      → 1X PBS | 0 cycles | Replicate 10
  2. 0 cycles\11\0 cycles_pore_analysis_Custom_91.xlsx
      → 1X PBS | 0 cycles | Replicate 11
  3. 0 cycles\9\0 cycles_pore_analysis_Custom_72.xlsx
      → 1X PBS | 0 cycles | Replicate 9
  4. 28 cycles\10\28 cycles_pore_analysis_Custom_82.xlsx
      → 1X PBS | 28 cycles | Replicate 10
  5. 28 cycles\11\28 cycles_pore_analysis_Multi-Otsu_lowest.xlsx
      → 1X PBS | 28 cycles | Replicate 11
  6. 28 cycles\9\28 cycles_pore_analysis_Multi-Otsu_lowest.xlsx
      → 1X PBS | 28 cycles | Replicate 9

Options:
  1. Merge all files
  2. Select specific files to merge
  


Enter your choice (1-3):  1



Processing 6 Excel files...

Merging sheets to: C:/Users/Talaial Alina/OneDrive - UCB-O365/Courses/Year 1/Fall Semester Aug-Dec 2020/CHEN 5840 - Independent Study/Hydrogels/SEM/04JUL25/4% gelatin/1X PBS\MERGED_pore_analysis_20250705_234918.xlsx

  Processing file 1/6: 0 cycles_pore_analysis_Custom_82.xlsx
    ✓ Copied 'Summary' → '1P-0C-R10-S'
    ✓ Copied 'Pore Data' → '1P-0C-R10-P'
    ✓ Copied 'Distribution Data' → '1P-0C-R10-D'
    ✓ Copied 'Cumulative Data' → '1P-0C-R10-C'

  Processing file 2/6: 0 cycles_pore_analysis_Custom_91.xlsx
    ✓ Copied 'Summary' → '1P-0C-R11-S'
    ✓ Copied 'Pore Data' → '1P-0C-R11-P'
    ✓ Copied 'Distribution Data' → '1P-0C-R11-D'
    ✓ Copied 'Cumulative Data' → '1P-0C-R11-C'

  Processing file 3/6: 0 cycles_pore_analysis_Custom_72.xlsx
    ✓ Copied 'Summary' → '1P-0C-R9-S'
    ✓ Copied 'Pore Data' → '1P-0C-R9-P'
    ✓ Copied 'Distribution Data' → '1P-0C-R9-D'
    ✓ Copied 'Cumulative Data' → '1P-0C-R9-C'

  Processing file 4/6: 28 cycles_pore_analy