In [None]:
import os
import shutil
from pathlib import Path
import openpyxl
from openpyxl import load_workbook, Workbook
import xlrd
from xlrd import open_workbook
import pandas as pd

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Folder tree that is scanned for workbooks, and the folder tree that
# receives the reorganized copies.
SOURCE_FOLDER = r"C:\Users\spt-admin\Desktop\PSWEPS"
DEST_FOLDER = r"C:\Users\spt-admin\Desktop\PSWEPS_NEW"

# Accepted spellings for each logical column. Matching is case-insensitive
# and, within a list, earlier entries take priority over later ones.
EMPLOYEE_NUMBER_HEADERS = [
    "EMPLOYEE_NUMBER",
    "EE_NUMBER",
]
SSNIT_HEADERS = [
    "SSNIT_NO_IN_DATABASE",
    "SSNIT NO.",
    "SSNIT NO",
    "SOCIAL_SECURITY_NUMBER",
    "SSNIT_NO",
    "SSNIT_NO.",
]
FULLNAME_HEADERS = [
    "FULL_NAME",
]
SURNAME_HEADER = "SURNAME"
OTHERNAME_HEADERS = [
    "OTHERNAME",
    "OTHER NAMES",
    "OTHERNAMES",
    "OTHER NAME",
]

# The processing log is written next to (not inside) the destination folder.
log_file = os.path.join(
    os.path.dirname(DEST_FOLDER),
    "processing_log.txt",
)

def log_message(message):
    """Echo *message* to stdout and append it, newline-terminated, to the log file.

    The log file (module-level ``log_file``) is opened in append mode and
    closed again on every call.
    """
    print(message)
    with open(log_file, mode='a', encoding='utf-8') as log_handle:
        log_handle.write(message + '\n')

def find_header_index(headers, target_headers):
    """Return the position of the first header matching any accepted name.

    Headers are compared after upper-casing and stripping surrounding
    whitespace; falsy headers (``None``, ``""``, ``0``) are treated as the
    empty string. Candidates in *target_headers* are tried in order, so an
    earlier candidate wins over a later one even if it appears further right
    in *headers*. Returns ``None`` when nothing matches.
    """
    # Map each normalized header to the position of its first occurrence.
    first_position = {}
    for position, header in enumerate(headers):
        normalized = str(header).upper().strip() if header else ""
        first_position.setdefault(normalized, position)

    # Walk the candidates in priority order.
    for candidate in target_headers:
        position = first_position.get(candidate.upper().strip())
        if position is not None:
            return position
    return None

def process_sheet_pandas(file_path, sheet_name):
    """Read one sheet and return it with the key columns moved to the front.

    The output starts with ``EMPLOYEE_NUMBER``, ``FULL_NAME`` and
    ``SSNIT_NUMBER`` (in that order), followed by every other source column
    in its original order. When the sheet has no full-name column, the name
    is built as ``"<surname> <othername>"`` and the two source columns are
    dropped from the output.

    Args:
        file_path: Anything ``pd.read_excel`` accepts as its first argument.
        sheet_name: Name of the sheet to read; row 0 is used as the header.

    Returns:
        A ``(frame, status)`` tuple. On success ``frame`` is the reorganized
        DataFrame and ``status`` is ``"Success"``. Otherwise ``frame`` is
        ``None`` and ``status`` explains why the sheet was skipped (empty
        sheet, a missing required column, or ``"Error: ..."`` for any
        exception raised while reading or transforming).
    """
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name, header=0)

        if df.empty:
            return None, "Empty sheet"

        headers = df.columns.tolist()

        # Locate the required columns by their accepted header spellings.
        emp_num_idx = find_header_index(headers, EMPLOYEE_NUMBER_HEADERS)
        ssnit_idx = find_header_index(headers, SSNIT_HEADERS)
        fullname_idx = find_header_index(headers, FULLNAME_HEADERS)
        surname_idx = None
        othername_idx = None

        # Without a full-name column, fall back to surname + other names.
        if fullname_idx is None:
            surname_idx = find_header_index(headers, [SURNAME_HEADER])
            othername_idx = find_header_index(headers, OTHERNAME_HEADERS)

        if emp_num_idx is None:
            return None, "Missing EMPLOYEE_NUMBER"
        if ssnit_idx is None:
            return None, "Missing SSNIT column"
        if fullname_idx is None and (surname_idx is None or othername_idx is None):
            return None, "Missing name columns"

        new_df = pd.DataFrame()

        # Column A: employee number.
        new_df['EMPLOYEE_NUMBER'] = df.iloc[:, emp_num_idx]

        # Column B: full name, taken as-is or assembled from its parts.
        if fullname_idx is not None:
            new_df['FULL_NAME'] = df.iloc[:, fullname_idx]
            used_indices = {emp_num_idx, ssnit_idx, fullname_idx}
        else:
            surname_col = df.iloc[:, surname_idx].fillna('')
            othername_col = df.iloc[:, othername_idx].fillna('')
            combined = surname_col.astype(str) + ' ' + othername_col.astype(str)
            # strip() removes the stray space left when one part is blank.
            new_df['FULL_NAME'] = combined.str.strip()
            used_indices = {emp_num_idx, ssnit_idx, surname_idx, othername_idx}

        # Column C: SSNIT number.
        new_df['SSNIT_NUMBER'] = df.iloc[:, ssnit_idx]

        # Append the remaining columns in source order. A leftover column
        # whose header equals one of the canonical output names (for example
        # a literal "SSNIT_NUMBER" next to the matched "SSNIT NO.") must not
        # overwrite the canonical column, so it gets a ".N" suffix instead.
        taken_names = set(new_df.columns)
        for idx, col in enumerate(headers):
            if idx in used_indices:
                continue
            out_name = col
            suffix = 1
            while out_name in taken_names:
                out_name = f"{col}.{suffix}"
                suffix += 1
            taken_names.add(out_name)
            new_df[out_name] = df.iloc[:, idx]

        return new_df, "Success"

    except Exception as e:
        # Best-effort by design: one unreadable sheet must not abort the
        # workbook; the caller logs the status and moves on.
        return None, f"Error: {str(e)}"

def process_workbook(source_path, dest_path):
    """Reorganize every sheet of one workbook and write the results to *dest_path*.

    Each sheet is passed to ``process_sheet_pandas``; sheets it accepts are
    written to the output workbook under their original names, the others
    are logged as skipped. The output file is only created once the first
    sheet has been processed successfully, so a workbook with no usable
    sheet produces no output file.

    Args:
        source_path: Path of the workbook to read (``.xls`` is opened with
            xlrd, anything else with openpyxl, to list the sheet names).
        dest_path: Path of the workbook to write with the openpyxl engine.

    Returns:
        ``True`` if at least one sheet was written, ``False`` if none was or
        if any exception occurred (the exception is logged, not raised).
    """
    try:
        log_message(f"\n{'='*80}")
        log_message(f"Processing: {source_path}")

        ext = os.path.splitext(source_path)[1].lower()

        # Only the sheet names are needed here; the data itself is read by
        # process_sheet_pandas.
        if ext == '.xls':
            # on_demand=True avoids parsing every sheet just to list names;
            # it requires an explicit release of the underlying file.
            wb_xlrd = open_workbook(source_path, on_demand=True)
            try:
                sheet_names = wb_xlrd.sheet_names()
            finally:
                wb_xlrd.release_resources()
        else:
            wb_openpyxl = load_workbook(source_path, read_only=True)
            try:
                sheet_names = wb_openpyxl.sheetnames
            finally:
                wb_openpyxl.close()

        sheets_processed = 0
        sheets_skipped = 0

        # The writer is created lazily: closing an openpyxl-backed writer
        # that holds no sheets raises "At least one sheet must be visible".
        writer = None
        try:
            for sheet_name in sheet_names:
                log_message(f"  Sheet: '{sheet_name}'")

                result_df, status = process_sheet_pandas(source_path, sheet_name)

                if result_df is None:
                    sheets_skipped += 1
                    log_message(f"     Skipped: {status}")
                    continue

                if writer is None:
                    writer = pd.ExcelWriter(dest_path, engine='openpyxl')
                result_df.to_excel(writer, sheet_name=sheet_name, index=False)
                sheets_processed += 1
                log_message("     Processed successfully")
        finally:
            # Always release the output file handle, even if a sheet failed
            # to write; close() also saves what has been written so far.
            if writer is not None:
                writer.close()

        log_message(f"Summary: {sheets_processed} sheets processed, {sheets_skipped} sheets skipped")

        return sheets_processed > 0

    except Exception as e:
        # Top-level boundary for one workbook: log and report failure so the
        # folder walk can continue with the next file.
        log_message(f"  ERROR processing workbook: {str(e)}")
        return False

def process_folder(source_folder, dest_folder):
    """Recursively process every Excel workbook under *source_folder*.

    The directory structure of *source_folder* is mirrored under
    *dest_folder*, and each ``.xlsx`` / ``.xls`` / ``.xlsm`` file is handed
    to ``process_workbook``.

    Output files always get a lower-case ``.xlsx`` extension: the output is
    a fresh, macro-free workbook written through openpyxl, which cannot
    write the legacy ``.xls`` format, and a macro-free workbook saved under
    ``.xlsm`` is rejected by Excel. If two sources in one folder would map
    to the same output name (e.g. ``a.xls`` and ``a.xlsx``), the later one
    is written as ``<stem>_<source-ext>_<n>.xlsx`` instead of overwriting.

    Excel lock files (names starting with ``~$``) are ignored.

    Args:
        source_folder: Root of the tree to scan.
        dest_folder: Root of the tree to write; created if missing.

    Returns:
        ``(total_files, processed_files, skipped_files)`` where
        ``skipped_files`` counts workbooks for which ``process_workbook``
        returned ``False``.
    """
    os.makedirs(dest_folder, exist_ok=True)

    total_files = 0
    processed_files = 0
    skipped_files = 0

    for root, _dirs, files in os.walk(source_folder):
        # Mirror the current directory under the destination root; normpath
        # drops the "." component produced for the root directory itself.
        rel_path = os.path.relpath(root, source_folder)
        current_dest = os.path.normpath(os.path.join(dest_folder, rel_path))
        os.makedirs(current_dest, exist_ok=True)

        # Output names already handed out in this directory (case-folded the
        # way the platform's filesystem compares names).
        claimed_names = set()

        for file in files:
            # "~$name.xlsx" is the lock file Excel keeps next to an open
            # workbook; it is not a workbook and would always fail to load.
            if file.startswith('~$'):
                continue

            stem, raw_ext = os.path.splitext(file)
            ext = raw_ext.lower()
            if ext not in ('.xlsx', '.xls', '.xlsm'):
                continue

            total_files += 1
            source_path = os.path.join(root, file)

            # Pick a unique ".xlsx" output name for this source file.
            out_name = f"{stem}.xlsx"
            attempt = 0
            while os.path.normcase(out_name) in claimed_names:
                attempt += 1
                out_name = f"{stem}_{ext[1:]}_{attempt}.xlsx"
            claimed_names.add(os.path.normcase(out_name))
            dest_path = os.path.join(current_dest, out_name)

            if process_workbook(source_path, dest_path):
                processed_files += 1
            else:
                skipped_files += 1

    return total_files, processed_files, skipped_files

def main():
    """Run the whole job: reset the log, process the source tree, log a summary.

    The log file is truncated and given a header first. If the source folder
    is missing, an error is logged and nothing else happens.
    """
    rule = "=" * 80

    # Start a fresh log with a short header block followed by a blank line.
    header_lines = [
        "Excel Files Reorganization Log",
        rule,
        f"Source: {SOURCE_FOLDER}",
        f"Destination: {DEST_FOLDER}",
        rule,
        "",
    ]
    with open(log_file, 'w', encoding='utf-8') as log_handle:
        log_handle.write("\n".join(header_lines) + "\n")

    log_message("Starting processing...")

    # Nothing to do without a source tree.
    if not os.path.exists(SOURCE_FOLDER):
        log_message(f"ERROR: Source folder does not exist: {SOURCE_FOLDER}")
        return

    total, processed, skipped = process_folder(SOURCE_FOLDER, DEST_FOLDER)

    # Final summary, one log entry per line.
    summary = (
        "\n" + rule,
        "PROCESSING COMPLETE",
        rule,
        f"Total Excel files found: {total}",
        f"Successfully processed: {processed}",
        f"Skipped/Failed: {skipped}",
        f"\nOutput location: {DEST_FOLDER}",
        f"Log file: {log_file}",
        rule,
    )
    for entry in summary:
        log_message(entry)

# Run the batch job only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()