In [3]:
pip install pandas openpyxl xlrd

Collecting xlrd
  Downloading xlrd-2.0.1-py2.py3-none-any.whl.metadata (3.4 kB)
Downloading xlrd-2.0.1-py2.py3-none-any.whl (96 kB)
Installing collected packages: xlrd
Successfully installed xlrd-2.0.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import os
import time
import logging
import warnings
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
import openpyxl

# Silence UserWarning messages raised from openpyxl.styles.stylesheet
warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl.styles.stylesheet')

# Send INFO-and-above records to a log file in the current working directory
logging.basicConfig(
    filename='mass_excel_rename.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# Root folder that is walked recursively for Excel files. Defaults to the
# original hard-coded location; set the MAKER_ROOT_DIR environment variable to
# point the notebook at a different tree without editing the code.
ROOT_DIR = os.environ.get("MAKER_ROOT_DIR", r"F:\Flipcarbon\Ajax\Maker")

# Debug mode - True enables extra logging, echoes errors to stdout and runs an
# interactive sample pass (with confirmation prompts) before the full run
DEBUG_MODE = True

def fast_read_title(file_path):
    """Return the text of cell A1 on the active sheet of an Excel workbook.

    The workbook is opened with ``read_only=True`` and ``data_only=True`` and
    is closed again before the function returns or raises.

    Args:
        file_path: Path of the workbook to open.

    Returns:
        ``str(value)`` of cell A1, or ``""`` when the cell is empty.

    Raises:
        RuntimeError: If the workbook cannot be opened or read. The failure is
            logged and the original exception is chained as ``__cause__``.
    """
    wb = None  # explicit sentinel instead of probing locals() in the finally block
    try:
        wb = openpyxl.load_workbook(file_path, read_only=True, data_only=True)
        sheet = wb.active
        if sheet is None:
            # Give a readable reason instead of an AttributeError on None
            raise ValueError("workbook has no active worksheet")

        title_value = sheet.cell(row=1, column=1).value
        return "" if title_value is None else str(title_value)
    except Exception as e:
        logging.error(f"Excel read failed: {file_path}: {str(e)}")
        raise RuntimeError(f"Excel read failed: {str(e)}") from e
    finally:
        if wb is not None:
            try:
                wb.close()
            except Exception as close_err:
                # Closing stays best-effort, but KeyboardInterrupt/SystemExit
                # are no longer swallowed and the failure leaves a trace.
                logging.debug(f"Could not close workbook {file_path}: {close_err}")

def extract_name_from_title(title, filename):
    """Extract the location name/code that follows the report prefix in a title.

    Example: ``"Maker Month Wise Data  of Baratang - AN201 , Andaman & Nicobar
    Island (2024)"`` yields ``"Baratang - AN201"``.

    Args:
        title: Text read from the workbook's title cell.
        filename: Current file name; its stem is the fallback result.

    Returns:
        The text between ``"Maker Month Wise Data of "`` (any amount of
        whitespace around ``of``) and the first ``","``; if there is no comma,
        up to the first ``" ("``; otherwise the rest of the title. The result
        is stripped and may be empty. When the prefix is not present, the stem
        of ``filename`` is returned and a warning is logged.
    """
    import re  # local import keeps this definition self-contained

    logging.info(f"Processing title: '{title}'")

    # \s+ covers both observed spellings ("Data of" / "Data  of"). Requiring a
    # real match avoids slicing from a bogus offset when "of" is missing, which
    # the previous find()-based code did (find() == -1 plus the prefix length).
    match = re.search(r"Maker Month Wise Data\s+of\s+", title)
    if match:
        remainder = title[match.end():]
        # Delimiters in priority order: comma first, then the " (year)" suffix
        for delimiter in (",", " ("):
            cut = remainder.find(delimiter)
            if cut != -1:
                return remainder[:cut].strip()
        # No obvious delimiter: use the rest of the string
        return remainder.strip()

    # Fallback - use original filename
    logging.warning(f"Could not extract name from title: '{title}', using original filename")
    return os.path.splitext(filename)[0]

def process_file(args):
    """Rename one Excel file after the location named in its title cell.

    Args:
        args: ``(folder, filename)`` tuple identifying the file.

    Returns:
        ``(folder, status)`` where status is ``"success"``, ``"error"`` or
        ``"skipped (same name)"``. Exceptions are logged and reported as
        ``"error"`` rather than propagated.
    """
    folder, filename = args
    file_path = os.path.join(folder, filename)
    ext = os.path.splitext(filename)[1]

    try:
        # Read title from Excel file
        title = fast_read_title(file_path)

        if DEBUG_MODE:
            logging.info(f"File: {filename}, Title: '{title}'")
            if not title:
                logging.warning(f"Empty title in file: {filename}")

        # Extract just the location code
        new_name = extract_name_from_title(title, filename)

        # Replace every character Windows reserves in file names (not only the
        # path separators) so os.rename does not fail on titles such as "A: B"
        for reserved_char in '<>:"/\\|?*':
            new_name = new_name.replace(reserved_char, '-')
        new_name = new_name.strip()
        if not new_name:
            raise ValueError("Failed to extract a valid name")

        new_filename = f"{new_name}{ext}"
        new_path = os.path.join(folder, new_filename)

        if DEBUG_MODE:
            logging.info(f"Extracted name: '{new_name}' for file: {filename}")

        # Skip if no change needed
        if new_filename == filename:
            return (folder, "skipped (same name)")

        if os.path.exists(new_path):
            # Add timestamp to make unique; the timestamp only has one-second
            # resolution, so keep appending a counter until the name is free.
            timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
            stem = f"{new_name}_{timestamp}"
            new_filename = f"{stem}{ext}"
            new_path = os.path.join(folder, new_filename)
            counter = 1
            while os.path.exists(new_path):
                new_filename = f"{stem}_{counter}{ext}"
                new_path = os.path.join(folder, new_filename)
                counter += 1
            logging.warning(f"Duplicate name, adding timestamp: {new_filename}")

        # Perform the rename.
        # NOTE(review): another worker thread can still claim new_path between
        # the exists() check and this call; the resulting OSError (where the
        # platform raises one) is reported as "error" below.
        os.rename(file_path, new_path)
        return (folder, "success")

    except Exception as e:
        error_msg = f"{file_path} - {str(e)}"
        logging.error(error_msg)
        if DEBUG_MODE:
            print(f"Error: {error_msg}")
        return (folder, "error")

def process_folder(max_workers=4):
    """Rename every Excel file under ROOT_DIR, using a thread pool.

    In DEBUG_MODE up to five files are processed serially first and the user
    is asked whether to continue. Those sample files are counted in the
    statistics and are not submitted again, since they have already been
    renamed and their old paths no longer exist.

    Args:
        max_workers: Number of worker threads used for the bulk run.

    Returns:
        Dict mapping each walked folder to a dict with the counters
        ``'total'``, ``'success'``, ``'error'`` and ``'skipped'``.
    """
    folder_stats = {}

    # Collect all files first for accurate progress.
    # NOTE(review): '.xls' files are collected too, but fast_read_title opens
    # workbooks with openpyxl; per the openpyxl docs the legacy .xls format is
    # not supported, so those files will likely be reported as errors - confirm.
    all_files = []
    for folder, _, files in os.walk(ROOT_DIR):
        excel_files = [f for f in files if f.lower().endswith(('.xlsx', '.xls'))]
        all_files.extend((folder, f) for f in excel_files)
        folder_stats[folder] = {
            'total': len(excel_files),
            'success': 0,
            'error': 0,
            'skipped': 0
        }
    file_count = len(all_files)

    def record(result):
        """Add one (folder, status) result to the per-folder counters."""
        result_folder, status = result
        status_key = status.split()[0]  # Get first word (success, error, skipped)
        folder_stats[result_folder][status_key] += 1

    processed = 0
    remaining = all_files
    print(f"\nProcessing {file_count} files across {len(folder_stats)} folders...")

    # For debugging, process just a few files first
    if DEBUG_MODE:
        test_files = all_files[:5]
        remaining = all_files[len(test_files):]
        print(f"DEBUG MODE: Processing {len(test_files)} sample files first...")

        for file_args in test_files:
            folder, filename = file_args
            print(f"Processing: {os.path.join(folder, filename)}")
            result = process_file(file_args)
            print(f"Result: {result[1]}")
            record(result)
            processed += 1

        proceed = input("Continue with all files? (y/n): ").strip().lower() == 'y'
        if not proceed:
            print("Operation cancelled by user")
            return folder_stats

    # Start timing after the prompt so waiting for the user is not counted
    start_time = time.time()
    last_report = start_time

    # The pool is only created once there is parallel work to do
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for result in executor.map(process_file, remaining):
            record(result)
            processed += 1

            # Progress update every 50 files or 5 seconds
            now = time.time()
            if processed % 50 == 0 or (now - last_report) > 5:
                print(f"Processed {processed}/{file_count} files ({now - start_time:.1f}s)")
                last_report = now

    return folder_stats

def print_stats(folder_stats):
    """Print the per-folder counters as a text table followed by a totals row.

    Args:
        folder_stats: Dict mapping a folder path to a dict holding the
            ``'total'``, ``'success'``, ``'error'`` and ``'skipped'`` counters.
            Rows are printed in sorted folder order.
    """
    rule = "-" * 85
    counter_keys = ('total', 'success', 'error', 'skipped')

    def format_row(label, counts):
        # Same column layout for folder rows and the TOTALS row
        return (f"{label:<50} | {counts['total']:>6} | {counts['success']:>7} | "
                f"{counts['error']:>6} | {counts['skipped']:>7}")

    print("\n\nProcessing Summary:")
    print(rule)
    print(f"{'Folder Path':<50} | {'Total':>6} | {'Success':>7} | {'Errors':>6} | {'Skipped':>7}")
    print(rule)

    # Accumulate the grand totals while the rows are being printed
    grand_totals = dict.fromkeys(counter_keys, 0)
    for folder in sorted(folder_stats):
        counts = folder_stats[folder]
        print(format_row(folder, counts))
        for key in counter_keys:
            grand_totals[key] += counts[key]

    print(rule)
    print(format_row('TOTALS', grand_totals))
    print(rule)

def analyze_sample_files():
    """Print title and extracted name for up to two Excel files per folder.

    Walks ROOT_DIR, and for each sampled file prints its name, folder, the
    title read from the workbook and the name extracted from that title, or
    the error message when reading or extraction fails. No file is renamed.
    """
    separator = "-" * 100
    print("\nSample File Analysis:")
    print(separator)

    for folder, _, files in os.walk(ROOT_DIR):
        workbooks = [name for name in files if name.lower().endswith(('.xlsx', '.xls'))]

        # Slicing an empty list yields nothing, so folders without Excel
        # files are skipped without a separate check
        for filename in workbooks[:2]:
            try:
                title = fast_read_title(os.path.join(folder, filename))
                new_name = extract_name_from_title(title, filename)
            except Exception as e:
                detail_lines = [f"  ERROR: {str(e)}"]
            else:
                detail_lines = [
                    f"  Title in A1: '{title}'",
                    f"  Extracted Name: '{new_name}'",
                ]

            print(f"File: {filename}")
            print(f"  Path: {folder}")
            for line in detail_lines:
                print(line)
            print(separator)

if __name__ == "__main__":
    # Validate path before processing
    if not os.path.isdir(ROOT_DIR):
        print(f"Error: Path does not exist or is not a directory\n{ROOT_DIR}")
        # exit() is the interactive helper injected by site/IPython and is not
        # meant for programs; raising SystemExit works the same everywhere.
        raise SystemExit(1)

    print(f"Starting processing for:\n{ROOT_DIR}")

    # First analyze sample files if in debug mode
    if DEBUG_MODE:
        choice = input("Run sample analysis first? (y/n): ").strip().lower()
        if choice == 'y':
            analyze_sample_files()

    # Start the main processing. In DEBUG_MODE the measured duration includes
    # the time spent waiting at the confirmation prompt in process_folder.
    start = time.time()
    stats = process_folder()
    duration = time.time() - start

    print_stats(stats)
    print(f"\nTotal processing time: {duration:.2f} seconds")
    print("Log file: mass_excel_rename.log")

Starting processing for:
F:\Flipcarbon\Ajax\Maker

Sample File Analysis:
----------------------------------------------------------------------------------------------------
File: Maker Month Wise Data  of Baratang - AN201.xlsx
  Path: F:\Flipcarbon\Ajax\Maker\Andaman & Nicobar Island
  Title in A1: 'Maker Month Wise Data  of Baratang - AN201 , Andaman & Nicobar Island (2024)'
  Extracted Name: 'Baratang - AN201'
----------------------------------------------------------------------------------------------------
File: Maker Month Wise Data  of Car Nicobar - AN211.xlsx
  Path: F:\Flipcarbon\Ajax\Maker\Andaman & Nicobar Island
  Title in A1: 'Maker Month Wise Data  of Car Nicobar - AN211 , Andaman & Nicobar Island (2024)'
  Extracted Name: 'Car Nicobar - AN211'
----------------------------------------------------------------------------------------------------
File: Maker Month Wise Data  of Adoni RTO - AP221.xlsx
  Path: F:\Flipcarbon\Ajax\Maker\Andhra Pradesh
  Title in A1: 'Maker Mont

In [5]:
import os
import time
import logging
import warnings
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
import openpyxl

# Silence UserWarning messages raised from openpyxl.styles.stylesheet
warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl.styles.stylesheet')

# Send INFO-and-above records to a log file in the current working directory
logging.basicConfig(
    filename='mass_excel_rename.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# Root folder that is walked recursively for Excel files. Defaults to the
# original hard-coded location; set the MAKER_ROOT_DIR environment variable to
# point the notebook at a different tree without editing the code.
ROOT_DIR = os.environ.get("MAKER_ROOT_DIR", r"F:\Flipcarbon\Ajax\Maker")

# Debug mode - True enables extra logging, echoes errors to stdout and runs an
# interactive sample pass (with confirmation prompts) before the full run
DEBUG_MODE = True

# Global collection for tracking errors: one dict per failed file with the
# keys 'folder', 'filename' and 'error'. Re-running this cell resets it.
error_files = []

def fast_read_title(file_path):
    """Return the text of cell A1 on the active sheet of an Excel workbook.

    The workbook is opened with ``read_only=True`` and ``data_only=True`` and
    is closed again before the function returns or raises.

    Args:
        file_path: Path of the workbook to open.

    Returns:
        ``str(value)`` of cell A1, or ``""`` when the cell is empty.

    Raises:
        RuntimeError: If the workbook cannot be opened or read. The failure is
            logged and the original exception is chained as ``__cause__``.
    """
    wb = None  # explicit sentinel instead of probing locals() in the finally block
    try:
        wb = openpyxl.load_workbook(file_path, read_only=True, data_only=True)
        sheet = wb.active
        if sheet is None:
            # Give a readable reason instead of an AttributeError on None
            raise ValueError("workbook has no active worksheet")

        title_value = sheet.cell(row=1, column=1).value
        return "" if title_value is None else str(title_value)
    except Exception as e:
        logging.error(f"Excel read failed: {file_path}: {str(e)}")
        raise RuntimeError(f"Excel read failed: {str(e)}") from e
    finally:
        if wb is not None:
            try:
                wb.close()
            except Exception as close_err:
                # Closing stays best-effort, but KeyboardInterrupt/SystemExit
                # are no longer swallowed and the failure leaves a trace.
                logging.debug(f"Could not close workbook {file_path}: {close_err}")

def extract_name_from_title(title, filename):
    """Extract the location name/code that follows the report prefix in a title.

    Example: ``"Maker Month Wise Data  of Baratang - AN201 , Andaman & Nicobar
    Island (2024)"`` yields ``"Baratang - AN201"``.

    Args:
        title: Text read from the workbook's title cell.
        filename: Current file name; its stem is the fallback result.

    Returns:
        The text between ``"Maker Month Wise Data of "`` (any amount of
        whitespace around ``of``) and the first ``","``; if there is no comma,
        up to the first ``" ("``; otherwise the rest of the title. The result
        is stripped and may be empty. When the prefix is not present, the stem
        of ``filename`` is returned and a warning is logged.
    """
    import re  # local import keeps this definition self-contained

    logging.info(f"Processing title: '{title}'")

    # \s+ covers both observed spellings ("Data of" / "Data  of"). Requiring a
    # real match avoids slicing from a bogus offset when "of" is missing, which
    # the previous find()-based code did (find() == -1 plus the prefix length).
    match = re.search(r"Maker Month Wise Data\s+of\s+", title)
    if match:
        remainder = title[match.end():]
        # Delimiters in priority order: comma first, then the " (year)" suffix
        for delimiter in (",", " ("):
            cut = remainder.find(delimiter)
            if cut != -1:
                return remainder[:cut].strip()
        # No obvious delimiter: use the rest of the string
        return remainder.strip()

    # Fallback - use original filename
    logging.warning(f"Could not extract name from title: '{title}', using original filename")
    return os.path.splitext(filename)[0]

def process_file(args):
    """Rename one Excel file after the location named in its title cell.

    Args:
        args: ``(folder, filename)`` tuple identifying the file.

    Returns:
        ``(folder, status)`` where status is ``"success"``, ``"error"`` or
        ``"skipped (same name)"``. Exceptions are logged, appended to the
        global ``error_files`` list and reported as ``"error"`` rather than
        propagated.
    """
    folder, filename = args
    file_path = os.path.join(folder, filename)
    ext = os.path.splitext(filename)[1]

    try:
        # Read title from Excel file
        title = fast_read_title(file_path)

        if DEBUG_MODE:
            logging.info(f"File: {filename}, Title: '{title}'")
            if not title:
                logging.warning(f"Empty title in file: {filename}")

        # Extract just the location code
        new_name = extract_name_from_title(title, filename)

        # Replace every character Windows reserves in file names (not only the
        # path separators) so os.rename does not fail on titles such as "A: B"
        for reserved_char in '<>:"/\\|?*':
            new_name = new_name.replace(reserved_char, '-')
        new_name = new_name.strip()
        if not new_name:
            raise ValueError("Failed to extract a valid name")

        new_filename = f"{new_name}{ext}"
        new_path = os.path.join(folder, new_filename)

        if DEBUG_MODE:
            logging.info(f"Extracted name: '{new_name}' for file: {filename}")

        # Skip if no change needed
        if new_filename == filename:
            return (folder, "skipped (same name)")

        if os.path.exists(new_path):
            # Add timestamp to make unique; the timestamp only has one-second
            # resolution, so keep appending a counter until the name is free.
            timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
            stem = f"{new_name}_{timestamp}"
            new_filename = f"{stem}{ext}"
            new_path = os.path.join(folder, new_filename)
            counter = 1
            while os.path.exists(new_path):
                new_filename = f"{stem}_{counter}{ext}"
                new_path = os.path.join(folder, new_filename)
                counter += 1
            logging.warning(f"Duplicate name, adding timestamp: {new_filename}")

        # Perform the rename.
        # NOTE(review): another worker thread can still claim new_path between
        # the exists() check and this call; the resulting OSError (where the
        # platform raises one) is reported as "error" below.
        os.rename(file_path, new_path)
        return (folder, "success")

    except Exception as e:
        error_msg = f"{file_path} - {str(e)}"
        logging.error(error_msg)
        # Add to global error tracking
        error_files.append({
            'folder': folder,
            'filename': filename,
            'error': str(e)
        })

        if DEBUG_MODE:
            print(f"Error: {error_msg}")
        return (folder, "error")

def process_folder(max_workers=4):
    """Rename every Excel file under ROOT_DIR, using a thread pool.

    In DEBUG_MODE up to five files are processed serially first and the user
    is asked whether to continue. Those sample files are counted in the
    statistics and are not submitted again, since they have already been
    renamed and their old paths no longer exist.

    Args:
        max_workers: Number of worker threads used for the bulk run.

    Returns:
        Dict mapping each walked folder to a dict with the counters
        ``'total'``, ``'success'``, ``'error'`` and ``'skipped'``.
    """
    folder_stats = {}

    # Collect all files first for accurate progress.
    # NOTE(review): '.xls' files are collected too, but fast_read_title opens
    # workbooks with openpyxl; per the openpyxl docs the legacy .xls format is
    # not supported, so those files will likely be reported as errors - confirm.
    all_files = []
    for folder, _, files in os.walk(ROOT_DIR):
        excel_files = [f for f in files if f.lower().endswith(('.xlsx', '.xls'))]
        all_files.extend((folder, f) for f in excel_files)
        folder_stats[folder] = {
            'total': len(excel_files),
            'success': 0,
            'error': 0,
            'skipped': 0
        }
    file_count = len(all_files)

    def record(result):
        """Add one (folder, status) result to the per-folder counters."""
        result_folder, status = result
        status_key = status.split()[0]  # Get first word (success, error, skipped)
        folder_stats[result_folder][status_key] += 1

    processed = 0
    remaining = all_files
    print(f"\nProcessing {file_count} files across {len(folder_stats)} folders...")

    # For debugging, process just a few files first
    if DEBUG_MODE:
        test_files = all_files[:5]
        remaining = all_files[len(test_files):]
        print(f"DEBUG MODE: Processing {len(test_files)} sample files first...")

        for file_args in test_files:
            folder, filename = file_args
            print(f"Processing: {os.path.join(folder, filename)}")
            result = process_file(file_args)
            print(f"Result: {result[1]}")
            record(result)
            processed += 1

        proceed = input("Continue with all files? (y/n): ").strip().lower() == 'y'
        if not proceed:
            print("Operation cancelled by user")
            return folder_stats

    # Start timing after the prompt so waiting for the user is not counted
    start_time = time.time()
    last_report = start_time

    # The pool is only created once there is parallel work to do
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for result in executor.map(process_file, remaining):
            record(result)
            processed += 1

            # Progress update every 50 files or 5 seconds
            now = time.time()
            if processed % 50 == 0 or (now - last_report) > 5:
                print(f"Processed {processed}/{file_count} files ({now - start_time:.1f}s)")
                last_report = now

    return folder_stats

def print_stats(folder_stats):
    """Print the per-folder counters as a text table followed by a totals row.

    Args:
        folder_stats: Dict mapping a folder path to a dict holding the
            ``'total'``, ``'success'``, ``'error'`` and ``'skipped'`` counters.
            Rows are printed in sorted folder order.
    """
    rule = "-" * 85
    counter_keys = ('total', 'success', 'error', 'skipped')

    def format_row(label, counts):
        # Same column layout for folder rows and the TOTALS row
        return (f"{label:<50} | {counts['total']:>6} | {counts['success']:>7} | "
                f"{counts['error']:>6} | {counts['skipped']:>7}")

    print("\n\nProcessing Summary:")
    print(rule)
    print(f"{'Folder Path':<50} | {'Total':>6} | {'Success':>7} | {'Errors':>6} | {'Skipped':>7}")
    print(rule)

    # Accumulate the grand totals while the rows are being printed
    grand_totals = dict.fromkeys(counter_keys, 0)
    for folder in sorted(folder_stats):
        counts = folder_stats[folder]
        print(format_row(folder, counts))
        for key in counter_keys:
            grand_totals[key] += counts[key]

    print(rule)
    print(format_row('TOTALS', grand_totals))
    print(rule)

def export_error_list():
    """Write the collected error records to a timestamped CSV and preview them.

    Reads the global ``error_files`` list (dicts with ``'folder'``,
    ``'filename'`` and ``'error'`` keys). When the list is empty a message is
    printed and nothing is written. Otherwise ``error_files_<timestamp>.csv``
    is created in the current working directory and the first ten entries are
    printed.
    """
    import csv  # local import so this definition does not rely on the import cell

    if not error_files:
        print("No errors to export!")
        return

    # Create error report
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    report_file = f"error_files_{timestamp}.csv"

    # csv.writer quotes commas, quotes and newlines in ANY column (folder and
    # file names may contain them too), so the error text no longer needs to be
    # altered. newline='' is what the csv module requires for file objects;
    # utf-8-sig keeps non-ASCII names intact and is recognised by Excel.
    with open(report_file, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(["Folder", "Filename", "Error"])
        for error in error_files:
            writer.writerow([error['folder'], error['filename'], error['error']])

    print(f"\nError file list exported to: {report_file}")
    print(f"Total error files: {len(error_files)}")

    # Also display the first 10 errors on screen
    print("\nSample of Error Files:")
    print("-" * 100)
    for position, error in enumerate(error_files[:10], start=1):
        print(f"{position}. {os.path.join(error['folder'], error['filename'])}")
        print(f"   Error: {error['error']}")

    if len(error_files) > 10:
        print(f"...and {len(error_files) - 10} more. See {report_file} for complete list.")

def analyze_sample_files():
    """Print title and extracted name for up to two Excel files per folder.

    Walks ROOT_DIR, and for each sampled file prints its name, folder, the
    title read from the workbook and the name extracted from that title, or
    the error message when reading or extraction fails. No file is renamed.
    """
    separator = "-" * 100
    print("\nSample File Analysis:")
    print(separator)

    for folder, _, files in os.walk(ROOT_DIR):
        workbooks = [name for name in files if name.lower().endswith(('.xlsx', '.xls'))]

        # Slicing an empty list yields nothing, so folders without Excel
        # files are skipped without a separate check
        for filename in workbooks[:2]:
            try:
                title = fast_read_title(os.path.join(folder, filename))
                new_name = extract_name_from_title(title, filename)
            except Exception as e:
                detail_lines = [f"  ERROR: {str(e)}"]
            else:
                detail_lines = [
                    f"  Title in A1: '{title}'",
                    f"  Extracted Name: '{new_name}'",
                ]

            print(f"File: {filename}")
            print(f"  Path: {folder}")
            for line in detail_lines:
                print(line)
            print(separator)

if __name__ == "__main__":
    # Validate path before processing
    if not os.path.isdir(ROOT_DIR):
        print(f"Error: Path does not exist or is not a directory\n{ROOT_DIR}")
        # exit() is the interactive helper injected by site/IPython and is not
        # meant for programs; raising SystemExit works the same everywhere.
        raise SystemExit(1)

    print(f"Starting processing for:\n{ROOT_DIR}")

    # First analyze sample files if in debug mode
    if DEBUG_MODE:
        choice = input("Run sample analysis first? (y/n): ").strip().lower()
        if choice == 'y':
            analyze_sample_files()

    # Start the main processing. In DEBUG_MODE the measured duration includes
    # the time spent waiting at the confirmation prompt in process_folder.
    start = time.time()
    stats = process_folder()
    duration = time.time() - start

    print_stats(stats)

    # Export error files list
    export_error_list()

    print(f"\nTotal processing time: {duration:.2f} seconds")
    print("Log file: mass_excel_rename.log")

Starting processing for:
F:\Flipcarbon\Ajax\Maker

Sample File Analysis:
----------------------------------------------------------------------------------------------------
File: Car Nicobar - AN211.xlsx
  Path: F:\Flipcarbon\Ajax\Maker\Andaman & Nicobar Island
  Title in A1: 'Maker Month Wise Data  of Car Nicobar - AN211 , Andaman & Nicobar Island (2024)'
  Extracted Name: 'Car Nicobar - AN211'
----------------------------------------------------------------------------------------------------
File: Diglipur - AN204.xlsx
  Path: F:\Flipcarbon\Ajax\Maker\Andaman & Nicobar Island
  Title in A1: 'Maker Month Wise Data  of Diglipur - AN204 , Andaman & Nicobar Island (2024)'
  Extracted Name: 'Diglipur - AN204'
----------------------------------------------------------------------------------------------------
File: Adoni RTO - AP221.xlsx
  Path: F:\Flipcarbon\Ajax\Maker\Andhra Pradesh
  Title in A1: 'Maker Month Wise Data  of Adoni RTO - AP221 , Andhra Pradesh (2024)'
  Extracted Name: '