In [1]:
!pip install pandas XlsxWriter



In [2]:
import os
import shutil
from collections import defaultdict
import pandas as pd

def copy_and_rename_files(source_dir, target_dir):
    """Copy every file found under ``source_dir`` into the single folder ``target_dir``.

    Each copy is renamed to ``<stem>_<n><ext>``, where ``stem`` is the file name
    without its extension and ``n`` is a running count of files seen so far with
    that stem. The count is shared across extensions, so ``a.txt`` and ``a.csv``
    receive different numbers.

    Directories and files are visited in sorted order so that the numbering is
    the same on every run over the same tree.

    Args:
        source_dir: Root of the directory tree to read from.
        target_dir: Folder that receives the copies; created if missing.

    Returns:
        A list of ``(source_path, original_name, target_path, extension)``
        tuples, one per copied file. ``extension`` keeps its leading dot
        (it is the second element of ``os.path.splitext``).
    """
    os.makedirs(target_dir, exist_ok=True)
    target_abs = os.path.normcase(os.path.abspath(target_dir))

    file_counter = defaultdict(int)
    file_info = []

    for root, dirs, files in os.walk(source_dir):
        # If the target folder lives inside the source tree, drop it from the
        # walk; otherwise the copies written below would be discovered and
        # copied a second time. Assigning to dirs[:] prunes os.walk in place.
        dirs[:] = sorted(
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d))) != target_abs
        )

        for file in sorted(files):
            file_name, file_ext = os.path.splitext(file)

            # Number the copy so equally named files from different folders
            # do not overwrite each other in the flat target folder.
            file_counter[file_name] += 1
            new_file_name = f"{file_name}_{file_counter[file_name]}{file_ext}"

            source_file_path = os.path.join(root, file)
            target_file_path = os.path.join(target_dir, new_file_name)

            # copy2 also carries over file metadata such as timestamps.
            shutil.copy2(source_file_path, target_file_path)
            file_info.append((source_file_path, file, target_file_path, file_ext))

    return file_info

def organize_files_by_extension(target_dir):
    """Move files under ``target_dir`` into ``target_dir/<extension>/`` folders.

    The whole tree below ``target_dir`` is walked. A file whose name has an
    extension is moved into the folder named after that extension (without the
    dot), which is created on demand. Files without an extension are left where
    they are. Files that already sit in their extension folder are skipped, so
    re-running on an organized folder moves and reports nothing.

    Args:
        target_dir: Folder whose files are sorted into per-extension subfolders.

    Returns:
        A list of ``(previous_path, new_path, extension)`` tuples, one per file
        that was actually moved. ``extension`` has no leading dot.
    """
    file_locations = []

    for root, dirs, files in os.walk(target_dir):
        dirs.sort()  # deterministic traversal order
        for file in sorted(files):
            file_ext = os.path.splitext(file)[1][1:]  # extension without the dot
            if not file_ext:
                continue  # nothing to sort by

            ext_folder_path = os.path.join(target_dir, file_ext)
            source_file_path = os.path.join(root, file)
            target_file_path = os.path.join(ext_folder_path, file)

            # An extension folder that already exists is itself walked; its
            # files are already in place and must not be moved onto themselves
            # or reported as relocated.
            if os.path.abspath(source_file_path) == os.path.abspath(target_file_path):
                continue

            os.makedirs(ext_folder_path, exist_ok=True)
            shutil.move(source_file_path, target_file_path)
            file_locations.append((source_file_path, target_file_path, file_ext))

    return file_locations

def create_excel_report(file_info, file_locations, excel_path):
    """Write an Excel sheet mapping each original file to its renamed, sorted copy.

    Args:
        file_info: Iterable of ``(source_path, old_name, new_path, ext)`` tuples
            describing the copy step.
        file_locations: Iterable of ``(new_path, final_path, ext)`` tuples
            describing the move step. Entries whose ``new_path`` does not
            appear in ``file_info`` are left out of the report.
        excel_path: Destination of the workbook, written without the index column.
    """
    # Index the copy step by the path each copy was written to.
    origin_by_copy = {}
    for original_path, original_name, copied_path, _suffix in file_info:
        origin_by_copy[copied_path] = (original_path, original_name)

    rows = []
    for copied_path, final_path, _ext in file_locations:
        origin = origin_by_copy.get(copied_path)
        if origin is None or origin[0] is None:
            continue  # moved file that the copy step did not produce
        original_path, original_name = origin
        containing_folder = os.path.basename(os.path.dirname(final_path))
        rows.append(
            [original_path, original_name, os.path.basename(copied_path), containing_folder]
        )

    report = pd.DataFrame(rows, columns=["Original Path", "Old Name", "New Name", "Folder"])
    report.to_excel(excel_path, index=False)

# Driver: runs the three steps in order — copy with numbered names, sort the
# copies into per-extension folders, then write the Excel mapping.
if __name__ == "__main__":
    source_dir = "TestDirectory"  # Directory tree whose files are copied
    target_dir = "cleanedDir"  # Receives the renamed copies, then the per-extension subfolders
    excel_path = "newFileTracker.xlsx"  # File the Excel report is written to
    
    # file_info records the copy step; file_locations records the move step.
    # The report joins the two on the path each copy was first written to.
    file_info = copy_and_rename_files(source_dir, target_dir)
    file_locations = organize_files_by_extension(target_dir)
    create_excel_report(file_info, file_locations, excel_path)
    
    print(f"Files have been copied, renamed, organized, and the report has been created at {excel_path}")


Files have been copied, renamed, organized, and the report has been created at newFileTracker.xlsx
