In [2]:
import os
import shutil
import tkinter as tk
from tkinter import filedialog
from tqdm import tqdm
import datetime
import logging

# Set up logging
logging.basicConfig(filename='application.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def move_remaining_files(directory):
    # Create "Possible Duplicates" directory within the original directory
    possible_duplicates_dir = os.path.join(directory, "Possible Duplicates")
    os.makedirs(possible_duplicates_dir, exist_ok=True)

    # Move remaining files to "Possible Duplicates" directory
    for root, _, files in os.walk(directory):
        for file in files:
            source_path = os.path.join(root, file)
            destination_path = os.path.join(possible_duplicates_dir, file)
            try:
                shutil.move(source_path, destination_path)
            except Exception as e:
                logging.error(f"Error moving file: {e}")
            else:
                logging.info(f"Moved file: {source_path} to {destination_path}")


def remove_empty_directories(directory):
    # Remove empty directories recursively
    for root, dirs, _ in os.walk(directory, topdown=False):
        for dir in dirs:
            path = os.path.join(root, dir)
            if not os.listdir(path):
                try:
                    os.rmdir(path)
                except Exception as e:
                    logging.error(f"Error removing directory: {e}")
                else:
                    logging.info(f"Removed empty directory: {path}")


def get_file_count(directory):
    count = 0

    for root, _, files in os.walk(directory):
        count += len(files)

    return count


def get_all_files(directory):
    all_files = []
    for root, _, files in os.walk(directory):
        all_files.extend([os.path.join(root, file) for file in files])

    return all_files


def transfer_files(directory, destination_directory, transfer_progress_bar):
    # Get a list of all subdirectories
    subdirectories = [name for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))]

    # Iterate over each subdirectory
    for subdir in subdirectories:
        subdirectory_path = os.path.join(directory, subdir)

        # Recursively call the function to transfer files in the subdirectory
        transfer_files(subdirectory_path, destination_directory, transfer_progress_bar)

    # Get a list of all files in the directory
    files = [name for name in os.listdir(directory) if os.path.isfile(os.path.join(directory, name))]

    # Move each file to the destination directory
    for file in files:
        file_path = os.path.join(directory, file)
        destination_path = os.path.join(destination_directory, file)
        if not os.path.exists(destination_path):
            try:
                shutil.move(file_path, destination_path)
            except Exception as e:
                logging.error(f"Error moving file: {e}")
            else:
                logging.info(f"Moved file: {file_path} to {destination_path}")
                transfer_progress_bar.update(1)  # Update the transfer progress bar


def check_duplicates(directory):
    # Get a list of all files in the directory and its subdirectories
    all_files = get_all_files(directory)

    # Dictionary to store duplicate files
    duplicates = {}

    # Iterate over each file
    for i in range(len(all_files)):
        file1 = all_files[i]
        if file1 not in duplicates:
            duplicates[file1] = []

        for j in range(i + 1, len(all_files)):
            file2 = all_files[j]
            if os.path.basename(file1) != os.path.basename(file2):
                continue

            # Compare file sizes and extensions
            if os.path.getsize(file1) == os.path.getsize(file2) and os.path.splitext(file1)[1] == os.path.splitext(file2)[1]:
                duplicates[file1].append(file2)

    # Remove duplicate files
    for file, duplicates_list in duplicates.items():
        for duplicate in duplicates_list:
            try:
                os.remove(duplicate)
            except Exception as e:
                logging.error(f"Error removing duplicate file: {e}")
            else:
                logging.info(f"Removed duplicate file: {duplicate}")

    return duplicates


def verify_files(directory, transferred_files, verification_progress_bar):
    # Get a list of all files in the directory and its subdirectories
    all_files = get_all_files(directory)

    # Exclude "removed_duplicates.txt" file from verification count
    all_files = [file for file in all_files if os.path.basename(file) != "removed_duplicates.txt"]

    # Iterate over each file
    for file in all_files:
        # Perform file verification

        verification_progress_bar.update(1)  # Update the verification progress bar

    verification_progress_bar.close()



def create_gui():
    # Create a Tkinter root window
    root = tk.Tk()
    root.withdraw()

    # Select search directory
    search_directory_path = filedialog.askdirectory(title="Select Directory to Search")

    # Select output directory
    output_directory_path = filedialog.askdirectory(title="Select Output Directory")

    # Check if directories were selected
    if search_directory_path and output_directory_path:
        # Initialize start time
        start_time = datetime.datetime.now()

        # Get total file count
        total_files = get_file_count(search_directory_path)

        # Initialize transfer progress bar
        transfer_progress_bar = tqdm(total=total_files, unit="file", desc="Files to be transferred", ncols=80,
                                     position=0)

        # Transfer the files to the output directory
        transfer_files(search_directory_path, output_directory_path, transfer_progress_bar)

        # Close the transfer progress bar
        transfer_progress_bar.close()
        logging.info("File transfer completed.")

        # Check for and remove duplicate files
        duplicates = check_duplicates(output_directory_path)

        # Save duplicate file information to a text file
        duplicate_file_path = os.path.join(output_directory_path, "removed_duplicates.txt")
        with open(duplicate_file_path, "w") as file:
            # Write the list of duplicate files
            if duplicates:
                file.write("Duplicate files:\n")
                for file1, duplicates_list in duplicates.items():
                    file.write(file1 + ":\n")
                    for duplicate in duplicates_list:
                        file.write("- " + duplicate + "\n")

            # Write other relevant information
            file.write("\nTotal files found: " + str(total_files) + "\n")
            file.write("Files transferred: " + str(total_files - len(duplicates)) + "\n")
            file.write("Duplicate files removed: " + str(
                sum(len(duplicates_list) for duplicates_list in duplicates.values())) + "\n")
            file.write("Run time: " + str(datetime.datetime.now() - start_time) + "\n")

        logging.info(
            "Duplicate removal completed. Removed duplicate file information saved to: " + duplicate_file_path)

        # Verify files in the output directory
        output_directory_files = get_all_files(output_directory_path)
        verification_progress_bar = tqdm(total=total_files, unit="file", desc="Verifying files", ncols=80,
                                         position=0)

        verify_files(output_directory_path, total_files - len(duplicates), verification_progress_bar)

        # Move remaining files to "Possible Duplicates" directory and remove empty directories
        move_remaining_files(search_directory_path)
        remove_empty_directories(search_directory_path)

        # Display the final progress information
        logging.info("Total files found: " + str(total_files))
        logging.info("Files transferred: " + str(total_files - len(duplicates)))
        logging.info(
            "Duplicate files removed: " + str(sum(len(duplicates_list) for duplicates_list in duplicates.values())))
        logging.info("Run time: " + str(datetime.datetime.now() - start_time))
    else:
        logging.info("Directory selection canceled.")

    # Destroy the Tkinter root window
    root.destroy()


def main():
    # Configure the logging module
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    # Create GUI and execute the application
    create_gui()


if __name__ == "__main__":
    main()


Files to be transferred:  90%|████████▉ | 17725/19771 [03:01<00:20, 97.63file/s]
Verifying files:  90%|████████████▌ | 17725/19771 [00:00<00:00, 301706.24file/s]
