In [1]:
import hashlib
import os
from shutil import move
from datetime import datetime

In [13]:
def calculate_md5(filepath):
    """Return the hexadecimal MD5 digest of the file at ``filepath``.

    The file is opened in binary mode and consumed in 4096-byte pieces,
    so the complete content is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:  # an empty read marks end of file
                break
            digest.update(block)
    return digest.hexdigest()

def get_file_size(filepath):
    """Return the size in bytes of the file at ``filepath``."""
    file_stats = os.stat(filepath)
    return file_stats.st_size

def find_files(directory):
    """Yield the joined path of every file located directly in ``directory``.

    Only the top level of ``directory`` is listed; entries for which
    ``os.path.isfile`` is false (such as sub-directories) are skipped.
    """
    # Every file in the directory is considered; no filtering by file type.
    candidates = (os.path.join(directory, entry) for entry in os.listdir(directory))
    yield from filter(os.path.isfile, candidates)

def _unique_destination(dest_dir, filename):
    """Return a path inside ``dest_dir`` for ``filename`` that does not exist yet.

    If ``dest_dir/filename`` is free it is returned unchanged; otherwise a
    numeric suffix (``name_1.ext``, ``name_2.ext``, ...) is appended before the
    extension until an unused path is found.
    """
    candidate = os.path.join(dest_dir, filename)
    if not os.path.lexists(candidate):
        return candidate
    stem, extension = os.path.splitext(filename)
    counter = 1
    while True:
        candidate = os.path.join(dest_dir, f"{stem}_{counter}{extension}")
        if not os.path.lexists(candidate):
            return candidate
        counter += 1


def main(directory, use_md5=True):
    """Report duplicate files in ``directory`` and move the extras to ``directory/pack``.

    Files returned by ``find_files(directory)`` are grouped by a key: the MD5
    digest when ``use_md5`` is truthy, otherwise the file size in bytes. In
    size mode files are grouped by size alone, so files with different content
    but equal size are treated as duplicates.

    For every group with more than one file the members are printed, the file
    with the most recent modification time is left in place, and all others
    are moved into the ``pack`` sub-directory (created on demand). A file that
    already exists in ``pack`` is never overwritten: the incoming file gets a
    numeric suffix instead.

    Args:
        directory: Directory whose files are checked for duplicates.
        use_md5: Truthy to compare by MD5 digest, falsy to compare by size.
    """
    # Group file paths by their comparison key.
    files_map = {}
    for file_path in find_files(directory):
        file_key = calculate_md5(file_path) if use_md5 else get_file_size(file_path)
        files_map.setdefault(file_key, []).append(file_path)

    pack_dir = os.path.join(directory, 'pack')
    for key, files in files_map.items():
        if len(files) < 2:
            continue

        print(f"Duplicate files with key ({'MD5' if use_md5 else 'Size'}: {key}):")
        for f in files:
            print(f"  - {f} (Modified: {os.path.getmtime(f)}, Size: {os.path.getsize(f)} bytes)")

        # Newest first. The sort is stable, so files with equal modification
        # times keep their listing order and the first of them is the one kept.
        files.sort(key=os.path.getmtime, reverse=True)

        # Created only once a duplicate group is found, and once per group
        # rather than once per moved file.
        os.makedirs(pack_dir, exist_ok=True)
        for file in files[1:]:
            # Avoid clobbering a same-named file left in pack by an earlier run.
            destination = _unique_destination(pack_dir, os.path.basename(file))
            move(file, destination)
            print(f'Moved {file} to {pack_dir}')

if __name__ == "__main__":
    # Folder whose files are checked for duplicates.
    directory = "d:\\document\\WeChat Files\\bachopin\\FileStorage\\File\\2024-04"
    # main() only tests this flag for truthiness, so pass a real boolean:
    # a string such as 'no' would still be truthy and enable MD5 mode.
    use_md5 = True
    main(directory, use_md5)


Duplicate files with key (MD5: e81775030f82b5ba686f56d472ed9a2c):
  - d:\document\WeChat Files\bachopin\FileStorage\File\2024-04\(2份)2024学硕复试秘书安排(2).xlsx (Modified: 1712642602.0, Size: 20672 bytes)
  - d:\document\WeChat Files\bachopin\FileStorage\File\2024-04\(2份)2024学硕复试秘书安排.xlsx (Modified: 1712642602.0, Size: 20672 bytes)
Moved d:\document\WeChat Files\bachopin\FileStorage\File\2024-04\(2份)2024学硕复试秘书安排.xlsx to d:\document\WeChat Files\bachopin\FileStorage\File\2024-04\pack
Duplicate files with key (MD5: eb3b4068fdbccc74c1c53ec823fa2948):
  - d:\document\WeChat Files\bachopin\FileStorage\File\2024-04\00.bert_tool(1).ipynb (Modified: 1711091056.0, Size: 9618 bytes)
  - d:\document\WeChat Files\bachopin\FileStorage\File\2024-04\00.bert_tool.ipynb (Modified: 1711091056.0, Size: 9618 bytes)
Moved d:\document\WeChat Files\bachopin\FileStorage\File\2024-04\00.bert_tool.ipynb to d:\document\WeChat Files\bachopin\FileStorage\File\2024-04\pack
Duplicate files with key (MD5: 4ea5b3b7c072ae48a4