In [1]:
import os
import shutil

def _unused_backup_path(backup_folder, filename):
    """Return a path inside ``backup_folder`` for ``filename`` that is not yet taken."""
    candidate = os.path.join(backup_folder, filename)
    stem, ext = os.path.splitext(filename)
    counter = 1
    # Append _1, _2, ... before the extension until the name is free, so an
    # earlier backup with the same name is never overwritten.
    while os.path.lexists(candidate):
        candidate = os.path.join(backup_folder, f"{stem}_{counter}{ext}")
        counter += 1
    return candidate


def retain_specific_files(folder_path, keep_files):
    """
    Retain specified files in a folder and move the rest to a backup subfolder.

    Nothing is actually deleted: every regular file directly inside
    ``folder_path`` whose name is not listed in ``keep_files`` is moved into
    ``<folder_path>/deleted_files``. Subdirectories (including the backup
    folder itself) are left untouched. If a file with the same name was
    already backed up by an earlier run, the new backup gets a numeric
    suffix instead of replacing it.

    :param folder_path: Path to the folder containing the files
    :param keep_files: Iterable of filenames (names only, not paths) to keep
    :return: None; progress is reported via ``print``
    """
    # Guard first: os.makedirs below would otherwise silently create a
    # mistyped folder_path and the run would "succeed" on an empty folder.
    if not os.path.isdir(folder_path):
        print(f"Error: Directory {folder_path} does not exist.")
        return

    # Set membership keeps the per-file lookup constant-time
    keep_files_set = set(keep_files)

    # Create a backup folder for the files that are moved out of the way
    backup_folder = os.path.join(folder_path, 'deleted_files')
    os.makedirs(backup_folder, exist_ok=True)

    # Iterate through all entries in the folder
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)

        # Skip directories (this also skips the backup folder itself)
        if os.path.isdir(file_path):
            continue

        # Files on the keep list stay where they are
        if filename in keep_files_set:
            continue

        # Move the file to the backup folder instead of deleting it
        backup_path = _unused_backup_path(backup_folder, filename)
        shutil.move(file_path, backup_path)
        print(f"Moved {filename} to backup folder")

    print("File retention process completed.")

# Exact filenames (names only, no directory part) that must stay in the data
# folder. retain_specific_files compares each directory entry against this
# list by exact string match and moves every non-matching file into the
# 'deleted_files' subfolder.
# NOTE(review): two entries carry an 'old' suffix after the task number
# ('2_u00014s00001hw00002old.svc', '3_u00020s00001hw00003old.svc') — confirm
# they are meant to be kept alongside their non-'old' counterparts.
keep_files = [
    '0_u00003s00001hw00001.svc', '0_u00003s00001hw00003.svc', 
    '0_u00035s00001hw00004.svc', '0_u00040s00001hw00004.svc', 
    '0_u00040s00001hw00006.svc', '0_u00040s00001hw00007.svc', 
    '0_u00044s00001hw00001.svc', '1_u00010s00001hw00001.svc', 
    '1_u00010s00001hw00002.svc', '1_u00010s00001hw00003.svc', 
    '1_u00010s00001hw00004.svc', '1_u00010s00001hw00005.svc', 
    '1_u00010s00001hw00006.svc', '1_u00010s00001hw00007.svc', 
    '1_u00021s00001hw00004.svc', '1_u00021s00001hw00007.svc', 
    '1_u00038s00001hw00002.svc', '2_u00001s00001hw00002.svc', 
    '2_u00001s00001hw00003.svc', '2_u00001s00001hw00005.svc', 
    '2_u00001s00001hw00007.svc', '2_u00002s00001hw00001.svc', 
    '2_u00002s00001hw00002.svc', '2_u00002s00001hw00003.svc', 
    '2_u00002s00001hw00006.svc', '2_u00002s00001hw00007.svc', 
    '2_u00014s00001hw00001.svc', '2_u00014s00001hw00002.svc', 
    '2_u00014s00001hw00002old.svc', '2_u00014s00001hw00003.svc', 
    '2_u00014s00001hw00005.svc', '2_u00025s00001hw00002.svc', 
    '2_u00025s00001hw00003.svc', '2_u00025s00001hw00005.svc', 
    '2_u00025s00001hw00006.svc', '2_u00025s00001hw00007.svc', 
    '2_u00026s00001hw00001.svc', '2_u00026s00001hw00002.svc', 
    '2_u00026s00001hw00003.svc', '2_u00026s00001hw00005.svc', 
    '2_u00026s00001hw00007.svc', '2_u00029s00001hw00002.svc', 
    '2_u00029s00001hw00003.svc', '2_u00029s00001hw00005.svc', 
    '2_u00029s00001hw00006.svc', '2_u00029s00001hw00007.svc', 
    '2_u00031s00001hw00001.svc', '2_u00031s00001hw00002.svc', 
    '2_u00031s00001hw00003.svc', '2_u00031s00001hw00005.svc', 
    '2_u00031s00001hw00007.svc', '2_u00032s00001hw00001.svc', 
    '2_u00032s00001hw00002.svc', '2_u00032s00001hw00006.svc', 
    '2_u00033s00001hw00006.svc', '2_u00036s00001hw00001.svc', 
    '2_u00036s00001hw00002.svc', '2_u00036s00001hw00005.svc', 
    '2_u00036s00001hw00006.svc', '2_u00036s00001hw00007.svc', 
    '2_u00041s00001hw00001.svc', '2_u00041s00001hw00002.svc', 
    '2_u00041s00001hw00003.svc', '2_u00042s00001hw00001.svc', 
    '2_u00042s00001hw00002.svc', '2_u00042s00001hw00003.svc', 
    '2_u00042s00001hw00006.svc', '2_u00042s00001hw00007.svc', 
    '2_u00043s00001hw00001.svc', '2_u00043s00001hw00002.svc', 
    '2_u00043s00001hw00003.svc', '2_u00043s00001hw00007.svc', 
    '3_u00020s00001hw00001.svc', '3_u00020s00001hw00002.svc', 
    '3_u00020s00001hw00003.svc', '3_u00020s00001hw00003old.svc', 
    '3_u00021s00001hw00001.svc', '3_u00021s00001hw00002.svc', 
    '3_u00021s00001hw00003.svc'
]

# Folder to prune, given relative to the notebook's working directory
folder_path = 'test/test_data'

# Run the pruning pass. This moves files on disk, so re-running the cell
# after a successful run finds nothing left to move.
retain_specific_files(folder_path, keep_files)

Moved 0_u00003s00001hw00002.svc to backup folder
Moved 0_u00003s00001hw00004.svc to backup folder
Moved 0_u00003s00001hw00005.svc to backup folder
Moved 0_u00003s00001hw00006.svc to backup folder
Moved 0_u00003s00001hw00007.svc to backup folder
Moved 0_u00013s00001hw00001.svc to backup folder
Moved 0_u00013s00001hw00002.svc to backup folder
Moved 0_u00013s00001hw00003.svc to backup folder
Moved 0_u00013s00001hw00004.svc to backup folder
Moved 0_u00013s00001hw00005.svc to backup folder
Moved 0_u00013s00001hw00006.svc to backup folder
Moved 0_u00013s00001hw00007.svc to backup folder
Moved 0_u00015s00001hw00001.svc to backup folder
Moved 0_u00015s00001hw00002.svc to backup folder
Moved 0_u00015s00001hw00003.svc to backup folder
Moved 0_u00015s00001hw00004.svc to backup folder
Moved 0_u00015s00001hw00005.svc to backup folder
Moved 0_u00015s00001hw00006.svc to backup folder
Moved 0_u00015s00001hw00007.svc to backup folder
Moved 0_u00027s00001hw00001.svc to backup folder
Moved 0_u00027s00001

In [2]:
import os

def rename_files_in_directory(directory):
    """
    Strip a leading "<digit>_" prefix from the names of files in a directory.

    Only regular files whose name is longer than two characters, starts with
    a digit and has an underscore as its second character are renamed; the
    new name is the old one without those two characters. Subdirectories are
    left alone. A file is never renamed onto an existing entry: if the
    stripped name is already taken (for example two files that differ only in
    their digit prefix), the file is left in place and an error is printed.

    Args:
    directory (str): Path to the directory containing files to rename

    Returns:
    None. Progress and errors are reported via ``print``.
    """
    # isdir rather than exists: a path that points at a regular file would
    # pass an existence check and then make os.listdir raise.
    if not os.path.isdir(directory):
        print(f"Error: Directory {directory} does not exist.")
        return

    # Sorted so that, when two files collide on the stripped name, which one
    # wins does not depend on the platform's directory-listing order.
    for filename in sorted(os.listdir(directory)):
        # Check if the filename starts with a digit and underscore
        if not (len(filename) > 2 and filename[0].isdigit() and filename[1] == '_'):
            continue

        old_filepath = os.path.join(directory, filename)

        # Only files are renamed; directories that happen to match are skipped
        if not os.path.isfile(old_filepath):
            continue

        # Create the new filename by removing the first two characters
        new_filename = filename[2:]
        new_filepath = os.path.join(directory, new_filename)

        # os.rename silently replaces an existing file on POSIX, which would
        # lose data; refuse instead and keep going with the other files.
        if os.path.lexists(new_filepath):
            print(f"Error renaming {filename}: {new_filename} already exists")
            continue

        # Rename the file
        try:
            os.rename(old_filepath, new_filepath)
            print(f"Renamed: {filename} -> {new_filename}")
        except OSError as e:
            print(f"Error renaming {filename}: {e}")

# Specify the directory path.
# Written with a forward slash, like the other cells: Python accepts '/' as a
# separator on Windows too, whereas a backslash-separated literal is treated
# as a single odd directory name on POSIX systems.
directory_path = 'test/test_data'

# Call the function to rename files
rename_files_in_directory(directory_path)

Renamed: 0_u00003s00001hw00001.svc -> u00003s00001hw00001.svc
Renamed: 0_u00003s00001hw00003.svc -> u00003s00001hw00003.svc
Renamed: 0_u00035s00001hw00004.svc -> u00035s00001hw00004.svc
Renamed: 0_u00040s00001hw00004.svc -> u00040s00001hw00004.svc
Renamed: 0_u00040s00001hw00006.svc -> u00040s00001hw00006.svc
Renamed: 0_u00040s00001hw00007.svc -> u00040s00001hw00007.svc
Renamed: 0_u00044s00001hw00001.svc -> u00044s00001hw00001.svc
Renamed: 1_u00010s00001hw00001.svc -> u00010s00001hw00001.svc
Renamed: 1_u00010s00001hw00002.svc -> u00010s00001hw00002.svc
Renamed: 1_u00010s00001hw00003.svc -> u00010s00001hw00003.svc
Renamed: 1_u00010s00001hw00004.svc -> u00010s00001hw00004.svc
Renamed: 1_u00010s00001hw00005.svc -> u00010s00001hw00005.svc
Renamed: 1_u00010s00001hw00006.svc -> u00010s00001hw00006.svc
Renamed: 1_u00010s00001hw00007.svc -> u00010s00001hw00007.svc
Renamed: 1_u00021s00001hw00004.svc -> u00021s00001hw00004.svc
Renamed: 1_u00021s00001hw00007.svc -> u00021s00001hw00007.svc
Renamed:

In [3]:
import os
import shutil
from collections import defaultdict

def _task_number(filename):
    """Return the integer between the first 'hw' and the next '.' in filename, or None."""
    try:
        return int(filename.split('hw')[1].split('.')[0])
    except (IndexError, ValueError):
        # No 'hw' marker at all, or non-numeric text after it (e.g. '00002old')
        return None


def complete_user_tasks(input_directory, output_directory, num_tasks=7):
    """
    Fill in each user's missing task files by copying that user's existing ones.

    ``.svc`` files in ``input_directory`` are grouped by the part of their
    name up to and including the first ``'hw'``; the digits that follow are
    the task number. For every group, each task number in ``1..num_tasks``
    that has no file gets one written to ``output_directory`` as
    ``<group>hw<task, zero-padded to 5 digits>.svc``, copied from one of the
    group's existing files (picked by cycling through them in task order).

    Only the fill-in copies are written; the group's existing files are not
    copied to ``output_directory``. ``.svc`` files whose name has no numeric
    task number after ``'hw'`` are skipped with a printed notice instead of
    aborting the whole run.

    :param input_directory: Folder containing the existing ``.svc`` files
    :param output_directory: Folder to write the copies to (created if missing)
    :param num_tasks: Highest task number every user is expected to have
    :return: None; each copy is reported via ``print``
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    # Group (task number, filename) pairs by user
    user_files = defaultdict(list)
    for filename in os.listdir(input_directory):
        if not filename.endswith('.svc'):
            continue

        task_num = _task_number(filename)
        if task_num is None:
            print(f"Skipped {filename}: no numeric task number after 'hw'")
            continue

        # Extract user identifier (u00001s00001hw part)
        user_id = filename.split('hw')[0] + 'hw'
        user_files[user_id].append((task_num, filename))

    all_tasks = set(range(1, num_tasks + 1))

    # Process each user's files
    for user_id, entries in user_files.items():
        # Determine missing tasks
        existing_tasks = {task for task, _ in entries}
        missing_tasks = sorted(all_tasks - existing_tasks)

        # If all tasks are present, skip this user
        if not missing_tasks:
            continue

        # Order existing files by task number so source selection is consistent
        # (key on the number only, keeping ties in listing order)
        entries.sort(key=lambda entry: entry[0])
        files = [name for _, name in entries]

        # Copy and rename files to fill missing tasks
        for task_num in missing_tasks:
            # Cycle through existing files if there are fewer than task_num
            source_file = files[(task_num - 1) % len(files)]

            # Create new filename
            new_filename = f"{user_id}{task_num:05d}.svc"

            # Full paths
            source_path = os.path.join(input_directory, source_file)
            dest_path = os.path.join(output_directory, new_filename)

            # Copy the file (copy2 also carries over file metadata)
            shutil.copy2(source_path, dest_path)
            print(f"Copied {source_file} to {new_filename}")

# Where the existing per-user task files live, and where the fill-in copies go
input_dir = 'test/test_data'
output_dir = 'test/completed_data'

# Generate the missing task files for every user found in input_dir
complete_user_tasks(input_directory=input_dir, output_directory=output_dir)

Copied u00001s00001hw00002.svc to u00001s00001hw00001.svc
Copied u00001s00001hw00007.svc to u00001s00001hw00004.svc
Copied u00001s00001hw00003.svc to u00001s00001hw00006.svc
Copied u00002s00001hw00006.svc to u00002s00001hw00004.svc
Copied u00002s00001hw00007.svc to u00002s00001hw00005.svc
Copied u00003s00001hw00003.svc to u00003s00001hw00002.svc
Copied u00003s00001hw00003.svc to u00003s00001hw00004.svc
Copied u00003s00001hw00001.svc to u00003s00001hw00005.svc
Copied u00003s00001hw00003.svc to u00003s00001hw00006.svc
Copied u00003s00001hw00001.svc to u00003s00001hw00007.svc
Copied u00014s00001hw00005.svc to u00014s00001hw00004.svc
Copied u00014s00001hw00002.svc to u00014s00001hw00007.svc
Copied u00020s00001hw00001.svc to u00020s00001hw00005.svc
Copied u00020s00001hw00002.svc to u00020s00001hw00006.svc
Copied u00020s00001hw00003.svc to u00020s00001hw00007.svc
Copied u00021s00001hw00007.svc to u00021s00001hw00005.svc
Copied u00021s00001hw00001.svc to u00021s00001hw00006.svc
Copied u00025s