# Patient Data Oropharynx

## Requirements

In [3]:
import os
import re

## Patient ID encoder

In [1]:
def encode_decode_patient_id(patient_id: str, coding='encode') -> str:
    """
    Encodes or decodes a patient ID using a fixed mapping between digits and letters.

    Every ASCII digit is substituted by one upper-case letter
    (0->Q, 1->W, 2->E, 3->R, 4->T, 5->Y, 6->U, 7->I, 8->O, 9->P);
    decoding applies the inverse substitution.

    Parameters:
    - patient_id (str): The patient ID as an 8-character string.
        For encoding, the patient ID must consist of the ASCII digits 0-9.
        For decoding, the patient ID must consist of letters as defined in the mapping.
    - coding (str): Operation mode: 'encode' maps digits to letters,
                    'decode' converts letters back to digits.

    Returns:
    - str: The processed patient ID (encoded or decoded).

    Raises:
    - ValueError: If the patient_id does not meet the required format or if an invalid coding mode is provided.
    """
    # Bijective mapping from digits to letters.
    mapping = {
        "0": "Q",
        "1": "W",
        "2": "E",
        "3": "R",
        "4": "T",
        "5": "Y",
        "6": "U",
        "7": "I",
        "8": "O",
        "9": "P",
    }

    # Select the substitution table (and matching error text) for the requested direction.
    if coding == 'encode':
        table = mapping
        format_error = "Patient ID for encoding must be an 8-digit string."
    elif coding == 'decode':
        table = {letter: digit for digit, letter in mapping.items()}
        format_error = "Encoded ID must be an 8-character string with valid mapping letters."
    else:
        raise ValueError("Coding must be either 'encode' or 'decode'.")

    # Validate against the table itself rather than str.isdigit(): isdigit() also
    # accepts non-ASCII digits (superscripts, Arabic-Indic digits, ...) that have
    # no mapping entry and would otherwise surface as a KeyError.
    if len(patient_id) != 8 or not all(char in table for char in patient_id):
        raise ValueError(format_error)

    return ''.join(table[char] for char in patient_id)


In [2]:
def process_patient_folders(root_dir: str, coding = 'encode'):
    """
    Renames patient folders directly inside root_dir by encoding or decoding their names.

    Only sub-directories whose entire name is a patient ID are considered:
    exactly 8 ASCII digits in 'encode' mode, exactly 8 upper-case letters in
    'decode' mode. Each such name is converted with encode_decode_patient_id
    and the folder is renamed to the converted ID. A folder is skipped (with a
    printed message) when a folder with the target name already exists or when
    its name cannot be converted.

    Parameters:
    - root_dir (str): The root directory containing patient folders.
    - coding (str): 'encode' renames digit-named folders to letters,
                    'decode' renames letter-named folders back to digits.

    Raises:
    - ValueError: If coding is neither 'encode' nor 'decode'.
    """
    # One whole-name pattern per direction. [0-9] instead of \d keeps non-ASCII
    # digits (which the ID mapping does not cover) from being picked up.
    patterns = {
        'encode': re.compile(r'[0-9]{8}'),
        'decode': re.compile(r'[A-Z]{8}'),
    }
    # Reject an unsupported mode before touching the file system.
    if coding not in patterns:
        raise ValueError("Coding must be either 'encode' or 'decode'.")
    pattern = patterns[coding]

    for folder in os.listdir(root_dir):
        folder_path = os.path.join(root_dir, folder)
        if not os.path.isdir(folder_path):
            continue
        if not pattern.fullmatch(folder):
            continue

        try:
            coded_id = encode_decode_patient_id(folder, coding=coding)
        except ValueError as error:
            # E.g. an unrelated folder named with 8 capital letters outside the
            # mapping: skip it instead of aborting a half-finished batch.
            print(f"Skipping folder '{folder}': {error}")
            continue

        new_folder_name = coded_id
        new_folder_path = os.path.join(root_dir, new_folder_name)

        # Never overwrite or merge into an existing entry.
        if os.path.exists(new_folder_path):
            print(f"Folder '{new_folder_name}' already exists. Skipping folder '{folder}'.")
        else:
            os.rename(folder_path, new_folder_path)
            print(f"Renamed folder '{folder}' to '{new_folder_name}'")


In [4]:
# Directory whose immediate sub-folders are processed.
# NOTE: absolute, machine-specific path; adjust before running elsewhere.
root_dir = r"/home/loriskeller/Documents/Master Project/Patient data/patient_data_complete/06_midline_extraction"

# Run in 'decode' mode: letter-named folders are renamed back to their digit IDs.
# This renames folders on disk in place.
process_patient_folders(root_dir, coding='decode')

Renamed folder 'WQYYIIWQ' to '10557710'
Renamed folder 'WQYPPWWP' to '10599119'
Renamed folder 'WQTRERIR' to '10432373'
Renamed folder 'WQOYEUOQ' to '10852680'
Renamed folder 'WQIEIOYY' to '10727855'
Renamed folder 'WQORQQTI' to '10830047'
Renamed folder 'WQUORQUU' to '10683066'
Renamed folder 'WQUWPWOR' to '10619183'
Renamed folder 'WQOTQTUT' to '10840464'
Renamed folder 'WQTIRWOE' to '10473182'
Renamed folder 'WQUYOTPI' to '10658497'
Renamed folder 'WQIIPUOW' to '10779681'
Renamed folder 'QRTWEQUI' to '03412067'
Renamed folder 'WQURTEIR' to '10634273'
Renamed folder 'QRRYRTWP' to '03353419'
Renamed folder 'WQRYYTYP' to '10355459'
Renamed folder 'WQUOUUTR' to '10686643'
Renamed folder 'WQIOYEIO' to '10785278'
Renamed folder 'QTOWIQRU' to '04817036'
Renamed folder 'WQTIYOQY' to '10475805'
Renamed folder 'QYYUIYTO' to '05567548'
Renamed folder 'QWQOUYOO' to '01086588'
Renamed folder 'WQYPWWQU' to '10591106'
Renamed folder 'WQUTTOUR' to '10644863'
Renamed folder 'WQUUQETE' to '10660242'
