In [3]:
import os
import json
from ciphers import CaesarCipher

# Lowercase Latin alphabet (26 letters) handed to CaesarCipher in process_files().
alphabet = 'abcdefghijklmnopqrstuvwxyz'

def process_files(directory):
    """
    Regenerate the 'gold_label' field of every entry in each ``.json`` file
    found directly inside ``directory``, rewriting the files in place.

    For each entry, the label is ``CaesarCipher(alphabet).encrypt(plain_text,
    str(shift))`` using the entry's 'plain_text' and 'shift' values.

    Files that are empty (after stripping whitespace) are skipped silently;
    files that are not valid JSON are reported on stdout and skipped. Neither
    kind of file is modified.

    Args:
        directory: Path of the folder to scan (not recursive).

    Raises:
        KeyError: If an entry lacks 'plain_text' or 'shift'.
    """
    # One cipher instance is shared by all files.
    encoder = CaesarCipher(alphabet)

    for name in os.listdir(directory):
        if not name.endswith(".json"):
            continue

        path = os.path.join(directory, name)
        with open(path, 'r', encoding='utf-8') as handle:
            raw = handle.read().strip()

        # Nothing to label in an empty file.
        if not raw:
            continue

        try:
            records = json.loads(raw)
        except json.JSONDecodeError:
            print(f"Error decoding JSON in file: {name}")
            continue

        # NOTE(review): presumably the top level is a list of dicts -- confirm
        # against the dataset files.
        for record in records:
            # The shift is passed as a string; presumably that is the key type
            # CaesarCipher.encrypt expects -- confirm against the ciphers module.
            record['gold_label'] = encoder.encrypt(
                record['plain_text'], str(record['shift'])
            )

        # Persist the labelled records over the original file.
        with open(path, 'w', encoding='utf-8') as handle:
            json.dump(records, handle, ensure_ascii=False, indent=4)

# Folder whose top-level *.json files get their 'gold_label' fields (re)generated.
directory = './data/encoded/caesar-cipher/'

# Runs as soon as this cell executes; matching files are rewritten in place.
process_files(directory)


In [3]:
# Random Text Gold Label Generator
import os
import json

def caesar_cipher(text: str, shift: int) -> str:
    """
    Encode ``text`` with a Caesar cipher over the 26-letter English alphabet.

    Only ASCII letters (A-Z, a-z) are rotated, and their case is preserved.
    Every other character -- digits, punctuation, whitespace, and non-ASCII
    letters such as accented or Greek characters -- is copied through
    unchanged.

    Args:
        text: The plain text to encode.
        shift: Number of positions to rotate each letter. May be negative or
            larger than 26; the rotation wraps modulo 26.

    Returns:
        The encoded string, with the same length as ``text``.
    """
    encoded = []
    for char in text:
        # Explicit ASCII range checks instead of str.isalpha()/isupper():
        # isalpha() is also true for letters like 'é' or 'α', which the
        # ord('a')-based arithmetic below would turn into unrelated ASCII
        # letters.
        if 'A' <= char <= 'Z':
            base_ord = ord('A')
        elif 'a' <= char <= 'z':
            base_ord = ord('a')
        else:
            # Not part of the cipher alphabet: keep as-is.
            encoded.append(char)
            continue

        # Rotate within the alphabet; Python's % keeps negative shifts in range.
        shifted = (ord(char) - base_ord + shift) % 26
        encoded.append(chr(base_ord + shifted))
    return "".join(encoded)

def main():
    """
    Fill in 'gold_label' for every item of each ``.json`` file in the
    random-text Caesar cipher data folder, rewriting the files in place.

    An item is labelled with ``caesar_cipher(plain_text, shift)`` only when
    both its 'plain_text' and 'shift' values are present and not None; other
    items are written back untouched.
    """
    source_dir = "./data/encoded/caesar-cipher/random/"

    json_names = [name for name in os.listdir(source_dir) if name.endswith(".json")]
    for name in json_names:
        path = os.path.join(source_dir, name)

        # Load the whole file; presumably a JSON array of objects -- confirm
        # against the dataset.
        with open(path, "r", encoding="utf-8") as reader:
            records = json.load(reader)

        for record in records:
            message = record.get("plain_text")
            offset = record.get("shift")
            # Skip items that cannot be encoded.
            if message is None or offset is None:
                continue
            record["gold_label"] = caesar_cipher(message, offset)

        # Overwrite the source file with the labelled records.
        with open(path, "w", encoding="utf-8") as writer:
            json.dump(records, writer, indent=4, ensure_ascii=False)

# Run the random-text labelling pass when this cell/script executes as the
# top-level module.
# NOTE(review): another cell in this notebook also defines main(); this call
# uses whichever definition was executed most recently.
if __name__ == "__main__":
    main()


In [1]:
# Greek Text Gold Label Generator

import os
import json

def caesar_cipher_greek(text: str, shift: int) -> str:
    """
    Encode ``text`` with a Caesar cipher over the 24-letter Greek alphabet.

    Unaccented uppercase (Α-Ω) and lowercase (α-ω) Greek letters are rotated
    within their own case. Final sigma (ς) is a positional variant of σ, not a
    separate letter, so it is shifted as σ (and left as ς when the effective
    shift is zero). Everything else -- accented Greek letters, Latin letters,
    digits, punctuation, whitespace -- is copied through unchanged.

    Args:
        text: The plain text to encode.
        shift: Number of positions to rotate each letter. May be negative or
            larger than 24; the rotation wraps modulo 24.

    Returns:
        The encoded string, with the same length as ``text``.
    """
    # Build the alphabets from code points, skipping the two slots in the
    # U+0391-U+03A9 / U+03B1-U+03C9 ranges that are not alphabet letters:
    # U+03A2 is unassigned and U+03C2 is final sigma. Doing modular arithmetic
    # directly on the 25-wide code-point range would emit the unassigned
    # U+03A2 and make upper and lower case wrap differently.
    upper = "".join(chr(cp) for cp in range(0x0391, 0x03AA) if cp != 0x03A2)
    lower = "".join(chr(cp) for cp in range(0x03B1, 0x03CA) if cp != 0x03C2)
    size = len(lower)  # 24 letters in each case

    # Map every letter to its rotated counterpart within the same case.
    table = {}
    for letters in (upper, lower):
        for index, letter in enumerate(letters):
            table[letter] = letters[(index + shift) % size]

    # Final sigma follows sigma, but an identity shift leaves the text as-is.
    final_sigma, sigma = "\u03c2", "\u03c3"
    table[final_sigma] = final_sigma if shift % size == 0 else table[sigma]

    # Characters without a table entry are not part of the cipher alphabet.
    return "".join(table.get(char, char) for char in text)

def main():
    """
    Fill in 'gold_label' for every item of each ``.json`` file in the Greek
    Caesar cipher data folder, rewriting the files in place.

    An item is labelled with ``caesar_cipher_greek(plain_text, shift)`` only
    when both its 'plain_text' and 'shift' values are present and not None;
    other items are written back untouched.
    """
    greek_dir = "./data/encoded/caesar-cipher/greek/"

    for entry_name in os.listdir(greek_dir):
        # Only JSON files are dataset files.
        if not entry_name.endswith(".json"):
            continue
        json_path = os.path.join(greek_dir, entry_name)

        # Presumably each file holds a JSON array of objects -- confirm
        # against the dataset.
        with open(json_path, "r", encoding="utf-8") as src:
            items = json.load(src)

        for item in items:
            text, shift = item.get("plain_text"), item.get("shift")
            if text is not None and shift is not None:
                item["gold_label"] = caesar_cipher_greek(text, shift)

        # ensure_ascii=False keeps the Greek text readable instead of \u escapes.
        with open(json_path, "w", encoding="utf-8") as dst:
            json.dump(items, dst, indent=4, ensure_ascii=False)

# Run the Greek labelling pass when this cell/script executes as the
# top-level module.
# NOTE(review): another cell in this notebook also defines main(); this call
# uses whichever definition was executed most recently.
if __name__ == "__main__":
    main()
