In [1]:
import os
import json
import re # Used for advanced string manipulation/sanitization
from pathlib import Path

BASE_DIR = Path.cwd()         # current working directory (where the notebook is running)
# The family data is read from a "data" folder that sits next to the working
# directory, i.e. <BASE_DIR>/../data/family_data.json.
DATA_FILE = BASE_DIR.parent / "data" / "family_data.json"

def sanitize_name(name):
    """
    Build a filesystem-friendly token from a person's name.

    The text is lowercased, every run of whitespace / non-word characters
    (spaces, hyphens, periods, ...) is replaced by one underscore, and
    underscores at either end of the result are removed.

    Args:
        name (str): The raw name string.

    Returns:
        str: The lowercase, folder-safe name. Empty when the input contains
        no word characters at all.
    """
    lowered = name.lower()
    # One underscore per run of separator characters.
    collapsed = re.sub(r'[\s\W]+', '_', lowered)
    # Separators at the edges of the name leave stray underscores; drop them.
    return collapsed.strip('_')

def create_named_folders_from_json(data_filepath):
    """
    Create one 'idNUMBER_NAME' folder per entry of a JSON family-data file.

    The JSON document must be a top-level list of objects, each providing
    'PersonID' (the number) and 'Name' (the name part, passed through
    ``sanitize_name``). Folders are created relative to the current working
    directory. Entries whose folder already exists are skipped silently, so
    the function can be re-run safely. Entries that are not JSON objects, or
    whose name is missing, not a string, or empty after sanitizing, are
    skipped with a warning.

    All problems are reported with ``print``; nothing is raised to the caller.

    Args:
        data_filepath (str | os.PathLike): Path to the family data JSON file.

    Returns:
        None
    """
    try:
        # Read explicitly as UTF-8: the platform default encoding (e.g. cp1252
        # on Windows) fails on, or silently garbles, non-ASCII text.
        with open(data_filepath, 'r', encoding='utf-8') as f:
            family_data = json.load(f)
    except FileNotFoundError:
        print(f"Error: Data file not found at '{data_filepath}'. Please create it or check the path.")
        return
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from '{data_filepath}'. Check the file format for errors.")
        return
    except Exception as e:
        print(f"An unexpected error occurred while reading the file: {e}")
        return

    if not isinstance(family_data, list):
        print("Error: JSON file content is not a list of objects. Expected a top-level list.")
        return

    print(f"Starting to create {len(family_data)} folders based on JSON entries...")

    folder_count = 0
    for person in family_data:
        # Pre-bind so the OSError handler below can always format its message.
        folder_name = None
        try:
            if not isinstance(person, dict):
                print(f"Warning: Skipping entry that is not a JSON object: {person!r}")
                continue

            # PersonID may be an int in the JSON; the folder name needs text.
            person_id = str(person.get('PersonID', 'UNKNOWN'))
            raw_name = person.get('Name', 'unknown_name')

            # A null / non-string name cannot be sanitized; treat it like an
            # empty name instead of letting it surface as an "unexpected error".
            folder_name_part = sanitize_name(raw_name) if isinstance(raw_name, str) else ''
            if not folder_name_part:
                print(f"Warning: Skipping ID {person_id} due to empty or invalid name data.")
                continue

            # Final folder name: idNUMBER_NAME
            folder_name = f"id{person_id}_{folder_name_part}"

            # Already present from an earlier run: skip without counting it.
            if os.path.exists(folder_name):
                continue

            # exist_ok=True tolerates the folder appearing between the check
            # above and this call.
            os.makedirs(folder_name, exist_ok=True)
            print(f"Successfully created: {folder_name}")
            folder_count += 1

        except OSError as e:
            # System-level failures (permissions, invalid path characters, ...)
            print(f"Error creating directory {folder_name}: {e}")
        except Exception as e:
            # Keep processing the remaining entries on any other failure.
            print(f"An unexpected error occurred processing an entry: {e}")

    print(f"\nFinished. Total folders created: {folder_count}")


if __name__ == "__main__":
    # Entry point: build the folders from the JSON file referenced by the
    # module-level DATA_FILE constant defined at the top of this cell.
    create_named_folders_from_json(DATA_FILE)


Starting to create 60 folders based on JSON entries...
Successfully created: id58_fekla
Successfully created: id59_taras
Successfully created: id60_ustinia

Finished. Total folders created: 3


In [2]:
import os
import json
import re
from pathlib import Path

BASE_DIR = Path.cwd()         # current working directory (where the notebook is running)
# Family data file: <BASE_DIR>/../data/family_data.json
DATA_FILE = BASE_DIR.parent / "data" / "family_data.json"

def sanitize_name(name):
    """
    Turn a raw name into a lowercase token usable in folder/file names.

    Every run of whitespace or non-word characters is replaced by a single
    underscore, then underscores at the start and end are stripped.

    NOTE: this duplicates the ``sanitize_name`` defined in the first cell;
    the two must stay in sync so that file names match the folder names.

    Args:
        name (str): The raw name string.

    Returns:
        str: The sanitized name (empty if the input has no word characters).
    """
    return re.sub(r'[\s\W]+', '_', name.lower()).strip('_')

def create_txt_files_in_existing_folders(data_filepath):
    """
    Write an 'idNUMBER_NAME.txt' file into each existing 'idNUMBER_NAME' folder.

    The JSON document must be a top-level list of objects. For every entry
    the folder name is rebuilt from 'PersonID' and the sanitized 'Name'
    (via ``sanitize_name``), and the entry's 'About' text is written to
    ``<folder>/<folder>.txt`` as UTF-8. Folders are looked up relative to the
    current working directory and are never created here: a missing folder
    is reported and skipped. Existing files are left untouched.

    When 'About' is missing or null a placeholder sentence is written
    instead; any other non-string value is written as its ``str()`` form.

    All problems are reported with ``print``; nothing is raised to the caller.

    Args:
        data_filepath (str | os.PathLike): Path to the family data JSON file.

    Returns:
        None
    """
    try:
        # Load and parse the JSON data
        with open(data_filepath, 'r', encoding='utf-8') as f:
            family_data = json.load(f)
    except FileNotFoundError:
        print(f"Error: Data file not found at '{data_filepath}'. Please check the path.")
        return
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from '{data_filepath}'. Check the file format.")
        return
    except Exception as e:
        print(f"An unexpected error occurred while reading the file: {e}")
        return

    if not isinstance(family_data, list):
        print("Error: JSON file content is not a list of objects. Expected a top-level list.")
        return

    print(f"Starting to create {len(family_data)} text files in existing folders...")

    file_count = 0
    for person in family_data:
        # Pre-bind so the OSError handler below can always format its message.
        folder_name = None
        try:
            if not isinstance(person, dict):
                print(f"Warning: Skipping entry that is not a JSON object: {person!r}")
                continue

            # --- 1. Naming: must reproduce the folder naming convention ---
            person_id = str(person.get('PersonID', 'UNKNOWN'))
            raw_name = person.get('Name', 'unknown_name')

            # A null / non-string name cannot be sanitized; treat it like an
            # empty name instead of letting it surface as an "unexpected error".
            name_part = sanitize_name(raw_name) if isinstance(raw_name, str) else ''
            if not name_part:
                print(f"Warning: Skipping file for ID {person_id} due to empty or invalid name data.")
                continue

            base_name = f"id{person_id}_{name_part}"
            folder_name = base_name  # the pre-existing folder
            # The file carries the folder's own name plus ".txt".
            file_path = os.path.join(folder_name, f"{base_name}.txt")

            # --- 2. Preconditions ---
            if not os.path.isdir(folder_name):
                print(f"Warning: Folder '{folder_name}' not found. Skipping file creation.")
                continue

            if os.path.exists(file_path):
                print(f"  -> File already exists, skipping: {file_path}")
                continue

            # --- 3. Content ---
            # Resolve the text BEFORE opening the file: opening with 'w' creates
            # the file immediately, so a failing write (e.g. About is null)
            # would leave an empty file that every later run skips as
            # "already exists".
            about_content = person.get('About')
            if about_content is None:
                about_content = f"No 'About' information provided for {raw_name} (ID {person_id})."
            elif not isinstance(about_content, str):
                about_content = str(about_content)

            # --- 4. Write ---
            with open(file_path, 'w', encoding='utf-8') as outfile:
                outfile.write(about_content)

            print(f"  -> Created file: {file_path}")
            file_count += 1

        except OSError as e:
            # System-level failures (permissions, invalid path, ...)
            print(f"Error writing file in {folder_name}: {e}")
        except Exception as e:
            # Keep processing the remaining entries on any other failure.
            print(f"An unexpected error occurred processing an entry: {e}")

    print(f"\nFinished. Total text files created: {file_count}")


if __name__ == "__main__":
    # Entry point: write the text files using the JSON file referenced by the
    # module-level DATA_FILE constant defined at the top of this cell.
    create_txt_files_in_existing_folders(DATA_FILE)


Starting to create 60 text files in existing folders...
  -> File already exists, skipping: id1_misha\id1_misha.txt
  -> File already exists, skipping: id2_marina\id2_marina.txt
  -> File already exists, skipping: id3_sergey\id3_sergey.txt
  -> File already exists, skipping: id4_dima\id4_dima.txt
  -> File already exists, skipping: id5_max\id5_max.txt
  -> File already exists, skipping: id6_sophia\id6_sophia.txt
  -> File already exists, skipping: id7_sviatik\id7_sviatik.txt
  -> File already exists, skipping: id8_nadia\id8_nadia.txt
  -> File already exists, skipping: id9_fedia\id9_fedia.txt
  -> File already exists, skipping: id10_nikolay\id10_nikolay.txt
  -> File already exists, skipping: id11_melania\id11_melania.txt
  -> File already exists, skipping: id12_gleb\id12_gleb.txt
  -> File already exists, skipping: id13_masha\id13_masha.txt
  -> File already exists, skipping: id14_olga\id14_olga.txt
  -> File already exists, skipping: id15_dima\id15_dima.txt
  -> File already exists, 