In [None]:
import cv2
import os
import numpy as np
import shutil

# --- CONFIGURATION ---
# Source dataset: its immediate subfolders are treated as emotion classes and
# scanned for images by sort_images_by_age_final().
original_dataset_path = '/Users/natalyagrokh/AI/ml_expressions/img_datasets/ferckjalfaga_dataset/'
# Root folder that receives the age-sorted copies of the images.
sorted_dataset_path = '/Users/natalyagrokh/AI/ml_expressions/img_datasets_sorted_final/'

# The age model files are looked up in an 'age_model' folder inside the
# directory the notebook is currently running in (os.getcwd()).
script_dir = os.getcwd()
prototxt_path = os.path.join(script_dir, 'age_model', 'age_deploy.prototxt')
weights_path = os.path.join(script_dir, 'age_model', 'age_net.caffemodel')


# --- MODEL SETUP ---
# AGE_BUCKETS is indexed with the argmax of the network output, so its order
# must not be changed. The three lists below partition it into the categories
# used for sorting.
AGE_BUCKETS = ["(0-2)", "(4-6)", "(8-12)", "(15-20)", "(25-32)", "(38-43)", "(48-53)", "(60-100)"]
CHILD_BUCKETS = ["(0-2)", "(4-6)", "(8-12)"]
TEEN_REVIEW_BUCKETS = ["(15-20)"]
ADULT_BUCKETS = ["(25-32)", "(38-43)", "(48-53)", "(60-100)"]

# Fail fast with a readable message when the notebook is started from a
# directory that does not contain the model files, instead of surfacing an
# opaque OpenCV error from readNet.
for _model_file in (prototxt_path, weights_path):
    if not os.path.isfile(_model_file):
        raise FileNotFoundError(
            f"Age model file not found: {_model_file} "
            f"(expected under {os.path.join(script_dir, 'age_model')})"
        )

print("✅ Loading age detection model...")
age_net = cv2.dnn.readNet(prototxt_path, weights_path)
print("✅ Model loaded successfully.")


# --- SCRIPT LOGIC ---
def sort_images_by_age_final():
    """
    Copy every image of the source dataset into age-based subfolders.

    For each subfolder (emotion) of ``original_dataset_path`` the function
    creates ``adult``, ``child`` and ``teen_review`` folders under
    ``sorted_dataset_path/<emotion>/``, runs ``age_net`` on every
    .png/.jpg/.jpeg file and copies the file (``shutil.copy2``) into the
    folder that matches the predicted age bucket. The source files are only
    read, never moved or modified.

    Problems with individual files are printed and counted but never raised,
    so one bad image does not abort the whole run. Category counters are
    increased only after the copy succeeded, so the printed summary reflects
    what is actually on disk.

    Returns:
        None. A summary is printed at the end.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    categories = ('adult', 'child', 'teen_review')
    # Per-channel mean subtracted by blobFromImage; presumably the values the
    # age model was trained with -- confirm against the model's documentation.
    mean_values = (78.4263377603, 87.7689143744, 114.895847746)

    counts = dict.fromkeys(categories, 0)
    total = unreadable = failed = 0
    os.makedirs(sorted_dataset_path, exist_ok=True)

    # Sorted so the processing order (and therefore the log) is reproducible.
    for emotion_folder in sorted(os.listdir(original_dataset_path)):
        emotion_path = os.path.join(original_dataset_path, emotion_folder)

        if not os.path.isdir(emotion_path):
            continue

        print(f"\nProcessing folder: {emotion_folder}")

        # One destination folder per category for this emotion.
        dest_dirs = {
            category: os.path.join(sorted_dataset_path, emotion_folder, category)
            for category in categories
        }
        for dest_dir in dest_dirs.values():
            os.makedirs(dest_dir, exist_ok=True)

        for filename in sorted(os.listdir(emotion_path)):
            if not filename.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(emotion_path, filename)
            try:
                image = cv2.imread(image_path)
                if image is None:
                    # imread signals an unreadable/corrupt file with None.
                    unreadable += 1
                    print(f"  [SKIP] Could not read image: {filename}")
                    continue

                # Safety net: the network needs a 3-channel input.
                if image.ndim == 2 or image.shape[2] == 1:
                    image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

                total += 1
                blob = cv2.dnn.blobFromImage(image, 1.0, (227, 227), mean_values, swapRB=False)
                age_net.setInput(blob)
                predictions = age_net.forward()
                age = AGE_BUCKETS[predictions[0].argmax()]

                if age in ADULT_BUCKETS:
                    category = 'adult'
                elif age in TEEN_REVIEW_BUCKETS:
                    category = 'teen_review'
                else:
                    category = 'child'

                shutil.copy2(image_path, os.path.join(dest_dirs[category], filename))
                # Count only once the file really is in its destination.
                counts[category] += 1

            except Exception as e:
                failed += 1
                print(f"  [ERROR] Could not process {filename}: {e}")

    print("\n--- Sorting Complete ---")
    print(f"Total images scanned: {total}")
    print(f"Images classified as 'adult': {counts['adult']}")
    print(f"Images classified as 'child': {counts['child']}")
    print(f"Images classified as 'teen_review': {counts['teen_review']}")
    print(f"Images skipped (unreadable): {unreadable}")
    print(f"Images failed during processing: {failed}")
    print(f"✅ Sorted dataset is ready for review at: {sorted_dataset_path}")


# Run the sort only when this code is executed directly (a notebook cell or a
# script), not when it is imported as a module.
if __name__ == "__main__":
    sort_images_by_age_final()

In [3]:
from pypdf import PdfReader, PdfWriter

def split_pdf(input_path, output_path, start_page, end_page):
    """
    Extract an inclusive range of pages from a PDF and save it as a new file.

    Args:
        input_path: Path of the PDF to read.
        output_path: Path of the PDF to create.
        start_page: First page to extract, 1-based.
        end_page: Last page to extract, 1-based and inclusive.

    Returns:
        None. Success and failure are both reported with ``print``; any
        error (invalid range, unreadable input, write failure) is caught
        and reported rather than raised, and no output file is written for
        an invalid range.
    """
    try:
        # Validate before touching any file: a start below 1 would become a
        # negative index and silently pull pages from the END of the
        # document, and an inverted range would write an empty PDF while
        # still reporting success.
        if start_page < 1:
            raise ValueError(f"start_page must be >= 1, got {start_page}")
        if end_page < start_page:
            raise ValueError(
                f"end_page ({end_page}) must not be smaller than start_page ({start_page})"
            )

        reader = PdfReader(input_path)
        total_pages = len(reader.pages)
        if end_page > total_pages:
            raise ValueError(
                f"end_page ({end_page}) exceeds the number of pages in the input ({total_pages})"
            )

        writer = PdfWriter()

        # reader.pages is 0-indexed, so shift the 1-based start by one; the
        # exclusive upper bound of range() makes end_page inclusive.
        for page_num in range(start_page - 1, end_page):
            writer.add_page(reader.pages[page_num])

        with open(output_path, "wb") as out_file:
            writer.write(out_file)

        print(f"Successfully created {output_path} (Pages {start_page}-{end_page})")

    except Exception as e:
        print(f"An error occurred while creating {output_path}: {e}")

if __name__ == "__main__":
    # === EDIT THIS LINE ===
    # Full path of the PDF to split; the file name must be the last component.
    # Example: "/Users/YourUsername/Downloads/GROKH Tranche 1 (1-928).pdf"
    input_file = "/Users/natalyagrokh/Downloads/GROKH Tranche 1 (1-928) (1).pdf"
    # ======================

    # Four consecutive parts of 232 pages each: 1-232, 233-464, 465-696, 697-928.
    pages_per_part = 232
    splits = [
        {
            "part": part,
            "start": (part - 1) * pages_per_part + 1,
            "end": part * pages_per_part,
        }
        for part in range(1, 5)
    ]

    print(f"Starting to split '{input_file}'...")

    for split_info in splits:
        part_num = split_info["part"]
        first_page = split_info["start"]
        last_page = split_info["end"]
        # A bare file name, so each part is written to the current working
        # directory (where the notebook is running).
        output_file = f"GROKH Tranche 1 (Part {part_num}).pdf"
        split_pdf(input_file, output_file, first_page, last_page)

    print("Splitting process complete.")

Starting to split '/Users/natalyagrokh/Downloads/GROKH Tranche 1 (1-928) (1).pdf'...
Successfully created GROKH Tranche 1 (Part 1).pdf (Pages 1-232)
Successfully created GROKH Tranche 1 (Part 2).pdf (Pages 233-464)
Successfully created GROKH Tranche 1 (Part 3).pdf (Pages 465-696)
Successfully created GROKH Tranche 1 (Part 4).pdf (Pages 697-928)
Splitting process complete.
