# 🚀 PaddleOCR Recognition Training on AWS SageMaker

## 📋 Overview
This notebook is specifically designed to run on **AWS SageMaker** instances.

### 🎯 Key Features:
- Optimized for SageMaker environment
- Automatic AWS credentials detection
- Built-in S3 integration
- GPU-optimized training
- Checkpoint management

### 📊 Requirements:
- **Instance Type**: `ml.g4dn.xlarge` or higher
- **Data**: Pre-uploaded to S3 bucket
- **Kernel**: `conda_python3` or `Python 3`

### 🔧 Setup Instructions:
1. Upload this notebook to your SageMaker instance
2. Set your S3 bucket name, data prefix, and AWS region in Cell 2
3. Run cells sequentially

---

In [None]:
# ===== SAGEMAKER ENVIRONMENT SETUP =====
# 🔧 Cell 1: SageMaker Environment & Dependencies Setup

import sys
import os
import json
import subprocess
import time
from pathlib import Path
from datetime import datetime

# Print a banner followed by basic facts about the current runtime.
print("🚀 PaddleOCR Recognition Training on AWS SageMaker")
print("=" * 60)

print(
    f"📍 Python version: {sys.version}",
    f"📍 Working directory: {os.getcwd()}",
    f"📍 Available CPUs: {os.cpu_count()}",
    sep="\n",
)

# Heuristics for spotting a SageMaker host; a single hit is enough.
sagemaker_indicators = [
    '/opt/ml' in os.getcwd(),
    'SageMaker' in os.environ.get('SM_FRAMEWORK_MODULE', ''),
    os.path.exists('/opt/ml'),
    'SAGEMAKER_REGION' in os.environ,
]

IS_SAGEMAKER = any(sagemaker_indicators)
if not IS_SAGEMAKER:
    print("⚠️  Not detected as SageMaker environment")
    print("   This notebook is optimized for SageMaker")
else:
    print("✅ Running on AWS SageMaker")

# Install the required dependencies, one pip invocation per package so a
# single failure does not block the remaining installs.
print("\n📦 Installing required packages...")
required_packages = [
    "paddlepaddle-gpu==2.5.2",
    "boto3",
    "sagemaker",
    "opencv-python",
    "Pillow",
    "numpy",
    "PyYAML",
    "tqdm",
    "matplotlib",
    "seaborn",
]

for package in required_packages:
    print(f"📦 Installing {package}...")
    try:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-q", package],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        # Best-effort install: warn and carry on. The import step that
        # follows re-raises ImportError if a package is really missing.
        print(f"  ⚠️  Warning installing {package}: {e}")
        # The exception text only carries the exit code; pip's actual
        # explanation is on the captured stderr, so surface its last line.
        pip_errors = (e.stderr or "").strip()
        if pip_errors:
            print(f"     {pip_errors.splitlines()[-1]}")
    else:
        print(f"  ✅ {package} installed successfully")

# Import libraries
# The third-party imports happen here, after the pip install loop above has
# run. Any ImportError is reported and re-raised so the notebook stops at
# this cell instead of failing later with a NameError.
print("\n📚 Importing libraries...")
try:
    import boto3
    import sagemaker
    import paddle
    import cv2
    import numpy as np
    import yaml
    from tqdm import tqdm
    import matplotlib.pyplot as plt
    import seaborn as sns
    print("✅ All libraries imported successfully")
except ImportError as e:
    print(f"❌ Import error: {e}")
    raise

# Check GPU availability. Any failure here is reported as a warning only;
# the cell never aborts because of the GPU probe.
print("\n🔍 GPU Status Check:")
try:
    if paddle.is_compiled_with_cuda():
        gpu_count = paddle.device.cuda.device_count()
        print("✅ PaddlePaddle GPU support available")
        print(f"✅ Available GPUs: {gpu_count}")

        if gpu_count == 0:
            # A CUDA build on a CPU-only instance would otherwise look healthy.
            print("⚠️  CUDA build installed but no GPU device is visible")

        # Selecting each device in turn confirms it can actually be used.
        for i in range(gpu_count):
            paddle.device.set_device(f'gpu:{i}')
            print(f"   GPU {i}: Ready")

        # Do not leave the last probed GPU selected as a side effect.
        if gpu_count > 0:
            paddle.device.set_device('gpu:0')
    else:
        print("⚠️  PaddlePaddle CPU version detected")
        print("   Training will be slower without GPU")
except Exception as e:
    print(f"⚠️  GPU check error: {e}")

print("\n✅ Environment setup completed!")
print("📝 Next: Configure your S3 bucket in Cell 2")

In [None]:
# ===== AWS & S3 CONFIGURATION =====
# 🔧 Cell 2: AWS Credentials & S3 Setup

print("🔐 AWS Configuration Setup")
print("=" * 40)

# 🚨 CONFIGURE YOUR S3 BUCKET HERE
S3_BUCKET = "sagemaker-ocr-train-bucket"  # 👈 change to your bucket
S3_DATA_PREFIX = "recognition-data"
AWS_REGION = "ap-southeast-1"  # 👈 change to your region

print(
    f"📁 S3 Bucket: {S3_BUCKET}",
    f"📂 Data Prefix: {S3_DATA_PREFIX}",
    f"🌍 AWS Region: {AWS_REGION}",
    sep="\n",
)

# Export the settings as environment variables for this process and any
# child processes it starts.
os.environ.update({
    'AWS_DEFAULT_REGION': AWS_REGION,
    'S3_BUCKET': S3_BUCKET,
})

# Verify AWS credentials by asking STS who the caller is.
print("\n🔍 Checking AWS credentials...")
try:
    # On SageMaker the credentials are expected to come from the attached
    # IAM role, so no keys are configured here.
    sts = boto3.client('sts', region_name=AWS_REGION)
    identity = sts.get_caller_identity()

    # Assumed-role ARNs have the form
    #   arn:aws:sts::<account>:assumed-role/<role-name>/<session-name>
    # so the last segment is the session name; report the role name instead.
    # For user/role ARNs the last segment is already the principal name.
    arn_segments = identity['Arn'].split('/')
    if arn_segments[0].endswith(':assumed-role') and len(arn_segments) >= 3:
        principal_name = arn_segments[1]
    else:
        principal_name = arn_segments[-1]

    print("✅ AWS credentials valid")
    print(f"   Account ID: {identity['Account']}")
    print(f"   User/Role: {principal_name}")

except Exception as e:
    print(f"❌ AWS credentials error: {e}")
    print("💡 Make sure your SageMaker instance has proper IAM role")
    raise

# Verify that the bucket is reachable and peek at the data prefix.
print("\n🔍 Checking S3 access...")
try:
    s3_client = boto3.client('s3', region_name=AWS_REGION)
    s3_resource = boto3.resource('s3', region_name=AWS_REGION)

    # Probe the bucket itself; a failure lands in the except branch below.
    s3_client.head_bucket(Bucket=S3_BUCKET)
    print(f"✅ S3 bucket accessible: {S3_BUCKET}")

    # Sample at most five objects under the data prefix.
    bucket = s3_resource.Bucket(S3_BUCKET)
    objects = [*bucket.objects.filter(Prefix=S3_DATA_PREFIX).limit(5)]

    if not objects:
        print(f"⚠️  No objects found in {S3_DATA_PREFIX}/")
        print("   Please make sure data is uploaded to S3")
    else:
        print(f"✅ Found {len(objects)} objects in {S3_DATA_PREFIX}/")
        print("   Sample objects:")
        for obj in objects[:3]:
            print(f"   - {obj.key}")

except Exception as e:
    print(f"❌ S3 access error: {e}")
    print("💡 Check bucket name and region settings")
    raise

# Initialize SageMaker session
# Failures here are downgraded to a warning: nothing is re-raised, so the
# notebook continues even when no session or execution role is available.
print("\n🚀 Initializing SageMaker session...")
try:
    sagemaker_session = sagemaker.Session()
    role = sagemaker.get_execution_role()
    
    print(f"✅ SageMaker session initialized")
    print(f"   Default bucket: {sagemaker_session.default_bucket()}")
    # Show only the last path segment of the role identifier.
    print(f"   Execution role: {role.split('/')[-1]}")
    
except Exception as e:
    print(f"⚠️  SageMaker session warning: {e}")
    print("   This is normal if not running in SageMaker training job")

print("\n✅ AWS & S3 configuration completed!")
print("📝 Next: Download PaddleOCR repository in Cell 3")

In [None]:
# ===== PADDLEOCR REPOSITORY SETUP =====
# 🔧 Cell 3: Clone & Setup PaddleOCR Repository

print("📥 PaddleOCR Repository Setup")
print("=" * 40)

PADDLEOCR_DIR = "/tmp/PaddleOCR"
WORK_DIR = "/tmp/ocr_training"

# Create the working directory and switch into it.
os.makedirs(WORK_DIR, exist_ok=True)
os.chdir(WORK_DIR)

print(f"📁 Working directory: {os.getcwd()}")

# Clone PaddleOCR, or refresh an existing checkout.
# NOTE(review): the repository's default branch is used unpinned; confirm it
# is compatible with the paddlepaddle-gpu version installed in Cell 1.
if os.path.exists(PADDLEOCR_DIR):
    print(f"📂 PaddleOCR already exists at {PADDLEOCR_DIR}")
    print("🔄 Updating repository...")
    # "git -C" targets the checkout explicitly instead of relying on the
    # current directory, and the result is inspected rather than dropped.
    result = subprocess.run(
        ["git", "-C", PADDLEOCR_DIR, "pull"],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # A stale checkout is still usable, so warn instead of aborting.
        print(f"⚠️  git pull failed, using existing checkout: {result.stderr.strip()}")
else:
    print("📥 Cloning PaddleOCR repository...")
    result = subprocess.run(
        [
            "git", "clone",
            "https://github.com/PaddlePaddle/PaddleOCR.git",
            PADDLEOCR_DIR,
        ],
        capture_output=True,
        text=True,
    )

    if result.returncode != 0:
        print(f"❌ Clone failed: {result.stderr}")
        raise Exception("Failed to clone PaddleOCR")
    print("✅ PaddleOCR repository cloned successfully")

# Enter the PaddleOCR directory; later cells use paths relative to it.
os.chdir(PADDLEOCR_DIR)
print(f"📍 Current directory: {os.getcwd()}")

# Install PaddleOCR's own requirements.
print("\n📦 Installing PaddleOCR requirements...")
# Resolve the file against the checkout so the install does not depend on
# which directory the kernel happens to be in.
requirements_file = os.path.join(PADDLEOCR_DIR, "requirements.txt")
try:
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "-r", requirements_file],
        check=True,
        capture_output=True,
        text=True,
    )
    print("✅ PaddleOCR requirements installed")
except subprocess.CalledProcessError as e:
    # Best-effort: report the failure and continue.
    print(f"⚠️  Some requirements may have failed to install: {e}")
    # The exception text only has the exit code; show pip's last stderr line.
    pip_errors = (e.stderr or "").strip()
    if pip_errors:
        print(f"   {pip_errors.splitlines()[-1]}")

# List the recognition configs shipped with the checkout.
print("\n🔍 Available Recognition configs:")
# Anchor the search on PADDLEOCR_DIR (not the current directory) and sort it:
# glob order is filesystem-dependent, and the first match is used as the
# fallback default below.
rec_configs = sorted(Path(PADDLEOCR_DIR, "configs", "rec").glob("**/*.yml"))
available_configs = []

# The loop variable is deliberately not called `config`, which is the name
# the training-configuration cell uses for the config dict.
for config_path in rec_configs:
    if any(arch in config_path.name.lower() for arch in ['crnn', 'svtr', 'pp-ocr']):
        available_configs.append(str(config_path))
        print(f"  📄 {config_path}")

if available_configs:
    print(f"\n✅ Found {len(available_configs)} recognition configs")
    # Pick the default config; stored as an absolute path so later cells can
    # open it regardless of the working directory.
    DEFAULT_CONFIG = os.path.join(
        PADDLEOCR_DIR, "configs/rec/PP-OCRv4/en_PP-OCRv4_rec.yml"
    )
    if os.path.exists(DEFAULT_CONFIG):
        print(f"🎯 Default config: {DEFAULT_CONFIG}")
    else:
        DEFAULT_CONFIG = available_configs[0]
        print(f"🎯 Using first available: {DEFAULT_CONFIG}")
else:
    print("⚠️  No recognition configs found")
    DEFAULT_CONFIG = None

# Make the PaddleOCR checkout importable.
if PADDLEOCR_DIR not in sys.path:
    sys.path.insert(0, PADDLEOCR_DIR)
    print(f"✅ Added {PADDLEOCR_DIR} to Python path")

print("\n✅ PaddleOCR repository setup completed!")
print("📝 Next: Download training data from S3 in Cell 4")

In [None]:
# ===== DATA DOWNLOAD FROM S3 =====
# 🔧 Cell 4: Download Training Data from S3

print("📥 Downloading Training Data from S3")
print("=" * 40)

# Local destination for the dataset, created below the working directory.
LOCAL_DATA_DIR = WORK_DIR + "/data"
os.makedirs(LOCAL_DATA_DIR, exist_ok=True)

print(
    f"📁 Local data directory: {LOCAL_DATA_DIR}",
    f"☁️  S3 source: s3://{S3_BUCKET}/{S3_DATA_PREFIX}/",
    sep="\n",
)

def download_s3_folder(bucket, prefix, local_dir):
    """Download every object under an S3 prefix into a local directory.

    Uses the module-level ``s3_client`` and shows a ``tqdm`` progress bar.
    Individual download failures are reported and skipped.

    Args:
        bucket: Name of the S3 bucket.
        prefix: Key prefix ("folder") to mirror; a trailing slash is optional.
        local_dir: Local directory that receives the files. The key layout
            below the prefix is recreated inside it.

    Returns:
        Number of files downloaded successfully; 0 when nothing was found.
    """
    # Anchor the listing on "<prefix>/" so sibling prefixes that merely share
    # the same leading characters (e.g. "data-old/") are not picked up.
    folder_prefix = prefix.rstrip('/') + '/' if prefix else ''

    # List once and remember the keys instead of paginating twice.
    print(f"🔍 Scanning files in s3://{bucket}/{prefix}/...")
    paginator = s3_client.get_paginator('list_objects_v2')
    keys = []
    total_size = 0

    for page in paginator.paginate(Bucket=bucket, Prefix=folder_prefix):
        for obj in page.get('Contents', []):
            # Keys ending in "/" are folder placeholders, not files; they
            # cannot be written to a local file path.
            if obj['Key'].endswith('/'):
                continue
            keys.append(obj['Key'])
            total_size += obj['Size']

    total_files = len(keys)
    print(f"📊 Found {total_files} files ({total_size / 1024 / 1024:.1f} MB)")

    if total_files == 0:
        print(f"⚠️  No files found in s3://{bucket}/{prefix}/")
        # Return a number (not None) so callers can compare the result.
        return 0

    downloaded = 0
    # The context manager closes the bar even if something unexpected raises.
    with tqdm(total=total_files, desc="Downloading", unit="files") as progress_bar:
        for key in keys:
            # Strip only the leading prefix; str.replace would also remove
            # later occurrences of the same text inside the key.
            relative_key = key[len(folder_prefix):]
            local_file = os.path.join(local_dir, relative_key)

            # Create the parent directory if it does not exist yet.
            os.makedirs(os.path.dirname(local_file), exist_ok=True)

            try:
                s3_client.download_file(bucket, key, local_file)
                downloaded += 1
                progress_bar.update(1)
            except Exception as e:
                # Best-effort: keep going so one bad object does not abort
                # the whole transfer.
                print(f"⚠️  Failed to download {key}: {e}")

    print(f"✅ Downloaded {downloaded}/{total_files} files")
    return downloaded

# Download the whole dataset and sanity-check what arrived.
try:
    downloaded_count = download_s3_folder(S3_BUCKET, S3_DATA_PREFIX, LOCAL_DATA_DIR)

    # Test truthiness instead of "> 0": this treats both 0 and a None result
    # as "nothing downloaded", whereas comparing None with an int raises
    # TypeError.
    if not downloaded_count:
        print("❌ No data downloaded. Please check S3 bucket and permissions.")
        raise Exception("Data download failed")

    print("\n✅ Data download completed!")

    # Check the files the later cells depend on.
    important_files = [
        "annotations/train_annotation.txt",
        "annotations/val_annotation.txt",
        "metadata/character_dict.txt",
        "metadata/dataset_info.json",
    ]

    print("\n🔍 Checking important files:")
    for file_path in important_files:
        full_path = os.path.join(LOCAL_DATA_DIR, file_path)
        if os.path.exists(full_path):
            size = os.path.getsize(full_path)
            print(f"  ✅ {file_path} ({size} bytes)")
        else:
            print(f"  ❌ {file_path} - missing")

    # Show dataset information when the metadata file is present.
    metadata_file = os.path.join(LOCAL_DATA_DIR, "metadata/dataset_info.json")
    if os.path.exists(metadata_file):
        # Explicit UTF-8 so the read does not depend on the host locale.
        with open(metadata_file, 'r', encoding='utf-8') as f:
            dataset_info = json.load(f)

        print("\n📊 Dataset Information:")
        print(f"   Total images: {dataset_info.get('total_images', 'Unknown')}")
        print(f"   Training: {dataset_info.get('train_count', 'Unknown')}")
        print(f"   Validation: {dataset_info.get('val_count', 'Unknown')}")
        print(f"   Characters: {dataset_info.get('character_count', 'Unknown')}")

except Exception as e:
    print(f"❌ Data download error: {e}")
    raise

print("\n📝 Next: Create training configuration in Cell 5")

In [None]:
# ===== TRAINING CONFIGURATION =====
# 🔧 Cell 5: Create Training Configuration

print("⚙️  Creating Training Configuration")
print("=" * 40)

CONFIG_DIR = WORK_DIR + "/configs"
os.makedirs(CONFIG_DIR, exist_ok=True)

# Load the base config when one was found; otherwise start from an empty
# skeleton that has the sections filled in further down.
if not (DEFAULT_CONFIG and os.path.exists(DEFAULT_CONFIG)):
    print("⚠️  Using minimal config template")
    config = {section: {} for section in ('Global', 'Architecture', 'Loss', 'Optimizer')}
    config.update({
        'Train': {'dataset': {}},
        'Eval': {'dataset': {}},
    })
else:
    print(f"📄 Loading base config: {DEFAULT_CONFIG}")
    with open(DEFAULT_CONFIG, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

# Update the config for the SageMaker environment.
print("🔧 Updating configuration for SageMaker environment...")

# Global settings (merged into whatever the base config already defines).
config.setdefault('Global', {}).update({
    'debug': False,
    'use_gpu': True,
    'epoch_num': 10,  # number of epochs
    'log_smooth_window': 20,
    'print_batch_step': 10,
    'save_model_dir': f'{WORK_DIR}/output/models',
    'save_epoch_step': 2,
    'eval_batch_step': [0, 500],
    'cal_metric_during_train': True,
    'pretrained_model': None,
    'checkpoints': None,
    'save_inference_dir': f'{WORK_DIR}/output/inference',
    'use_visualdl': False,
    'infer_img': None,
    'character_dict_path': f'{LOCAL_DATA_DIR}/metadata/character_dict.txt',
    'max_text_length': 25,
    'infer_mode': False,
    'use_space_char': True,
    'distributed': False
})

# Architecture - CRNN with a MobileNetV3 backbone
config['Architecture'] = {
    'model_type': 'rec',
    'algorithm': 'CRNN',
    'Transform': None,
    'Backbone': {
        'name': 'MobileNetV3',
        'scale': 0.5,
        'model_name': 'small',
        'small_stride': [1, 2, 2, 2]
    },
    'Neck': {
        'name': 'SequenceEncoder',
        'encoder_type': 'rnn',
        'hidden_size': 48
    },
    'Head': {
        'name': 'CTCHead',
        'fc_decay': 0.00001
    }
}

# Loss function
config['Loss'] = {
    'name': 'CTCLoss'
}

# Decoder and metric matching the CTC head above. They are set explicitly
# because the minimal template has neither section, and a base config may
# carry ones meant for a different architecture.
config['PostProcess'] = {
    'name': 'CTCLabelDecode'
}
config['Metric'] = {
    'name': 'RecMetric',
    'main_indicator': 'acc'
}

# Optimizer
config['Optimizer'] = {
    'name': 'Adam',
    'beta1': 0.9,
    'beta2': 0.999,
    'lr': {
        'name': 'Cosine',
        'learning_rate': 0.001,
        'warmup_epoch': 2
    },
    'regularizer': {
        'name': 'L2',
        'factor': 1e-06
    }
}

# Training dataset
config['Train'] = {
    'dataset': {
        'name': 'SimpleDataSet',
        'data_dir': f'{LOCAL_DATA_DIR}/images/train',
        'label_file_list': [f'{LOCAL_DATA_DIR}/annotations/train_annotation.txt'],
        'ratio_list': [1.0]
    },
    'loader': {
        'shuffle': True,
        'batch_size_per_card': 32,
        'drop_last': True,
        'num_workers': 4
    },
    'transforms': [
        {'DecodeImage': {'img_mode': 'BGR', 'channel_first': False}},
        {'RecAug': {}},
        {'CTCLabelEncode': {}},
        {'RecResizeImg': {'image_shape': [3, 32, 320]}},
        {'KeepKeys': {'keep_keys': ['image', 'label', 'length']}}
    ]
}

# Evaluation dataset (same pipeline as training, minus augmentation)
config['Eval'] = {
    'dataset': {
        'name': 'SimpleDataSet',
        'data_dir': f'{LOCAL_DATA_DIR}/images/val',
        'label_file_list': [f'{LOCAL_DATA_DIR}/annotations/val_annotation.txt']
    },
    'loader': {
        'shuffle': False,
        'drop_last': False,
        'batch_size_per_card': 32,
        'num_workers': 4
    },
    'transforms': [
        {'DecodeImage': {'img_mode': 'BGR', 'channel_first': False}},
        {'CTCLabelEncode': {}},
        {'RecResizeImg': {'image_shape': [3, 32, 320]}},
        {'KeepKeys': {'keep_keys': ['image', 'label', 'length']}}
    ]
}

# Serialize the assembled config to YAML inside the config directory.
config_file = CONFIG_DIR + "/sagemaker_rec_config.yml"
with open(config_file, 'w', encoding='utf-8') as f:
    f.write(yaml.dump(config, default_flow_style=False, allow_unicode=True))

print(f"✅ Configuration saved: {config_file}")

# Create the output directories referenced by the config, plus a log folder.
output_dirs = [
    config['Global']['save_model_dir'],
    config['Global']['save_inference_dir'],
    WORK_DIR + "/logs",
]

for dir_path in output_dirs:
    os.makedirs(dir_path, exist_ok=True)
    print(f"📁 Created: {dir_path}")

# Print a short summary of the settings that matter most.
print(
    "\n📋 Training Configuration Summary:",
    f"   Algorithm: {config['Architecture']['algorithm']}",
    f"   Backbone: {config['Architecture']['Backbone']['name']}",
    f"   Epochs: {config['Global']['epoch_num']}",
    f"   Batch size: {config['Train']['loader']['batch_size_per_card']}",
    f"   Learning rate: {config['Optimizer']['lr']['learning_rate']}",
    f"   Character dict: {config['Global']['character_dict_path']}",
    f"   Model output: {config['Global']['save_model_dir']}",
    sep="\n",
)

print("\n✅ Training configuration completed!")
print("📝 Next: Start training in Cell 6")

In [None]:
# ===== START TRAINING =====
# 🔧 Cell 6: Start PaddleOCR Recognition Training

print("🚀 Starting PaddleOCR Recognition Training")
print("=" * 50)

# Verify that every required input exists before training starts.
print("🔍 Pre-training checks:")

required_files = [
    config_file,
    config['Global']['character_dict_path'],
    config['Train']['dataset']['label_file_list'][0],
    config['Eval']['dataset']['label_file_list'][0],
]

all_files_exist = True
for file_path in required_files:
    if not os.path.exists(file_path):
        print(f"  ❌ {file_path} - missing")
        all_files_exist = False
        continue
    print(f"  ✅ {os.path.basename(file_path)}")

if not all_files_exist:
    raise Exception("Required files missing. Please run previous cells.")

# Re-check the GPU and select the device for this process.
if not paddle.is_compiled_with_cuda():
    print("  ⚠️  Training on CPU (will be slower)")
    paddle.device.set_device('cpu')
else:
    gpu_count = paddle.device.cuda.device_count()
    print(f"  ✅ GPU available: {gpu_count} device(s)")
    paddle.device.set_device('gpu:0')

# Switch to the PaddleOCR directory; the training script path is relative.
os.chdir(PADDLEOCR_DIR)
print(f"📍 Training from: {os.getcwd()}")

# Build the training command.
#
# PaddleOCR's argument parser declares "-o/--opt" with nargs="+", so when the
# flag is repeated argparse keeps only the LAST occurrence. All overrides are
# therefore passed after a single "-o".
# Only request the GPU when one is actually usable.
use_gpu = paddle.is_compiled_with_cuda() and paddle.device.cuda.device_count() > 0
cli_overrides = {
    "Global.use_gpu": use_gpu,
    "Global.epoch_num": 10,
    "Train.loader.batch_size_per_card": 16,  # smaller batch size for SageMaker
    "Global.print_batch_step": 5,
}

training_cmd = [
    sys.executable,
    "tools/train.py",
    "-c", config_file,
    "-o",
] + [f"{option}={setting}" for option, setting in cli_overrides.items()]

# Mirror the overrides into the in-memory config so later cells report the
# values that were actually used for training.
for dotted_key, override_value in cli_overrides.items():
    *section_path, leaf_key = dotted_key.split('.')
    target = config
    for section_name in section_path:
        target = target[section_name]
    target[leaf_key] = override_value

print("\n🎯 Training Command:")
print(f"   {' '.join(training_cmd)}")

print("\n🏁 Starting training... (This will take some time)")
print("📊 Monitor GPU usage with: watch -n 1 nvidia-smi")
print("📈 Training progress will be displayed below:")
print("=" * 60)

# Start training
import subprocess
import time
from datetime import datetime

start_time = datetime.now()
print(f"⏰ Training started at: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")

# Bound before the try so the handlers below can tell whether the child
# process was ever started.
process = None
try:
    # Run the trainer and stream its combined stdout/stderr in real time.
    # cwd pins the relative "tools/train.py" path to the PaddleOCR checkout.
    process = subprocess.Popen(
        training_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        bufsize=1,
        cwd=PADDLEOCR_DIR,
    )

    for line in process.stdout:
        print(line.rstrip())

    # Wait for the process to finish so returncode is populated.
    process.wait()

    end_time = datetime.now()
    duration = end_time - start_time

    if process.returncode == 0:
        print("\n" + "=" * 60)
        print("🎉 Training completed successfully!")
        print(f"⏱️  Total training time: {duration}")
        print(f"📁 Models saved in: {config['Global']['save_model_dir']}")
    else:
        print(f"\n❌ Training failed with return code: {process.returncode}")
        raise Exception("Training process failed")

except KeyboardInterrupt:
    print("\n⚠️  Training interrupted by user")
    if process is not None:
        process.terminate()
        # Reap the child; escalate to kill if it ignores the terminate.
        try:
            process.wait(timeout=30)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()

except Exception as e:
    print(f"\n❌ Training error: {e}")
    raise

finally:
    # Release the pipe whichever way the run ended.
    if process is not None and process.stdout is not None:
        process.stdout.close()

print("\n📝 Next: Test trained model in Cell 7")

In [None]:
# ===== MODEL TESTING & VALIDATION =====
# 🔧 Cell 7: Test Trained Model

print("🧪 Testing Trained Model")
print("=" * 40)

MODEL_DIR = config['Global']['save_model_dir']
print(f"📁 Model directory: {MODEL_DIR}")

# Find the latest checkpoint.
# A checkpoint is stored as several files sharing one prefix (for example
# latest.pdparams next to optimizer/state files), and Global.checkpoints
# expects that prefix. Selecting by the ".pdparams" file and stripping the
# extension avoids handing PaddleOCR a companion file such as "latest.states".
# NOTE(review): prefix-based loading is PaddleOCR behaviour — confirm against
# the checked-out version.
model_path = None
if os.path.exists(MODEL_DIR):
    model_files = sorted(
        name for name in os.listdir(MODEL_DIR)
        if name.endswith('.pdparams')
        and (name.startswith('latest') or name.startswith('best'))
    )
    if model_files:
        # As before, the alphabetically last match wins ("latest*" over "best*").
        latest_model = model_files[-1][:-len('.pdparams')]
        model_path = os.path.join(MODEL_DIR, latest_model)
        print(f"🎯 Using model: {latest_model}")
    else:
        print("⚠️  No trained models found")
else:
    print("❌ Model directory not found")

if model_path:
    print("\n🔍 Running model evaluation...")

    # Assemble the evaluation command for the selected checkpoint.
    eval_cmd = [sys.executable, "tools/eval.py"]
    eval_cmd += ["-c", config_file]
    eval_cmd += ["-o", f"Global.checkpoints={model_path}"]

    print(f"📊 Evaluation command: {' '.join(eval_cmd)}")

    try:
        # Capture both streams as text; give up after five minutes.
        result = subprocess.run(
            eval_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            timeout=300,
        )

        if result.returncode != 0:
            print(f"⚠️  Evaluation failed: {result.stderr}")
        else:
            print("✅ Evaluation completed")
            print("📈 Results:")
            print(result.stdout)

    except subprocess.TimeoutExpired:
        print("⚠️  Evaluation timeout (5 minutes)")
    except Exception as e:
        print(f"❌ Evaluation error: {e}")

# Quick inference smoke test on a few validation images.
print("\n🖼️  Testing inference on sample images...")

sample_images_dir = f"{LOCAL_DATA_DIR}/images/val"
if not model_path:
    # Without a checkpoint every run would be handed "Global.checkpoints=None".
    print("⚠️  Skipping inference: no trained model available")
elif os.path.exists(sample_images_dir):
    # Sorted so the same three images are picked on every run.
    sample_images = sorted(
        name for name in os.listdir(sample_images_dir)
        if name.lower().endswith(('.jpg', '.png', '.jpeg'))
    )[:3]

    for img_name in sample_images:
        img_path = os.path.join(sample_images_dir, img_name)
        print(f"\n🔍 Testing: {img_name}")

        # Build the inference command. Both overrides follow ONE "-o":
        # PaddleOCR declares the flag with nargs="+", so a repeated "-o"
        # would keep only the last override and drop the checkpoint.
        infer_cmd = [
            sys.executable,
            "tools/infer_rec.py",
            "-c", config_file,
            "-o",
            f"Global.checkpoints={model_path}",
            f"Global.infer_img={img_path}",
        ]

        try:
            result = subprocess.run(infer_cmd, capture_output=True, text=True, timeout=30)
            if result.returncode == 0:
                # Pick the result lines out of the tool's output.
                output_lines = result.stdout.strip().split('\n')
                for line in output_lines:
                    if 'result:' in line.lower() or 'text:' in line.lower():
                        print(f"  📝 {line}")
            else:
                print(f"  ⚠️  Inference failed: {result.stderr}")

        except subprocess.TimeoutExpired:
            print(f"  ⚠️  Inference timeout for {img_name}")
        except Exception as e:
            print(f"  ❌ Inference error: {e}")

# Summarize where the artifacts live.
print("\n📊 Training Summary:")
print(f"   Model saved: {MODEL_DIR}")
print(f"   Config used: {config_file}")
print(f"   Character dict: {config['Global']['character_dict_path']}")

print("\n✅ Model testing completed!")
print("📝 Next: Upload results to S3 in Cell 8")

In [None]:
# ===== UPLOAD RESULTS TO S3 =====
# 🔧 Cell 8: Upload Training Results to S3

print("☁️  Uploading Training Results to S3")
print("=" * 40)

# Each run uploads under its own timestamped prefix.
S3_OUTPUT_PREFIX = "training-results"
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
results_prefix = "/".join((S3_OUTPUT_PREFIX, "run_" + timestamp))

print(f"📁 S3 destination: s3://{S3_BUCKET}/{results_prefix}/")

def upload_directory_to_s3(local_dir, bucket, s3_prefix):
    """Mirror a local directory tree into S3 below ``s3_prefix``.

    Uses the module-level ``s3_client`` and a ``tqdm`` progress bar. A failed
    upload is reported and skipped.

    Returns the number of files uploaded; 0 when the directory is missing or
    contains no files.
    """
    if not os.path.exists(local_dir):
        print(f"⚠️  Directory not found: {local_dir}")
        return 0

    # Pair every file below local_dir with its destination key, using
    # forward slashes in the key whatever the local separator is.
    upload_plan = [
        (source_path, f"{s3_prefix}/{os.path.relpath(source_path, local_dir)}".replace('\\', '/'))
        for folder, _subdirs, names in os.walk(local_dir)
        for source_path in (os.path.join(folder, name) for name in names)
    ]

    file_count = len(upload_plan)
    if file_count == 0:
        print(f"⚠️  No files found in {local_dir}")
        return 0

    print(f"📤 Uploading {file_count} files...")

    uploaded = 0
    progress_bar = tqdm(upload_plan, desc="Uploading", unit="files")

    for source_path, destination_key in progress_bar:
        try:
            s3_client.upload_file(source_path, bucket, destination_key)
            uploaded += 1
            progress_bar.set_postfix({"uploaded": uploaded})
        except Exception as e:
            print(f"\n⚠️  Failed to upload {source_path}: {e}")

    progress_bar.close()
    print(f"✅ Uploaded {uploaded}/{file_count} files")
    return uploaded

# Upload trained models
print("\n📦 Uploading trained models...")
if not os.path.exists(MODEL_DIR):
    print("⚠️  No models to upload")
else:
    model_count = upload_directory_to_s3(MODEL_DIR, S3_BUCKET, results_prefix + "/models")
    if model_count > 0:
        print(f"✅ Models uploaded: s3://{S3_BUCKET}/{results_prefix}/models/")

# Upload the configuration file; a failure is only reported.
print("\n⚙️  Uploading configuration...")
try:
    config_s3_key = results_prefix + "/config/sagemaker_rec_config.yml"
    s3_client.upload_file(config_file, S3_BUCKET, config_s3_key)
    print(f"✅ Config uploaded: s3://{S3_BUCKET}/{config_s3_key}")
except Exception as e:
    print(f"⚠️  Config upload failed: {e}")

# Upload logs (if any)
log_dir = WORK_DIR + "/logs"
if os.path.exists(log_dir) and os.listdir(log_dir):
    print("\n📄 Uploading logs...")
    log_count = upload_directory_to_s3(log_dir, S3_BUCKET, results_prefix + "/logs")
    if log_count > 0:
        print(f"✅ Logs uploaded: s3://{S3_BUCKET}/{results_prefix}/logs/")

# Build the summary report and upload it.
print("\n📊 Creating training summary...")
summary_report = {
    "training_info": dict(
        timestamp=timestamp,
        # start_time only exists when the training cell ran in this session.
        duration=str(datetime.now() - start_time) if 'start_time' in globals() else "Unknown",
        algorithm=config['Architecture']['algorithm'],
        backbone=config['Architecture']['Backbone']['name'],
        epochs=config['Global']['epoch_num'],
        batch_size=config['Train']['loader']['batch_size_per_card'],
        learning_rate=config['Optimizer']['lr']['learning_rate'],
    ),
    "data_info": dict(
        train_annotation=config['Train']['dataset']['label_file_list'][0],
        val_annotation=config['Eval']['dataset']['label_file_list'][0],
        character_dict=config['Global']['character_dict_path'],
    ),
    "s3_locations": dict(
        models=f"s3://{S3_BUCKET}/{results_prefix}/models/",
        config=f"s3://{S3_BUCKET}/{results_prefix}/config/",
        logs=f"s3://{S3_BUCKET}/{results_prefix}/logs/",
    ),
    "environment": dict(
        platform="AWS SageMaker",
        gpu_available=paddle.is_compiled_with_cuda(),
        python_version=sys.version,
        paddlepaddle_version=paddle.__version__,
    ),
}

# Write the report locally, then push it to S3 (best effort).
summary_file = WORK_DIR + "/training_summary.json"
Path(summary_file).write_text(
    json.dumps(summary_report, indent=2, ensure_ascii=False),
    encoding='utf-8',
)

try:
    summary_s3_key = results_prefix + "/training_summary.json"
    s3_client.upload_file(summary_file, S3_BUCKET, summary_s3_key)
    print(f"✅ Summary uploaded: s3://{S3_BUCKET}/{summary_s3_key}")
except Exception as e:
    print(f"⚠️  Summary upload failed: {e}")

# Print the closing summary in one call, one argument per output line.
print("\n" + "=" * 60)
print(
    "🎉 TRAINING COMPLETED SUCCESSFULLY!",
    "=" * 60,
    "📊 Training Summary:",
    f"   Timestamp: {timestamp}",
    f"   Algorithm: {config['Architecture']['algorithm']}",
    f"   Epochs: {config['Global']['epoch_num']}",
    "",
    "☁️  Results uploaded to S3:",
    f"   📁 Models: s3://{S3_BUCKET}/{results_prefix}/models/",
    f"   ⚙️  Config: s3://{S3_BUCKET}/{results_prefix}/config/",
    f"   📄 Summary: s3://{S3_BUCKET}/{results_prefix}/training_summary.json",
    "",
    "🔄 To use trained model:",
    "   1. Download from S3",
    "   2. Use with PaddleOCR inference tools",
    f"   3. Character dict: {config['Global']['character_dict_path']}",
    "=" * 60,
    sep="\n",
)