In [1]:
!pip install --upgrade sagemaker

Collecting sagemaker
  Downloading sagemaker-2.232.2-py3-none-any.whl.metadata (16 kB)
Collecting sagemaker-mlflow (from sagemaker)
  Downloading sagemaker_mlflow-0.1.0-py3-none-any.whl.metadata (3.3 kB)
Collecting mlflow>=2.8 (from sagemaker-mlflow->sagemaker)
  Downloading mlflow-2.16.2-py3-none-any.whl.metadata (29 kB)
Collecting mlflow-skinny==2.16.2 (from mlflow>=2.8->sagemaker-mlflow->sagemaker)
  Downloading mlflow_skinny-2.16.2-py3-none-any.whl.metadata (30 kB)
Collecting alembic!=1.10.0,<2 (from mlflow>=2.8->sagemaker-mlflow->sagemaker)
  Downloading alembic-1.13.3-py3-none-any.whl.metadata (7.4 kB)
Collecting graphene<4 (from mlflow>=2.8->sagemaker-mlflow->sagemaker)
  Downloading graphene-3.3-py2.py3-none-any.whl.metadata (7.7 kB)
Collecting markdown<4,>=3.3 (from mlflow>=2.8->sagemaker-mlflow->sagemaker)
  Downloading Markdown-3.7-py3-none-any.whl.metadata (7.0 kB)
Collecting gunicorn<24 (from mlflow>=2.8->sagemaker-mlflow->sagemaker)
  Downloading gunicorn-23.0.0-py3-none-

In [None]:
import boto3
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker import get_execution_role
from sagemaker.inputs import TrainingInput

In [None]:
# boto3 client for S3.
s3 = boto3.client("s3")

# Create the SageMaker session.
session = sagemaker.Session()

# Execution role (permissions) that is later passed to the estimator.
role = get_execution_role()

# S3 bucket name, and the output location built from it.
bucket = "lieon-data"
output_path = "s3://{}/Output".format(bucket)

In [None]:
# Configure the PyTorch estimator that describes the training job.
estimator = PyTorch(
    entry_point='train.py',  # training script
    source_dir='.',  # the current directory is used as the source directory
    role=role,
    instance_count=1,  # number of instances to use
    instance_type='ml.g4dn.2xlarge',  # instance type to use (GPU-capable)
    framework_version='2.4.0',  # PyTorch version
    py_version='py311',
    output_path=output_path,  # location where the trained model is stored
    # NOTE(review): model_dir=False is the documented opt-out for the
    # TensorFlow estimator; confirm the PyTorch estimator honors it as well.
    model_dir=False,
    # Reuse the session created in the setup cell so the estimator does not
    # build a second, implicit default session of its own.
    sagemaker_session=session,
)

In [None]:
# Root of the dataset, derived from `bucket` so the bucket name is configured
# in exactly one place instead of being repeated in every URI.
dataset_root = f's3://{bucket}/Dataset'

# S3 locations of the audio and label files for each split.
wav_dir_train = f'{dataset_root}/Train/Audio'
label_dir_train = f'{dataset_root}/Train/Label'

wav_dir_val = f'{dataset_root}/Val/Audio'
label_dir_val = f'{dataset_root}/Val/Label'

wav_dir_test = f'{dataset_root}/Test/Audio'
label_dir_test = f'{dataset_root}/Test/Label'

# Content types attached to the channels.
# NOTE(review): 'wav' is not a registered MIME type ('audio/wav' is); the value
# is kept as-is — confirm whether anything downstream depends on it.
audio_content_type = 'wav'
label_content_type = 'text/csv'

# One TrainingInput per (split, kind) pair; these are the channel definitions
# handed to estimator.fit().
train_audio_input = TrainingInput(s3_data=wav_dir_train, content_type=audio_content_type)
train_label_input = TrainingInput(s3_data=label_dir_train, content_type=label_content_type)

val_audio_input = TrainingInput(s3_data=wav_dir_val, content_type=audio_content_type)
val_label_input = TrainingInput(s3_data=label_dir_val, content_type=label_content_type)

test_audio_input = TrainingInput(s3_data=wav_dir_test, content_type=audio_content_type)
test_label_input = TrainingInput(s3_data=label_dir_test, content_type=label_content_type)

In [None]:
# Start training on the data stored in S3: each key names an input channel and
# maps to the TrainingInput holding the S3 audio or label location for it.
data_channels = dict(
    train_audio=train_audio_input,
    train_label=train_label_input,
    val_audio=val_audio_input,
    val_label=val_label_input,
    test_audio=test_audio_input,
    test_label=test_label_input,
)
estimator.fit(inputs=data_channels)