# End-to-end model training & deployment using the AWS SageMaker platform

## 1. Import libraries & modules:

In [1]:
%cd ..
from sagemaker.pytorch import PyTorch
from sagemaker import get_execution_role
import os
import zipfile
import boto3
from pathlib import Path
import pandas as pd
import shutil

/home/ec2-user/SageMaker/gender-classification




sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


## 2. Prepare data:

In [2]:
# Extract the raw dataset archive into the local `data/` working directory.
# The directory is created up front (no error if it already exists).
Path('data').mkdir(parents=True, exist_ok=True)
with zipfile.ZipFile('gender_classification_dataset.zip') as dataset_archive:
    dataset_archive.extractall('data')

In [3]:
# Read the train/test split manifests from the `data/` directory.
# Later cells iterate over the `path` column of each frame.
train_df, test_df = map(pd.read_csv, ('data/train.csv', 'data/test.csv'))

In [4]:
# Create one output folder per split (train/test) and class sub-folder
# (male/female); `exist_ok=True` means an existing folder is not an error.
for folder in (
    'data/train/male',
    'data/train/female',
    'data/test/male',
    'data/test/female',
):
    os.makedirs(folder, exist_ok=True)

In [5]:
# Copy every file listed in a split manifest into that split's folder, then
# place the manifest CSV next to the files it describes.
# Assumes each `path` value is relative to `data/` and starts with an existing
# class sub-folder (e.g. `male/...`) — TODO confirm against the CSV contents.
# Using a loop as the final statement also keeps the cell from echoing the
# return value of the last `shutil.copy` call as stray output.
for split, split_df in (('train', train_df), ('test', test_df)):
    for path in split_df.path:
        shutil.copy(f'data/{path}', f'data/{split}/{path}')
    shutil.copy(f'data/{split}.csv', f'data/{split}/{split}.csv')

'data/test/test.csv'

In [6]:
# Build one archive per split. Each entry is stored under its manifest-relative
# name (second argument = arcname), so the `data/<split>/` prefix is not part
# of the archive layout. The manifest CSV is written last, at the archive root.
with zipfile.ZipFile('train.zip', mode='w') as train_archive:
    for rel_path in train_df['path']:
        train_archive.write(f'data/train/{rel_path}', rel_path)
    train_archive.write('data/train/train.csv', 'train.csv')

with zipfile.ZipFile('test.zip', mode='w') as test_archive:
    for rel_path in test_df['path']:
        test_archive.write(f'data/test/{rel_path}', rel_path)
    test_archive.write('data/test/test.csv', 'test.csv')

In [7]:
# Push both split archives to the project bucket, under the `data/` key prefix.
s3 = boto3.client('s3')
bucket = 'hieunt-sagemaker-gender-classification'
prefix = 'data'
s3.upload_file(Filename='train.zip', Bucket=bucket, Key=f'{prefix}/train.zip')
s3.upload_file(Filename='test.zip', Bucket=bucket, Key=f'{prefix}/test.zip')

In [8]:
# Remove the local working directory and the two split archives.
# Each removal tolerates a missing target, so the cell can be re-run (or run
# after a partially failed earlier step) without raising FileNotFoundError.
if Path('data').exists():
    shutil.rmtree('data')
Path('train.zip').unlink(missing_ok=True)  # `missing_ok` needs Python 3.8+
Path('test.zip').unlink(missing_ok=True)

## 3. Train the model with the training script (`train_and_deploy.py`):

In [8]:
# Configure a single-instance PyTorch training job whose entry point is
# `train_and_deploy.py` inside the `code` source directory, running under the
# notebook's execution role.
role = get_execution_role()

# Values handed to the training script as hyperparameters.
training_hyperparameters = {
    'epochs': 10,
    'batch_size': 32,
    'learning_rate': 1e-3,
    'model_filename': 'gender_classifier_250426.pt',
}

model = PyTorch(
    entry_point='train_and_deploy.py',
    source_dir='code',
    role=role,
    instance_type='ml.m5.xlarge',
    instance_count=1,
    framework_version='2.2.0',
    py_version='py310',
    hyperparameters=training_hyperparameters,
)

# Named inputs for the job: the train/test archives stored in S3.
input_channels = {
    'train': 's3://hieunt-sagemaker-gender-classification/data/train.zip',
    'test': 's3://hieunt-sagemaker-gender-classification/data/test.zip',
}
model.fit(input_channels)

2025-04-26 15:46:39 Starting - Starting the training job...
2025-04-26 15:47:10 Downloading - Downloading input data...
2025-04-26 15:47:30 Downloading - Downloading the training image......
2025-04-26 15:48:40 Training - Training image download completed. Training in progress...[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
  "cipher": algorithms.TripleDES,[0m
  "class": algorithms.TripleDES,[0m
[34m2025-04-26 15:48:48,824 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-04-26 15:48:48,824 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2025-04-26 15:48:48,825 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2025-04-26 15:48:48,834 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-04-26 15:48:48,836 sa

## 4. Deploy the trained model:

In [11]:
# Deploy the trained estimator to an endpoint and keep the returned predictor,
# so the endpoint can be invoked or torn down later from this notebook
# (e.g. `predictor.delete_endpoint()`).
# NOTE(review): `entry_point` here differs from the training entry point;
# presumably it is resolved relative to the estimator's `source_dir` — confirm.
predictor = model.deploy(
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
    endpoint_name='gender-classifier-v1-1',
    model_name='gender-classification-v1-1',
    entry_point='src/inference.py',
)
predictor

--------!

<sagemaker.pytorch.model.PyTorchPredictor at 0x7fcb5ed34160>