# Text to Fashion Images

## 1. Upload data to S3
Here I use the Pokémon dataset as an example; it consists of 833 image-text pairs. To scale up, process your own data into the same format.

In [2]:
import sagemaker
import boto3
import datetime
import json
import os

In [3]:
# Session handle used for every SageMaker call in this notebook.
sagemaker_session = sagemaker.Session()

# Default bucket associated with the session.
bucket = sagemaker_session.default_bucket()

# Execution role of the current environment; the training job is launched with it.
role = sagemaker.get_execution_role()

# Upload the local "example_data" directory under the same key prefix and
# print the resulting S3 URI, which is later passed to the training job.
prefix = 'example_data'
inputs_train = sagemaker_session.upload_data(
    path="example_data",
    key_prefix=prefix,
)
print(inputs_train)

s3://sagemaker-us-east-1-420486383638/example_data


## 2. Start a training job

In [None]:
import time
from sagemaker.estimator import Estimator

# Region of the active session; it selects the regional ECR registry below.
region = sagemaker_session.boto_session.region_name

# PyTorch 1.13.1 GPU training image (py39, CUDA 11.7, Ubuntu 20.04), per its tag.
image_uri = f'763104351884.dkr.ecr.{region}.amazonaws.com/pytorch-training:1.13.1-gpu-py39-cu117-ubuntu20.04-sagemaker'

# Cluster shape for the job.
instance_count = 2
instance_type = 'ml.g5.12xlarge'

# Environment variables handed to the training container.
# NOTE(review): presumably read by sm_scripts/entry.py - confirm against that script.
environment = {
    # Node count, passed as a string.
    'NODE_NUMBER': str(instance_count),
    'OUTPUT_DIR': '/opt/ml/model',
    'MODEL_NAME': "stabilityai/stable-diffusion-xl-base-1.0",
    'VAE_NAME': "madebyollin/sdxl-vae-fp16-fix",
    # NOTE(review): looks like the mount point of the 'train' channel given to fit() - confirm.
    'TRAIN_DIR': '/opt/ml/input/data/train',
}

estimator = Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=instance_count,
    instance_type=instance_type,
    source_dir='./sm_scripts',
    entry_point='entry.py',
    environment=environment,
    base_job_name='t2i-acc-launch-2',
    # Maximum job lifetime in seconds (2 days here). Author's note, translated:
    # the default is 2 days; raising it requires a quota-increase ticket
    # (up to 28 days). NOTE(review): verify these limits against current AWS quotas.
    max_run=2 * 24 * 3600,
    disable_profiler=True,
    debugger_hook_config=False,
)

# Launch the job with the uploaded data as the 'train' channel.
estimator.fit({'train': inputs_train})


INFO:sagemaker:Creating training-job with name: t2i-acc-launch-2-2024-03-22-16-22-47-061


2024-03-22 16:22:50 Starting - Starting the training job...
2024-03-22 16:22:57 Pending - Training job waiting for capacity...
2024-03-22 16:23:23 Pending - Preparing the instances for training...
2024-03-22 16:24:11 Downloading - Downloading input data......
2024-03-22 16:25:11 Downloading - Downloading the training image............
2024-03-22 16:27:22 Training - Training image download completed. Training in progress......[35mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[35mbash: no job control in this shell[0m
[35m2024-03-22 16:28:01,845 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[35m2024-03-22 16:28:01,901 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[35m2024-03-22 16:28:01,911 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[35m2024-03-22 16:28:01,913 sagemaker_pytorch_container.training INFO   