<h1>C4 Solution</h1>

<h3>Get the data and copy it to S3</h3>

In [None]:
%%capture
!wget https://s3-us-west-1.amazonaws.com/udacity-aind/dog-project/dogImages.zip

In [2]:
%%capture
!unzip dogImages.zip

In [3]:
%%capture
!aws s3 cp dogImages s3://udacitysolution/ --recursive

<h3>Install and import</h3>

In [2]:
%%capture
!pip install smdebug torch torchvision tqdm

In [1]:
import sagemaker
import boto3
from sagemaker.tuner import CategoricalParameter, ContinuousParameter, HyperparameterTuner
from sagemaker.pytorch import PyTorch
from sagemaker import get_execution_role
from sagemaker.debugger import Rule, DebuggerHookConfig, TensorBoardOutputConfig, CollectionConfig, ProfilerRule, rule_configs
from sagemaker.debugger import ProfilerConfig, FrameworkProfile
import os

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


<h3>Set up parameters, estimator, and tuner</h3>

In [2]:
hyperparameter_ranges = {
    "learning_rate": ContinuousParameter(0.001, 0.1),
    "batch_size": CategoricalParameter([32, 64, 128, 256, 512]),
}

role = sagemaker.get_execution_role()

objective_metric_name = "Test Loss"
objective_type = "Minimize"
metric_definitions = [{"Name": "Test Loss", "Regex": "Testing Loss: ([0-9\\.]+)"}]

In [3]:
estimator = PyTorch(
    entry_point="hpo.py",
    base_job_name='pytorch_dog_hpo',
    role=role,
    framework_version="1.4.0",
    instance_count=1,
    instance_type="ml.m5.xlarge",
    py_version='py3'
)

tuner = HyperparameterTuner(
    estimator,
    objective_metric_name,
    hyperparameter_ranges,
    metric_definitions,
    max_jobs=2,
    max_parallel_jobs=2,
    objective_type=objective_type
)

<h3>Fit the tuner</h3>

In [4]:
bucket_name = 'udacity-ml-aws'
folder_name = 'dogImages'

In [6]:
os.environ['SM_CHANNEL_TRAINING']=f's3://{bucket_name}/{folder_name}'
os.environ['SM_MODEL_DIR']=f's3://{bucket_name}/{folder_name}/model/'
os.environ['SM_OUTPUT_DATA_DIR']=f's3://{bucket_name}/{folder_name}/output/'
tuner.fit({"training": f"s3://{bucket_name}/{foler_name}/"})

.........................................................................................................................................................................................................................................................................................................!


<h3>Describe the tuning results</h3>


In [7]:
from sagemaker.analytics import HyperparameterTuningJobAnalytics

exp = HyperparameterTuningJobAnalytics(
  hyperparameter_tuning_job_name='pytorch-training-240801-1924')

jobs = exp.dataframe()

jobs.sort_values('FinalObjectiveValue', ascending=0)

Unnamed: 0,batch_size,dropout_rate,fc_layer_size,learning_rate,optimizer,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,"""128""",0.5,"""512""",0.001,"""rmsprop""",pytorch-training-240801-1924-008-3239818c,Completed,77.989998,2024-08-01 19:37:05+00:00,2024-08-01 19:44:45+00:00,460.0
3,"""64""",0.491989,"""256""",0.00084,"""rmsprop""",pytorch-training-240801-1924-005-cee15837,Completed,76.32,2024-08-01 19:37:00+00:00,2024-08-01 19:44:40+00:00,460.0
4,"""128""",0.495166,"""256""",0.00051,"""rmsprop""",pytorch-training-240801-1924-004-a572276c,Completed,73.559998,2024-08-01 19:25:04+00:00,2024-08-01 19:36:02+00:00,658.0
6,"""128""",0.420479,"""256""",0.00033,"""rmsprop""",pytorch-training-240801-1924-002-d813dd15,Completed,71.769997,2024-08-01 19:24:50+00:00,2024-08-01 19:35:58+00:00,668.0
2,"""128""",0.472669,"""512""",0.000119,"""adam""",pytorch-training-240801-1924-006-a76a076a,Completed,71.410004,2024-08-01 19:37:03+00:00,2024-08-01 19:44:44+00:00,461.0
7,"""64""",0.352767,"""256""",7.9e-05,"""rmsprop""",pytorch-training-240801-1924-001-cd94c0a3,Completed,67.110001,2024-08-01 19:24:53+00:00,2024-08-01 19:35:56+00:00,663.0
1,"""128""",0.5,"""256""",0.001,"""rmsprop""",pytorch-training-240801-1924-007-42fa60cf,Completed,63.279999,2024-08-01 19:37:03+00:00,2024-08-01 19:44:46+00:00,463.0
5,"""128""",0.462734,"""256""",2e-05,"""adam""",pytorch-training-240801-1924-003-950e7a6c,Completed,22.610001,2024-08-01 19:24:52+00:00,2024-08-01 19:36:04+00:00,672.0


## Imp: If kernel dies, how to continue from a completed training job

In [8]:
#BetterTrainingJobName='pytorch-training-210623-2156-001-fdd5e081'

In [9]:
#my_estimator = sagemaker.estimator.Estimator.attach(BetterTrainingJobName)


In [10]:
#my_estimator.hyperparameters()

In [11]:
#best_estimator=my_estimator

<h3>Prepare to perform Training on Best Estimator</h3>

In [12]:
best_estimator=tuner.best_estimator()


2021-07-05 22:49:50 Starting - Preparing the instances for training
2021-07-05 22:49:50 Downloading - Downloading input data
2021-07-05 22:49:50 Training - Training image download completed. Training in progress.
2021-07-05 22:49:50 Uploading - Uploading generated training model
2021-07-05 22:49:50 Completed - Training job completed


In [13]:
best_estimator.hyperparameters()

{'_tuning_objective_metric': '"Test Loss"',
 'batch_size': '"32"',
 'learning_rate': '0.0011427061709888163',
 'sagemaker_container_log_level': '20',
 'sagemaker_estimator_class_name': '"PyTorch"',
 'sagemaker_estimator_module': '"sagemaker.pytorch.estimator"',
 'sagemaker_job_name': '"pytorch_dog_hpo-2021-07-05-22-28-32-653"',
 'sagemaker_program': '"hpo.py"',
 'sagemaker_region': '"us-east-1"',
 'sagemaker_submit_directory': '"s3://sagemaker-us-east-1-503477914929/pytorch_dog_hpo-2021-07-05-22-28-32-653/source/sourcedir.tar.gz"'}

In [14]:
hyperparameters = {"batch_size": int(best_estimator.hyperparameters()['batch_size'].replace('"', '')), \
                   "learning_rate": best_estimator.hyperparameters()['learning_rate']}
hyperparameters

{'batch_size': 32, 'learning_rate': '0.0011427061709888163'}

In [15]:
rules = [
    Rule.sagemaker(rule_configs.vanishing_gradient()),
    Rule.sagemaker(rule_configs.overfit()),
    Rule.sagemaker(rule_configs.overtraining()),
    Rule.sagemaker(rule_configs.poor_weight_initialization()),
    ProfilerRule.sagemaker(rule_configs.ProfilerReport()),
]

In [16]:
hook_config = DebuggerHookConfig(
    hook_parameters={
        "train.save_interval": "1",
        "eval.save_interval": "1"
    }
)

profiler_config = ProfilerConfig(
    system_monitor_interval_millis=500, framework_profile_params=FrameworkProfile(num_steps=1)
)

<h2>Creating an Estimator</h2>

In [17]:
#adjust this cell to accomplish multi-instance training
estimator = PyTorch(
    entry_point='hpo.py',
    base_job_name='dog-pytorch',
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    framework_version='1.4.0',
    py_version='py3',
    hyperparameters=hyperparameters,
    ## Debugger and Profiler parameters
    rules = rules,
    debugger_hook_config=hook_config,
    profiler_config=profiler_config,
)

In [18]:
estimator.fit({"training": "s3://udacitysolution/"}, wait=False)

<h2>Creating an Estimator - Multi-Instance Training,</h2>

In [19]:
###in this cell, create and fit an estimator using multi-instance training


<h2>Deployment</h2>

In [24]:
model_location=estimator.model_data


In [9]:
import sagemaker
import boto3
from sagemaker.tuner import CategoricalParameter, ContinuousParameter, HyperparameterTuner
from sagemaker.pytorch import PyTorch
from sagemaker import get_execution_role
from sagemaker.debugger import Rule, DebuggerHookConfig, TensorBoardOutputConfig, CollectionConfig, ProfilerRule, rule_configs
from sagemaker.debugger import ProfilerConfig, FrameworkProfile

from sagemaker.pytorch import PyTorchModel
from sagemaker.predictor import Predictor


In [12]:
jpeg_serializer = sagemaker.serializers.IdentitySerializer("image/jpeg")
json_deserializer = sagemaker.deserializers.JSONDeserializer()


class ImagePredictor(Predictor):
    def __init__(self, endpoint_name, sagemaker_session):
        super(ImagePredictor, self).__init__(
            endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=jpeg_serializer,
            deserializer=json_deserializer,
        )

In [13]:
# My model from before to skip steps

current_job_name = 'pytorch-training-2024-08-01-23-18-00-161'
sagemaker_session = sagemaker.Session()
estimator = sagemaker.estimator.Estimator.attach(current_job_name, sagemaker_session=sagemaker_session)
model_location=estimator.model_data


2024-08-01 23:41:02 Starting - Preparing the instances for training
2024-08-01 23:41:02 Downloading - Downloading the training image
2024-08-01 23:41:02 Training - Training image download completed. Training in progress.
2024-08-01 23:41:02 Uploading - Uploading generated training model
2024-08-01 23:41:02 Completed - Training job completed


In [30]:
pytorch_model = PyTorchModel(model_data=model_location, role=role, entry_point='infernce2.py',py_version='py3',
                             framework_version='1.8',
                             predictor_cls=ImagePredictor)

In [31]:
predictor = pytorch_model.deploy(initial_instance_count=1, instance_type='ml.m5.large')


---------------!


2024-08-01 23:41:02 Starting - Preparing the instances for training
2024-08-01 23:41:02 Downloading - Downloading the training image
2024-08-01 23:41:02 Training - Training image download completed. Training in progress.
2024-08-01 23:41:02 Uploading - Uploading generated training model
2024-08-01 23:41:02 Completed - Training job completed


In [None]:
import requests
#request_dict={ "url": "https://cdn1-www.cattime.com/assets/uploads/2011/12/file_2744_british-shorthair-460x290-460x290.jpg" }
request_dict={ "url": "https://s3.amazonaws.com/cdn-origin-etr.akc.org/wp-content/uploads/2017/11/20113314/Carolina-Dog-standing-outdoors.jpg" }

img_bytes = requests.get(request_dict['url']).content
type(img_bytes)

In [None]:
from PIL import Image
import io
Image.open(io.BytesIO(img_bytes))

In [None]:
response=predictor.predict(img_bytes, initial_args={"ContentType": "image/jpeg"})

In [None]:
import json
response2=predictor.predict(json.dumps(request_dict), initial_args={"ContentType": "application/json"})

In [None]:
type(response2[0][0])

In [None]:
response2[0]

In [None]:
import torch
import numpy as np
np.argmax(response, 1)