# Model Training Notebook
- In this notebook, we train an image classifier on the vehicle data that we extracted in the previous notebook.

## Import Packages

In [1]:
import base64

import boto3
import sagemaker
from sagemaker import image_uris
from sagemaker.debugger import Rule, rule_configs
from sagemaker.model_monitor import DataCaptureConfig
from sagemaker.predictor import Predictor
from sagemaker.serializers import IdentitySerializer
from sagemaker.session import TrainingInput

In [None]:
def to_metadata_file(df, prefix):
    """Write a tab-separated ``<prefix>.lst`` metadata file built from ``df``.

    Each output line holds three fields: the ``row`` column, a binary class
    label, and the ``filenames`` value (emitted as the ``s3_path`` field).
    Neither a header nor the index is written.

    The binary label is 0 where ``labels`` equals 8 and 1 for every other
    value. The original author's note names label 48 (motorcycle) as the
    target class, i.e. the one mapped to 1; this presumably relies on ``df``
    holding only labels 8 and 48 -- TODO confirm against the notebook that
    produced the data.

    The input frame is not modified: the derived columns are created on a
    new frame, so callers do not need to pass a defensive copy.

    Args:
        df: DataFrame providing ``row``, ``labels`` and ``filenames`` columns.
        prefix: Output path without the ``.lst`` extension.

    Returns:
        Whatever ``DataFrame.to_csv`` returns, which is ``None`` here because
        a file path is supplied.
    """
    # assign() returns a new DataFrame, leaving the caller's frame untouched.
    metadata = df.assign(
        s3_path=df["filenames"],
        labels=(df["labels"] != 8).astype(int),
    )
    return metadata[["row", "labels", "s3_path"]].to_csv(
        f"{prefix}.lst", sep="\t", index=False, header=False
    )
    
# Write train.lst and test.lst; the helper receives a copy of each frame
# rather than the original object.
for split_prefix, split_frame in (("train", df_train), ("test", df_test)):
    to_metadata_file(split_frame.copy(), split_prefix)

In [None]:
# Upload the two .lst metadata files to the root of the bucket.
# A single Session/resource/Bucket handle is reused for both uploads instead
# of constructing a new boto3 session per file.
lst_bucket = boto3.Session().resource('s3').Bucket(bucket)
for lst_name in ('train.lst', 'test.lst'):
    # The object key equals the local file name in the working directory.
    lst_bucket.Object(lst_name).upload_file(f'./{lst_name}')

In [None]:
# Look up the container image URI for the 'image-classification' framework
# in the current region, requesting the 'latest' version.
algo_image = image_uris.retrieve(
    framework="image-classification",
    region=region,
    version="latest",
)

# S3 prefix that is later handed to the estimator as its output_path.
s3_output_location = "s3://{}/models/image_model".format(bucket)

In [None]:
# Estimator for the retrieved algorithm image: one ml.p3.2xlarge instance,
# output written to s3_output_location, executed under `role`.
img_classifier_model = sagemaker.estimator.Estimator(
    image_uri=algo_image,
    role=role,
    output_path=s3_output_location,
    instance_type="ml.p3.2xlarge",
    instance_count=1,
    volume_size=5,
    sagemaker_session=sagemaker.Session(),
)

In [None]:
# Hyperparameters: two classes, image_shape "3,32,32" (presumably
# channels,height,width -- confirm against the algorithm docs), and the
# number of training samples taken from the size of df_train.
img_classifier_model.set_hyperparameters(
    num_classes=2,
    num_training_samples=len(df_train),
    image_shape="3,32,32",
)

In [None]:
# Map each training channel name to a TrainingInput pointing at its S3
# location. The image prefixes and the .lst files all use the same
# "application/x-image" content type, so the inputs are generated from a
# (channel, key) table instead of four hand-written constructor calls.
# Uses the TrainingInput class already imported at the top of the notebook.
model_inputs = {
    channel: TrainingInput(
        s3_data=f"s3://{bucket}/{s3_key}",
        content_type="application/x-image",
    )
    for channel, s3_key in (
        ("train", "train/"),
        ("validation", "test/"),
        ("train_lst", "train.lst"),
        ("validation_lst", "test.lst"),
    )
}

In [None]:
# Start training with the four channels defined in model_inputs.
img_classifier_model.fit(inputs=model_inputs)

In [None]:
from sagemaker.model_monitor import DataCaptureConfig

# NOTE(review): this rebinds `bucket` to a hard-coded, account-specific
# name -- confirm it matches the bucket used by the earlier cells.
bucket = 'sagemaker-us-east-1-351669278598'
capture_uri = 's3://{}/data-capture'.format(bucket)

# Enable capture with a 100% sampling percentage, stored under capture_uri.
data_capture_config = DataCaptureConfig(
    destination_s3_uri=capture_uri,
    sampling_percentage=100,
    enable_capture=True,
)

In [None]:
# Deploy the trained model to one ml.m5.xlarge instance with the data
# capture configuration attached.
deployment = img_classifier_model.deploy(
    data_capture_config=data_capture_config,
    instance_type="ml.m5.xlarge",
    initial_instance_count=1,
)

# Keep the endpoint name so a Predictor can be attached to it later.
endpoint = deployment.endpoint_name
print(endpoint)

In [None]:
from sagemaker.predictor import Predictor

# Attach a Predictor to the endpoint by its name.
predictor = Predictor(endpoint)

In [None]:
# Requests are serialized with an IdentitySerializer using the image/png
# content type.
predictor.serializer = IdentitySerializer(content_type="image/png")

# Read the sample image as raw bytes and send it to the endpoint.
with open("./test/bicycle_s_001789.png", mode="rb") as png_file:
    payload = png_file.read()

inference = predictor.predict(payload)