In [34]:
%%writefile train.py
# Imports
import tensorflow as tf
from tensorflow import keras
from PIL import Image
import numpy as np
import pandas as pd 
import os
import zipfile
import datetime
from io import BytesIO
import boto3
from sklearn.model_selection import train_test_split
from keras.preprocessing import image as keras_image_preprocessing
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.utils import to_categorical

# Constants for AWS
# Forward the container's ACCESS_KEY / SECRET_KEY variables to the names boto3
# reads. os.environ only accepts strings, so a variable is exported only when
# it is actually set to a non-empty value; otherwise boto3 is left to resolve
# credentials on its own.
for _source_var, _boto_var in (('ACCESS_KEY', 'AWS_ACCESS_KEY_ID'),
                               ('SECRET_KEY', 'AWS_SECRET_ACCESS_KEY')):
    _value = os.getenv(_source_var)
    if _value:
        os.environ[_boto_var] = _value
BUCKET_NAME = "garbagenet-bucket-30032023"
S3_DATA_FILE_PATH = 'FilePath/garbage_dataset_importfile.csv'

# Constants for ML
IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
IMAGE_CHANNELS = 3
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)  # (width, height)
# Derived from the values above so the model input shape cannot drift from them.
IMAGE_SHAPE = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)
# Category name -> integer class index.
categories = {'paper': 0,'cardboard': 1,'plastic': 2,'metal': 3,'trash': 4,'glass': 5}
# One output unit per category; derived so it always matches the mapping.
NUM_CLASSES = len(categories)
BATCH_SIZE = 16
EPOCHS = 15
MODEL_DIR = "models/"

# Custom utilities
def zipfolder(foldername, filename, includeEmptyDir=True):
    """Write the contents of a directory tree into a deflate-compressed zip file.

    Args:
        foldername: Directory to walk. Entries are stored under the same
            path that os.walk reports (i.e. including `foldername` itself).
        filename: Path of the zip archive to create (overwritten if present).
        includeEmptyDir: When True, each empty sub-directory is recorded as
            an explicit directory entry (name ending in "/").
    """
    # The context manager closes the archive even if a write fails part-way.
    with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as archive:
        for root, dirnames, filenames in os.walk(foldername):
            for name in filenames:
                archive.write(os.path.join(root, name))
            if not includeEmptyDir:
                continue
            for dirname in dirnames:
                dirpath = os.path.join(root, dirname)
                # Only empty directories need an explicit entry; non-empty ones
                # are implied by the files written beneath them.
                if not os.listdir(dirpath):
                    archive.writestr(zipfile.ZipInfo(dirpath + "/"), "")
    
# Custom utilities for model training
def _load_s3_image(s3_client, s3_object_path):
    """Fetch one image from S3 and return it as a float32 RGB array scaled to [0, 1].

    The object key is everything after the third '/' of `s3_object_path`, so
    the location is assumed to look like `scheme://bucket/key`. The bucket
    segment of the path is ignored; BUCKET_NAME is always used.
    """
    s3_key = s3_object_path.split('/', 3)[3]
    response = s3_client.get_object(Bucket=BUCKET_NAME, Key=s3_key)
    s3_image = Image.open(BytesIO(response['Body'].read())).convert('RGB')
    s3_image = s3_image.resize(IMAGE_SIZE)
    return np.array(s3_image).astype('float32') / 255.0


def custom_aws_dataframe_iterator(df, batch_size, s3_client, num_classes=None):
    """Endlessly yield (images, one_hot_labels) batches read from S3.

    Args:
        df: DataFrame with an 'image_aws_location' column (S3 object paths)
            and a 'label' column (integer class indices, possibly as strings).
        batch_size: Number of rows per batch; the last batch of each pass
            may be smaller.
        s3_client: Client object exposing `get_object(Bucket=..., Key=...)`.
        num_classes: Width of the one-hot encoding. Defaults to NUM_CLASSES so
            the labels always match the model's output layer, even when a
            split happens to be missing a class.

    Yields:
        Tuple of (array of images, one-hot label array) for each batch.
        After the last row the iteration restarts from the first row.

    Raises:
        ValueError: On first iteration, if `df` is empty or `batch_size` is
            not positive (either would otherwise loop forever without
            yielding anything).
    """
    if len(df) == 0:
        raise ValueError("custom_aws_dataframe_iterator received an empty dataframe")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if num_classes is None:
        num_classes = NUM_CLASSES

    while True:
        # iterate over batches of the dataframe
        for start in range(0, len(df), batch_size):
            batch_df = df.iloc[start:start + batch_size]
            batch_images = [
                _load_s3_image(s3_client, s3_object_path)
                for s3_object_path in batch_df['image_aws_location'].values
            ]
            batch_labels = batch_df['label'].values
            # Yield the preprocessed images and one-hot encoded labels as a batch
            yield np.array(batch_images), to_categorical(batch_labels, num_classes=num_classes)

# AWS Set Up
def get_aws_s3_client():
    """Create and return a new boto3 client for the 's3' service."""
    return boto3.client('s3')

# Data preparation for custom training
def data_prep():
    """Download the dataset index from S3 and split it for training.

    The CSV at S3_DATA_FILE_PATH is saved as `data.csv` in the current
    working directory. It is read without a header row as two columns:
    image location and label.

    Returns:
        Tuple `(train_df, validate_df, total_train, total_validate)`. The
        frames have columns 'image_aws_location' and 'label', where 'label'
        holds the class index from `categories` as a string. A third
        (test) split is computed and reported but not returned.

    Raises:
        ValueError: If the CSV contains a label that is not a key of
            `categories`.
    """
    s3_client = get_aws_s3_client()

    # Set the local file path to save the downloaded object in the current directory
    local_data_file_path = os.path.join(os.getcwd(), 'data.csv')

    # Download the object from the bucket to a local file
    s3_client.download_file(BUCKET_NAME, S3_DATA_FILE_PATH, local_data_file_path)

    # Read back exactly the file that was just downloaded.
    df = pd.read_csv(local_data_file_path, header=None)
    df.columns = ['image_aws_location', 'label']
    # Seeded shuffle: without it the seeded splits below would still differ
    # from run to run.
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Change the categories from names to class indices
    class_ids = df["label"].map(categories)
    unknown_labels = df.loc[class_ids.isna(), "label"].unique()
    if len(unknown_labels) > 0:
        # Fail here rather than letting 'nan' labels reach the training loop.
        raise ValueError(
            "Unknown labels in %s: %s"
            % (S3_DATA_FILE_PATH, sorted(str(label) for label in unknown_labels))
        )
    df["label"] = class_ids.astype(str)

    # We first split the data into two sets and then split the validate_df to two sets
    train_df, validate_df = train_test_split(df, test_size=0.2, random_state=42)
    validate_df, test_df = train_test_split(validate_df, test_size=0.3, random_state=42)

    train_df = train_df.reset_index(drop=True)
    validate_df = validate_df.reset_index(drop=True)
    test_df = test_df.reset_index(drop=True)

    total_train = train_df.shape[0]
    total_test = test_df.shape[0]
    total_validate = validate_df.shape[0]

    print("#################### DATA METRICS ####################")
    print('train size = ', total_train, 'validate size = ', total_validate, 'test size = ', total_test)

    return (train_df,validate_df,total_train,total_validate)

# Building Tensorflow Model
def get_model(image_shape,num_classes):
    """Build and compile the convolutional image classifier.

    Args:
        image_shape: Input shape passed to the first convolution layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled Keras Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    # Two convolution + max-pooling stages.
    conv_stack = [
        keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=image_shape),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
    ]
    # Flatten, one hidden dense layer with dropout, then the softmax output.
    dense_head = [
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation='softmax'),
    ]

    network = keras.Sequential(conv_stack + dense_head)
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network
    
            
def _export_serving_model(model, export_dir):
    """Save `model` as a SavedModel whose default signature accepts JPEG bytes.

    The "serving_default" signature takes a 1-D batch of encoded JPEG strings,
    decodes each to a 224x224 float32 RGB image, and runs it through the model.
    """
    CONCRETE_INPUT = "numpy_inputs"

    # Concrete function over the model's call; its input tensor is named
    # CONCRETE_INPUT so it can be invoked by keyword from serving_fn.
    m_call = tf.function(model.call).get_concrete_function(
        [tf.TensorSpec(shape=[None, 224, 224, 3], dtype=tf.float32, name=CONCRETE_INPUT)]
    )

    # Tensorflow serving utilities
    def _preprocess(bytes_input):
        decoded = tf.io.decode_jpeg(bytes_input, channels=3)
        decoded = tf.image.convert_image_dtype(decoded, tf.float32)
        resized = tf.image.resize(decoded, size=(224, 224))
        return resized

    @tf.function(input_signature=[tf.TensorSpec([None], tf.string)])
    def preprocess_fn(bytes_inputs):
        decoded_images = tf.map_fn(
            _preprocess, bytes_inputs, dtype=tf.float32, back_prop=False
        )
        return {
            CONCRETE_INPUT: decoded_images
        }  # User needs to make sure the key matches model's input

    @tf.function(input_signature=[tf.TensorSpec([None], tf.string)])
    def serving_fn(bytes_inputs):
        images = preprocess_fn(bytes_inputs)
        prob = m_call(**images)
        return prob

    tf.saved_model.save(model, export_dir, signatures={"serving_default": serving_fn})


def train():
    """Run the full pipeline: prepare data, fit, export, zip, and upload.

    Steps:
        1. Load and split the dataset index via `data_prep`.
        2. Fit the model from `get_model` on batches streamed from S3.
        3. Save it under MODEL_DIR as a SavedModel with a JPEG-bytes signature.
        4. Zip MODEL_DIR, move the archive into `Trained_models/`, and upload
           it to the bucket under the same relative path.
    """
    print("#################### TRAINING STARTS ####################")

    # Getting the data
    train_df, validate_df, total_train, total_validate = data_prep()

    # Getting the model
    model = get_model(IMAGE_SHAPE,NUM_CLASSES)

    # Create model save directory (tolerate a directory left by an earlier run)
    model_save_path = os.path.join(os.getcwd(), MODEL_DIR)
    os.makedirs(model_save_path, exist_ok=True)
    print("Directory '% s' created" % MODEL_DIR)

    s3_client = get_aws_s3_client()

    train_generator = custom_aws_dataframe_iterator(train_df,BATCH_SIZE,s3_client)
    validation_generator = custom_aws_dataframe_iterator(validate_df,BATCH_SIZE,s3_client)

    # Model Training
    # The generators never terminate, so both step counts are required; they
    # are clamped to at least 1 so a split smaller than one batch still runs.
    model.fit(
        train_generator,
        epochs=EPOCHS,
        steps_per_epoch=max(1, total_train // BATCH_SIZE),
        validation_data=validation_generator,
        validation_steps=max(1, total_validate // BATCH_SIZE),
    )

    _export_serving_model(model, model_save_path)

    # Archive name format is kept unchanged; note it contains a space and colons.
    ct_string = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    zipfile_name = 'models'+ct_string+'.zip'
    zipfolder(MODEL_DIR,zipfile_name)

    # Create the local staging directory (tolerate an existing one)
    model_local_directory = "Trained_models"
    os.makedirs(model_local_directory, exist_ok=True)

    # Move the archive into the staging directory
    os.rename(zipfile_name, os.path.join(model_local_directory, zipfile_name))

    # The same relative path is used as the local file path and the S3 key.
    aws_s3_upload_path = "Trained_models/" + zipfile_name
    upload_blob(aws_s3_upload_path)
    
def upload_blob(upload_path):
    """Upload a local file to BUCKET_NAME, using `upload_path` as both the
    local file path and the destination object key, then print a confirmation.
    """
    get_aws_s3_client().upload_file(upload_path, BUCKET_NAME, upload_path)
    print(f'{upload_path} has been uploaded to {BUCKET_NAME}.')
    
# Script entry point: print a start-up marker, then run the training pipeline.
if __name__ == "__main__":
    print('main')
    train()

Overwriting train.py


In [35]:
%%writefile Dockerfile
FROM tensorflow/tensorflow:2.11.0
WORKDIR /root

# SECURITY: never bake AWS keys into the image or commit them to the repo.
# The key pair that was previously hard-coded here has been exposed through
# version control and the pushed image; deactivate and rotate it.
#
# train.py reads ACCESS_KEY / SECRET_KEY unconditionally, so both names stay
# defined but blank. Either supply real values at run time through the job's
# environment settings or, preferably, leave them blank and grant the job's
# execution role access to the bucket.
# NOTE(review): boto3 is expected to skip blank environment credentials and
# continue down its credential provider chain - confirm before relying on it.
ENV ACCESS_KEY="" \
    SECRET_KEY=""

# Install dependencies before copying the script so that edits to train.py
# reuse the cached dependency layer.
COPY requirements.txt ./requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt

COPY train.py ./train.py

ENTRYPOINT ["python3", "train.py"]

Overwriting Dockerfile


In [36]:
%%writefile buildspec.yaml
version: 0.2

# Values shared by the commands in every phase below.
env:
  variables:
    AWS_DEFAULT_REGION: "us-east-2"
    AWS_ACCOUNT_ID: "905911077025"
    IMAGE_REPO_NAME: "garbagenet-train"
    IMAGE_TAG: "latest"

phases:
  # Authenticate the Docker CLI against the account's ECR registry.
  pre_build:
    commands:
      - echo Logging in to Amazon ECR...
      - aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com

  # Build the image and tag it with its full ECR repository URI.
  build:
    commands:
      - echo Build started on `date`
      - echo Building the Docker image...
      - docker build -t $IMAGE_REPO_NAME:$IMAGE_TAG .
      - docker tag $IMAGE_REPO_NAME:$IMAGE_TAG $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$IMAGE_REPO_NAME:$IMAGE_TAG

  # Push the tagged image to ECR.
  post_build:
    commands:
      - echo Build completed on `date`
      - echo Pushing the Docker image...
      - docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$IMAGE_REPO_NAME:$IMAGE_TAG

Overwriting buildspec.yaml


In [37]:
# Stage the three generated files, commit them, and push every local branch to
# `origin` (recording upstream tracking for each).
# Review the staged files for secrets first: a push publishes their full
# contents to the remote repository.
!git add Dockerfile buildspec.yaml train.py
!git commit -m "Adding latest files"
!git push -u --all origin

[master 27e354c] Adding latest files
 Committer: EC2 Default User <ec2-user@ip-172-16-15-205.us-east-2.compute.internal>
Your name and email address were configured automatically based
on your username and hostname. Please check that they are accurate.
You can suppress this message by setting them explicitly:

    git config --global user.name "Your Name"
    git config --global user.email you@example.com

After doing this, you may fix the identity used for this commit with:

    git commit --amend --reset-author

 1 file changed, 2 insertions(+), 2 deletions(-)
Enumerating objects: 5, done.
Counting objects: 100% (5/5), done.
Delta compression using up to 2 threads
Compressing objects: 100% (3/3), done.
Writing objects: 100% (3/3), 331 bytes | 331.00 KiB/s, done.
Total 3 (delta 2), reused 0 (delta 0), pack-reused 0
remote: Validating objects: 100%[K
To https://git-codecommit.us-east-2.amazonaws.com/v1/repos/AmazonSageMaker-garbage-classifier-CT
   4684f82..27e354c  master -> master
b

In [38]:
# Start a build of the CodeBuild project "garbagenet-train-image-build-push".
# The response below shows the build as QUEUED / IN_PROGRESS, so this call
# returns without waiting for the build to finish.
!aws codebuild start-build --project-name garbagenet-train-image-build-push

{
    "build": {
        "id": "garbagenet-train-image-build-push:a3ceb0e9-6e19-4744-83d3-e07140fbbe40",
        "arn": "arn:aws:codebuild:us-east-2:905911077025:build/garbagenet-train-image-build-push:a3ceb0e9-6e19-4744-83d3-e07140fbbe40",
        "buildNumber": 13,
        "startTime": 1680156878.367,
        "currentPhase": "QUEUED",
        "buildStatus": "IN_PROGRESS",
        "sourceVersion": "refs/heads/master",
        "projectName": "garbagenet-train-image-build-push",
        "phases": [
            {
                "phaseType": "SUBMITTED",
                "phaseStatus": "SUCCEEDED",
                "startTime": 1680156878.367,
                "endTime": 1680156878.409,
                "durationInSeconds": 0
            },
            {
                "phaseType": "QUEUED",
                "startTime": 1680156878.409
            }
        ],
        "source": {
            "type": "CODECOMMIT",
            "location": "https://git-codecommit.us-east-2.amazonaws.com/v1/repo

In [48]:
from datetime import datetime

# A second-resolution timestamp suffix gives each training job its own name.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"
JOB_NAME = "garbage-train-" + TIMESTAMP

print(JOB_NAME)

garbage-train-20230330064616


In [49]:
# Launch a SageMaker training job named after JOB_NAME (interpolated by IPython):
#   - image:     garbagenet-train:latest from the account's ECR registry, File input mode
#   - output:    s3://garbagenet-bucket-30032023/Bypass/
#   - resources: one ml.m5.xlarge instance with a 1 GB volume
#   - limit:     stopped after at most 7200 seconds of runtime
#   - identity:  runs under the execution role given by --role-arn
!aws sagemaker create-training-job \
    --training-job-name {JOB_NAME} \
    --algorithm-specification "TrainingImage=905911077025.dkr.ecr.us-east-2.amazonaws.com/garbagenet-train:latest,TrainingInputMode=File" \
    --output-data-config "S3OutputPath=s3://garbagenet-bucket-30032023/Bypass/" \
    --resource-config "InstanceType=ml.m5.xlarge,InstanceCount=1,VolumeSizeInGB=1" \
    --stopping-condition "MaxRuntimeInSeconds=7200" \
    --role-arn "arn:aws:iam::905911077025:role/service-role/AmazonSageMaker-ExecutionRole-20230329T232165"

{
    "TrainingJobArn": "arn:aws:sagemaker:us-east-2:905911077025:training-job/garbage-train-20230330064616"
}
