In [None]:
!pip install distro
import distro
if "debian" in distro.linux_distribution()[0].lower():
    ! apt-get update
    ! apt-get install ffmpeg libsm6 libxext6  -y

In [None]:
# Refresh the apt package index, then install the Mesa libGL runtime
# (presumably needed for `import cv2` to succeed — TODO confirm).
! apt-get update
! apt-get install -y libgl1-mesa-glx

In [None]:
import sys

# Run pip through the interpreter that backs this kernel (sys.executable),
# one package per command.
!{sys.executable} -m pip install opencv-python-headless
!{sys.executable} -m pip install mxnet

In [None]:
import sagemaker

# Key prefix for this project and the default bucket of the current SageMaker session.
prefix = "Licenseplate_Detection"
bucket = sagemaker.Session().default_bucket()

# Show the resulting S3 location.
print(f"s3://{bucket}/{prefix}/")

In [None]:
from sagemaker import get_execution_role

# Session handle plus the IAM role this notebook executes under.
sess = sagemaker.Session()
role = get_execution_role()
print(role)

### Copying the dataset files to the default SageMaker S3 bucket

In [None]:
import boto3
import sagemaker
from botocore.exceptions import NoCredentialsError, PartialCredentialsError

# AWS handles used by the copy helper defined below.
s3_client = boto3.client("s3")
sagemaker_session = sagemaker.Session()

# Objects are copied from `source_bucket` into the session's default bucket.
# NOTE(review): "Obj_detection_SSD" contains uppercase letters and underscores,
# which current S3 bucket-naming rules do not allow — confirm the real bucket name.
destination_bucket = sagemaker_session.default_bucket()
source_bucket = "Obj_detection_SSD"

# Key prefixes to copy.
# NOTE(review): only label prefixes are listed, while the download cell later
# also reads train/valid/test image prefixes and test labels from the default
# bucket — confirm those objects reach the bucket some other way.
prefixes_to_copy = ["train/labels/", "valid/labels/"]

def copy_to_sagemaker_bucket(source_bucket, destination_bucket, prefixes):
    """Copy every object under the given key prefixes from one bucket to another.

    Object keys are preserved, so ``s3://<source>/<key>`` is written to
    ``s3://<destination>/<key>``. Errors are reported with ``print`` and are
    never raised, so a single failing object does not abort the remaining copies.
    Uses the module-level ``s3_client``.

    Args:
        source_bucket: Name of the bucket to read from.
        destination_bucket: Name of the bucket to write to.
        prefixes: Iterable of key prefixes whose objects should be copied.

    Returns:
        dict: ``{"copied": int, "failed": int}`` — how many objects were copied
        and how many copy calls raised. Callers that ignore the return value
        see the same printed output as before.
    """
    summary = {"copied": 0, "failed": 0}

    try:
        # One paginator serves every prefix; it does not need rebuilding per loop.
        paginator = s3_client.get_paginator('list_objects_v2')

        for prefix in prefixes:
            found_any = False

            for page in paginator.paginate(Bucket=source_bucket, Prefix=prefix):
                # Pages without a 'Contents' entry simply contribute no objects.
                for obj in page.get('Contents', []):
                    found_any = True
                    source_key = obj['Key']
                    copy_source = {'Bucket': source_bucket, 'Key': source_key}

                    try:
                        s3_client.copy_object(CopySource=copy_source, Bucket=destination_bucket, Key=source_key)
                        summary["copied"] += 1
                    except Exception as e:
                        # Best effort: record the failure and keep going.
                        summary["failed"] += 1
                        print(f"Error copying {source_key}: {e}")

            # Report an empty prefix once, not once per empty page.
            if not found_any:
                print(f"No objects found with prefix: {prefix}")

    except (NoCredentialsError, PartialCredentialsError) as e:
        print(f"Credentials not available: {e}")
    except Exception as e:
        print(f"Error: {e}")

    return summary

# Run the copy. The helper catches and prints its own errors, so the message
# below is printed whether or not every object was actually copied.
copy_to_sagemaker_bucket(source_bucket, destination_bucket, prefixes_to_copy)
print("Specified files have been copied from source bucket to the default SageMaker S3 bucket.")

### Downloading the dataset to the notebook

In [None]:

import os
import boto3
# S3 client and source bucket used by the download helper below.
s3 = boto3.client("s3")
bucket = sagemaker.Session().default_bucket()
prefix = ""

# Local layout: ./data/<split>/images/ and ./data/<split>/labels/
local_data_dir = "./data/"
train_images_dir = os.path.join(local_data_dir, "train/images/")
train_labels_dir = os.path.join(local_data_dir, "train/labels/")
val_images_dir = os.path.join(local_data_dir, "val/images/")
val_labels_dir = os.path.join(local_data_dir, "val/labels/")
test_images_dir = os.path.join(local_data_dir, "test/images/")
test_labels_dir = os.path.join(local_data_dir, "test/labels/")

# Create every target directory up front; existing directories are left alone.
for split_dir in (
    train_images_dir,
    train_labels_dir,
    val_images_dir,
    val_labels_dir,
    test_images_dir,
    test_labels_dir,
):
    os.makedirs(split_dir, exist_ok=True)

def download_from_s3(prefix, local_dir):
    """Download every object under ``prefix`` into ``local_dir``.

    Objects are read from the module-level ``bucket`` through the module-level
    ``s3`` client. Each file is stored directly in ``local_dir`` under the base
    name of its key; keys ending in ``/`` are skipped.

    Args:
        prefix: S3 key prefix to list.
        local_dir: Existing local directory that receives the files.
    """
    pages = s3.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=prefix)
    for page in pages:
        for entry in page.get('Contents', []):
            key = entry['Key']
            if key.endswith('/'):
                continue
            target_path = os.path.join(local_dir, os.path.basename(key))
            s3.download_file(bucket, key, target_path)

# (S3 prefix, local directory) pairs. The remote "valid/" split is stored
# locally under "val/".
download_plan = [
    ('train/images/', train_images_dir),
    ('train/labels/', train_labels_dir),
    ('valid/images/', val_images_dir),
    ('valid/labels/', val_labels_dir),
    ('test/images/', test_images_dir),
    ('test/labels/', test_labels_dir),
]
for s3_prefix, target_dir in download_plan:
    download_from_s3(s3_prefix, target_dir)
print("Done copied the labels")

### Convert the YOLO labels to the format used for SSD

In [None]:
# Install libGL and the headless OpenCV wheel ahead of the `import cv2` in the
# next cell. These repeat installs already performed in earlier cells.
!apt-get update
!apt-get install -y libgl1-mesa-glx
!pip install opencv-python-headless

In [None]:
import cv2
import pandas as pd
import os

def convert_yolo_to_ssd(image_path, label_path):
    """Convert one YOLO label file into absolute-pixel corner boxes.

    Each non-blank line of the label file must hold five whitespace-separated
    numbers, ``class_id x_center y_center width height``. The four box values
    are multiplied by the image width/height read from ``image_path`` and
    converted to ``xmin, ymin, xmax, ymax``.

    Args:
        image_path: Path of the image the labels belong to.
        label_path: Path of the YOLO ``.txt`` label file.

    Returns:
        list[list]: One ``[class_id, xmin, ymin, xmax, ymax]`` entry per label
        line. ``class_id`` is a float; the coordinates are ints (truncated).

    Raises:
        ValueError: If the image cannot be read, or a non-blank line does not
            consist of exactly five numbers.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals a missing or undecodable file by returning None
        # instead of raising; fail with a message that names the file.
        raise ValueError(f"Could not read image: {image_path}")
    height, width = image.shape[:2]

    ssd_labels = []
    with open(label_path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only lines carry no box.
                continue
            class_id, x_center, y_center, bbox_width, bbox_height = map(float, fields)
            xmin = int((x_center - bbox_width / 2) * width)
            ymin = int((y_center - bbox_height / 2) * height)
            xmax = int((x_center + bbox_width / 2) * width)
            ymax = int((y_center + bbox_height / 2) * height)
            ssd_labels.append([class_id, xmin, ymin, xmax, ymax])

    return ssd_labels

def create_ssd_labels(images_dir, labels_dir, output_file):
    """Write a header-less CSV of boxes for every labelled ``.jpg`` image of a split.

    For each ``<stem>.txt`` file in ``labels_dir`` whose ``<stem>.jpg`` exists in
    ``images_dir``, the labels are converted with ``convert_yolo_to_ssd`` and one
    CSV row ``image_file, xmin, ymin, xmax, ymax, class_id`` is written per box.
    Label files without a matching ``.jpg`` are skipped.

    Args:
        images_dir: Directory containing the images.
        labels_dir: Directory containing the YOLO ``.txt`` label files.
        output_file: Path of the CSV to write (no index, no header row).
    """
    ssd_labels = []
    # Sorted so the CSV row order is reproducible; os.listdir order is arbitrary.
    for label_file in sorted(os.listdir(labels_dir)):
        stem, extension = os.path.splitext(label_file)
        if extension != '.txt':
            # Anything that is not a label file (sub-directories, stray files)
            # must not be parsed as one.
            continue

        # Swap only the extension; str.replace would touch every ".txt" in the name.
        image_file = stem + '.jpg'
        image_path = os.path.join(images_dir, image_file)
        if not os.path.exists(image_path):
            continue

        label_path = os.path.join(labels_dir, label_file)
        for class_id, xmin, ymin, xmax, ymax in convert_yolo_to_ssd(image_path, label_path):
            ssd_labels.append([image_file, xmin, ymin, xmax, ymax, class_id])

    df = pd.DataFrame(ssd_labels, columns=['image_file', 'xmin', 'ymin', 'xmax', 'ymax', 'class_id'])
    df.to_csv(output_file, index=False, header=False)

# (split directory name, progress message) — one label CSV per split.
for split_name, done_message in (
    ("train", "train done"),
    ("val", "val done"),
    ("test", "test csv done"),
):
    create_ssd_labels(
        f'./data/{split_name}/images',
        f'./data/{split_name}/labels',
        f'{split_name}_ssd_labels.csv',
    )
    print(done_message)

### Create list files for generating RecordIO files

In [None]:
import pandas as pd

def generate_lst_file(csv_file, images_dir, output_lst_file):
    """Write a tab-separated ``.lst`` file from a header-less label CSV.

    Every CSV row becomes one line of the form
    ``<row index>\\t<columns 1..n>\\t<column 0>``, i.e. the first CSV column
    (the image file name) is moved to the end of the line.

    Args:
        csv_file: Path of the header-less CSV to read.
        images_dir: Accepted for interface compatibility; not used.
        output_lst_file: Path of the ``.lst`` file to write.

    NOTE(review): the SageMaker Object Detection documentation describes ``.lst``
    label columns as header fields followed by a class id and normalised box
    coordinates per object — confirm the column order and units written here
    are what the training job expects.
    """
    table = pd.read_csv(csv_file, header=None)
    with open(output_lst_file, 'w') as handle:
        for row_number, record in table.iterrows():
            image_name = record.iloc[0]
            label_fields = "\t".join(str(value) for value in record.iloc[1:])
            handle.write(f"{row_number}\t{label_fields}\t{image_name}\n")

# Label CSVs produced by the conversion step.
train_csv_file = "train_ssd_labels.csv"
val_csv_file = "val_ssd_labels.csv"
test_csv_file = "test_ssd_labels.csv"

# Image directories of each split.
train_images_dir = "./data/train/images"
val_images_dir = "./data/val/images"
test_images_dir = "./data/test/images"

# One .lst file per split.
for csv_path, split_images_dir, lst_path in (
    (train_csv_file, train_images_dir, "train.lst"),
    (val_csv_file, val_images_dir, "val.lst"),
    (test_csv_file, test_images_dir, "test.lst"),
):
    generate_lst_file(csv_path, split_images_dir, lst_path)

### Generate RecordIO files from list files

In [None]:
# Packages presumably required by im2rec.py in the following cells.
# NOTE(review): numpy is pinned to 1.23.1, presumably for compatibility with
# mxnet — confirm before changing the pin.
# NOTE(review): an earlier cell installs opencv-python-headless; adding
# opencv-python puts two OpenCV builds in the same environment — confirm this
# is intended.
!pip install numpy==1.23.1
!pip install mxnet
!pip install opencv-python

In [None]:
# Run im2rec.py on train.lst with the training image directory as the image root.
# im2rec.py must already exist in the working directory; no visible cell of this
# notebook creates or downloads it.
!python im2rec.py train.lst ./data/train/images --pack-label --num-thread 4
print("Done")

In [None]:
# Same im2rec.py invocation for the test and validation splits, each with its
# own .lst file and image root.
!python im2rec.py test.lst ./data/test/images --pack-label --num-thread 4
print("Done for test.rec")
!python im2rec.py val.lst ./data/val/images --pack-label --num-thread 4
print("Done")

### Saving the RecordIO files to the default SageMaker S3 bucket

In [None]:
import boto3
import sagemaker

# Default SageMaker bucket (the upload target) and the S3 client used to reach it.
sagemaker_session = sagemaker.Session()
default_bucket = sagemaker_session.default_bucket()
s3_client = boto3.client("s3")

def upload_to_s3(local_file, s3_path, bucket):
    """Upload ``local_file`` to key ``s3_path`` of ``bucket`` via the module-level ``s3_client``.

    Args:
        local_file: Path of the file on local disk.
        s3_path: Destination object key inside the bucket.
        bucket: Name of the destination bucket.
    """
    s3_client.upload_file(Filename=local_file, Bucket=bucket, Key=s3_path)

# (RecordIO file stem, destination folder in the bucket). The validation file
# val.rec is stored under the "valid/" folder.
for rec_name, s3_folder in (("train", "train"), ("val", "valid"), ("test", "test")):
    upload_to_s3(f'./{rec_name}.rec', f'{s3_folder}/{rec_name}.rec', default_bucket)
    print(f"{rec_name}.rec uploaded to s3 of sagemaker")

### Hyperparameter tuning job

In [None]:
from sagemaker.tuner import HyperparameterTuner, ContinuousParameter, IntegerParameter, CategoricalParameter
import sagemaker
from sagemaker.estimator import Estimator
import boto3

# AWS context for the tuning job.
sagemaker_session = sagemaker.Session()
default_bucket = sagemaker_session.default_bucket()
region = boto3.Session().region_name
role = sagemaker.get_execution_role()

# Container image registered under the 'object-detection' framework name for this region.
image_uri = sagemaker.image_uris.retrieve(framework='object-detection', region=region)

# Estimator that every tuning trial is launched from.
od_model = Estimator(
    image_uri=image_uri,
    role=role,
    sagemaker_session=sagemaker_session,
    output_path=f's3://{default_bucket}/output/',
    instance_type='ml.g4dn.2xlarge',
    instance_count=1,
)

# Hyperparameters held constant across all trials.
# NOTE(review): label_width is 5 here but 8 in the training cell further down —
# confirm which value matches the packed label layout.
od_model.set_hyperparameters(**{
    'num_classes': 1,
    'base_network': 'resnet-50',
    'use_pretrained_model': 1,
    'lr_scheduler_step': 10,
    'lr_scheduler_factor': 0.1,
    'overlap_threshold': 0.5,
    'nms_threshold': 0.45,
    'image_shape': 512,
    'label_width': 5,
    'num_training_samples': 887,
})

# Search space explored by the tuner.
hyperparameter_ranges = dict(
    learning_rate=ContinuousParameter(1e-6, 0.5),
    mini_batch_size=IntegerParameter(4, 16),
    momentum=ContinuousParameter(0.0, 0.999),
    optimizer=CategoricalParameter(['sgd', 'adam', 'rmsprop', 'adadelta']),
    weight_decay=ContinuousParameter(0.0, 0.999),
)

# Seven trials, run one at a time, maximising validation mAP.
tuner = HyperparameterTuner(
    estimator=od_model,
    objective_metric_name='validation:mAP',
    objective_type='Maximize',
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=7,
    max_parallel_jobs=1,
)

# RecordIO channels stored under train/ and valid/ in the default bucket.
train_input = sagemaker.inputs.TrainingInput(
    s3_data=f's3://{default_bucket}/train/',
    content_type='application/x-recordio',
)
val_input = sagemaker.inputs.TrainingInput(
    s3_data=f's3://{default_bucket}/valid/',
    content_type='application/x-recordio',
)

tuner.fit(dict(train=train_input, validation=val_input))

### Training Job

In [None]:
import sagemaker
import boto3
from sagemaker.estimator import Estimator

# AWS context for the training job.
sagemaker_session = sagemaker.Session()
default_bucket = sagemaker_session.default_bucket()
region = boto3.Session().region_name
role = sagemaker.get_execution_role()

s3_output_path = f's3://{default_bucket}/output/'
image_uri = sagemaker.image_uris.retrieve(framework='object-detection', region=region)

# Single-instance estimator reading its input in File mode.
od_model = Estimator(
    image_uri=image_uri,
    role=role,
    sagemaker_session=sagemaker_session,
    output_path=s3_output_path,
    instance_type='ml.g4dn.2xlarge',
    instance_count=1,
    volume_size=50,
    max_run=360000,
    input_mode='File',
)

# Fixed hyperparameters for this run.
# NOTE(review): label_width is 8 here but 5 in the tuning cell above —
# confirm which value matches the packed label layout.
od_model.set_hyperparameters(**{
    'base_network': 'resnet-50',
    'use_pretrained_model': 1,
    'num_classes': 1,
    'mini_batch_size': 12,
    'epochs': 30,
    'learning_rate': 8.690212861278214e-05,
    'lr_scheduler_step': 10,
    'lr_scheduler_factor': 0.1,
    'optimizer': 'adam',
    'momentum': 0.9937708941341203,
    'weight_decay': 0.7641299369416104,
    'overlap_threshold': 0.5,
    'nms_threshold': 0.45,
    'image_shape': 512,
    'label_width': 8,
    'num_training_samples': 887,
})

# RecordIO channels stored under train/ and valid/ in the default bucket.
train_input = sagemaker.inputs.TrainingInput(
    s3_data=f's3://{default_bucket}/train/',
    content_type='application/x-recordio',
)
val_input = sagemaker.inputs.TrainingInput(
    s3_data=f's3://{default_bucket}/valid/',
    content_type='application/x-recordio',
)

od_model.fit(dict(train=train_input, validation=val_input))

### Registering the model

In [None]:
import time

import boto3

# SageMaker API client used by the registration cells. It is created here
# because no earlier visible cell defines `sm_client`.
sm_client = boto3.client("sagemaker")

# The timestamp suffix gives each run its own group name.
model_package_group_name = "SSD" + str(round(time.time()))
model_package_group_input_dict = {
    "ModelPackageGroupName": model_package_group_name,
    "ModelPackageGroupDescription": "Sample model package group",
}

create_model_package_group_response = sm_client.create_model_package_group(**model_package_group_input_dict)
print('ModelPackageGroup Arn : {}'.format(create_model_package_group_response['ModelPackageGroupArn']))

In [None]:
# S3 location of the artifact produced by the training job above; this replaces
# the "your-bucket-name" placeholder, which does not point at a real model.
model_url = od_model.model_data

modelpackage_inference_specification = {
    "InferenceSpecification": {
        "Containers": [
            {
                "Image": image_uri,
                "ModelDataUrl": model_url,
            }
        ],
        # The model is an image object detector: requests carry an image and
        # responses are JSON, so "text/csv" did not describe its interface.
        # NOTE(review): confirm the exact list against the SageMaker Object
        # Detection input/output documentation.
        "SupportedContentTypes": ["application/x-image", "image/jpeg", "image/png"],
        "SupportedResponseMIMETypes": ["application/json"],
    }
}

# Alternatively, you can specify the model source like this:
# modelpackage_inference_specification["InferenceSpecification"]["Containers"][0]["ModelDataUrl"]=model_url

# Request body for create_model_package: group, description and approval
# status, merged with the inference specification built above.
create_model_package_input_dict = {
    "ModelPackageGroupName": model_package_group_name,
    "ModelPackageDescription": "SSD",
    "ModelApprovalStatus": "PendingManualApproval",
}
create_model_package_input_dict.update(modelpackage_inference_specification)

In [None]:
# Register the model version in the package group and report its ARN.
create_model_package_response = sm_client.create_model_package(**create_model_package_input_dict)
model_package_arn = create_model_package_response["ModelPackageArn"]
print(f'ModelPackage Version ARN : {model_package_arn}')