In [1]:
pip install tensorflow==2.12

In [2]:
pip install sagemaker-experiments

# Hyperparameter Tuning

In [None]:
# Column names selected for this experiment.
# NOTE(review): presumably the model features plus the label column
# ('Sample Classification') — confirm against the training script.
columns = [
    'mode_payload_bytes_delta_len',
    'mode_fwd_packets_delta_len',
    'mean_bwd_payload_bytes_delta_len',
    'cov_bwd_header_bytes_delta_len',
    'mode_packets_delta_len',
    'cov_bwd_packets_delta_len',
    'active_skewness',
    'median_bwd_packets_delta_len',
    'bwd_payload_bytes_skewness',
    'mean_bwd_packets_delta_len',
    'mode_bwd_payload_bytes_delta_len',
    'fwd_total_header_bytes',
    'median_fwd_payload_bytes_delta_len',
    'skewness_bwd_header_bytes_delta_len',
    'active_min',
    'mode_fwd_payload_bytes_delta_len',
    'Sample Classification',
]

In [None]:
import os
import uuid
import boto3
import sagemaker
from sagemaker.tensorflow import TensorFlow
from sagemaker.tuner import IntegerParameter, ContinuousParameter, HyperparameterTuner,CategoricalParameter
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from datetime import datetime
from sagemaker.experiments.run import Run, load_run


# ---------------------------------------------------------------------------
# AWS / SageMaker configuration
# ---------------------------------------------------------------------------

# Execution role and region are obtained through the SageMaker SDK.
role = sagemaker.get_execution_role()
region = sagemaker.Session().boto_region_name
s3 = boto3.client("s3")

# S3 locations: the training CSV (input) and the prefix used as the
# estimator's output_path.
bucket = 'multiclass-balanced-two-million'
file_key = 'merged-multiclass-balanced-two-million.csv'
s3_input_path = f's3://{bucket}/{file_key}'
s3_output_path = f's3://{bucket}/lstm_training_output_confusion/'

# SageMaker Experiments metadata.
experiment_name = "LSTM-Tuning"
experiment_description = "Experiment to optimize LSTM model"

# Ensure the experiment exists: load it, and create it only when SageMaker
# reports that it is missing. Any other failure (credentials, permissions,
# throttling, networking) is re-raised instead of being hidden behind a
# follow-up create call.
try:
    lstm_experiment = Experiment.load(experiment_name=experiment_name)
except Exception as load_error:
    # boto3 service errors carry the error code in `.response["Error"]["Code"]`;
    # exceptions without a dict `response` attribute are treated as "not a
    # missing-experiment error" and propagate unchanged.
    error_response = getattr(load_error, "response", None)
    error_code = (
        error_response.get("Error", {}).get("Code")
        if isinstance(error_response, dict)
        else None
    )
    if error_code != "ResourceNotFound":
        raise
    lstm_experiment = Experiment.create(
        experiment_name=experiment_name,
        description=experiment_description,
        sagemaker_boto_client=boto3.client('sagemaker'),
    )

def generate_unique_name(base_name):
    """Build a name of the form ``<base_name>-<timestamp>-<random id>``.

    Args:
        base_name: Prefix to place at the start of the generated name.

    Returns:
        str: ``base_name`` followed by the current local time formatted as
        ``YYYY-MM-DD-HH-MM-SS`` and the first six hex characters of a fresh
        UUID4, all joined with hyphens.
    """
    timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    # Short random suffix so two names created within the same second differ.
    random_suffix = uuid.uuid4().hex[:6]
    return "{}-{}-{}".format(base_name, timestamp, random_suffix)

def create_or_load_trial(experiment_name, base_trial_name):
    """Create a new, uniquely named trial under the given experiment.

    Despite the function name, no existing trial is ever loaded: the trial
    name is freshly generated on every call, so a trial is always created.

    Args:
        experiment_name: Name of the experiment the trial is created in.
        base_trial_name: Prefix passed to ``generate_unique_name`` to build
            the trial name.

    Returns:
        The object returned by ``Trial.create``.
    """
    unique_trial_name = generate_unique_name(base_trial_name)
    sagemaker_client = boto3.client('sagemaker')
    return Trial.create(
        experiment_name=experiment_name,
        trial_name=unique_trial_name,
        sagemaker_boto_client=sagemaker_client,
    )



# Register a fresh trial (unique name with the "LSTM-Trial" prefix) for this run.
trial = create_or_load_trial(experiment_name, "LSTM-Trial")

# Experiment/trial association handed to the tuning call; the trial name is
# also reused as the trial component's display name.
experiment_config = dict(
    ExperimentName=lstm_experiment.experiment_name,
    TrialName=trial.trial_name,
    TrialComponentDisplayName=trial.trial_name,
)

# TensorFlow estimator: runs lstm_with_tuning.py on one ml.g4dn.16xlarge
# instance and writes its output under s3_output_path.
estimator = TensorFlow(
    entry_point='lstm_with_tuning.py',
    role=role,
    framework_version="2.12",
    py_version='py310',
    instance_type='ml.g4dn.16xlarge',
    instance_count=1,
    output_path=s3_output_path,
    script_mode=True,
    hyperparameters={
        # The trial name is forwarded to the training script as a hyperparameter.
        'trial_name': trial.trial_name,
    },
)



# Search space explored by the hyperparameter tuner.
# NOTE(review): the key names presumably match the arguments parsed by
# lstm_with_tuning.py — confirm against that script.
hyperparameter_ranges = {
    # Numeric ranges
    'batch_size': IntegerParameter(128, 512),
    'learning_rate': ContinuousParameter(0.0001, 0.01),
    'dropout': ContinuousParameter(0.0, 0.5),
    # Categorical choices
    'optimizer': CategoricalParameter(
        ['RMSprop', 'Adam', 'Adagrad', 'Adadelta', 'Adamax', 'Nadam']
    ),
    'activation': CategoricalParameter(['tanh', 'relu', 'sigmoid']),
    # Training length and layer width
    'epochs': IntegerParameter(5, 50),
    'units': IntegerParameter(50, 200),
}



# Metric scraped from the training logs: the regex captures the number that
# follows "val_accuracy: ".
metric_definitions = [
    {
        'Name': 'val_accuracy',
        'Regex': 'val_accuracy: ([0-9\\.]+)',
    },
]



# Bayesian search over hyperparameter_ranges that maximizes val_accuracy:
# at most 22 training jobs in total, 3 of them running in parallel.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name='val_accuracy',
    objective_type='Maximize',
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    max_jobs=22,
    max_parallel_jobs=3,
    strategy='Bayesian',
)

# Start the tuning job, using the S3 CSV as the 'train' channel.
# NOTE(review): confirm that HyperparameterTuner.fit actually forwards
# experiment_config to the training jobs for dict inputs — it may be
# silently ignored. TODO verify against the installed SDK version.
tuner.fit({'train': s3_input_path}, experiment_config=experiment_config)
