In [17]:
import boto3
import zipfile
import os
import sagemaker
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, accuracy_score

### Data Preprocessing

In [4]:
# S3 location of the image archive and the local paths it is staged to
s3_bucket = "id-classifier-images"
s3_key = "images.zip"
local_zip_path = "/tmp/images.zip"
dataset_dir = "/tmp/dataset"

s3 = boto3.client("s3")

# Download and extract only when no extracted copy is present: a fresh kernel
# can then run the notebook top to bottom, while re-runs skip the transfer.
if not (os.path.isdir(dataset_dir) and os.listdir(dataset_dir)):
    s3.download_file(s3_bucket, s3_key, local_zip_path)
    with zipfile.ZipFile(local_zip_path, "r") as zip_ref:
        zip_ref.extractall(dataset_dir)

### Initialize SageMaker session and role

In [8]:
# Session wrapper used for SageMaker API calls in this notebook
sagemaker_session = sagemaker.Session()
# Look up the execution role through the session created above rather than
# letting the helper fall back to a default session of its own.
role = sagemaker.get_execution_role(sagemaker_session=sagemaker_session)


### Define image parameters for model input

In [9]:
# Spatial size (height, width) that every image is resized to before it reaches the model
IMG_HEIGHT, IMG_WIDTH = 150, 150
# How many images each generator yields per batch
BATCH_SIZE = 32

### Create ImageDataGenerator for pixel rescaling and the train/validation split

In [12]:
# One generator feeds both subsets: it multiplies pixel values by 1/255 and
# reserves 20% of the images for the 'validation' subset.
data_gen = ImageDataGenerator(
    validation_split=0.2,
    rescale=1.0 / 255,
)

### Load training dataset

In [14]:
# The images are read from the "images" folder inside the extraction directory.
# Building the path from dataset_dir keeps the location defined in one place.
unzip_dir = os.path.join(dataset_dir, "images")
train_generator = data_gen.flow_from_directory(
    unzip_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),  # resize every image to the model input size
    batch_size=BATCH_SIZE,
    class_mode='categorical',             # categorical labels, matching the model's categorical_crossentropy loss
    subset='training'                     # the 80% left after validation_split=0.2
)

Found 800 images belonging to 10 classes.


### Load validation dataset

In [16]:
validation_generator = data_gen.flow_from_directory(
    unzip_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),  # same input size as the training subset
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',                  # the 20% held out by validation_split=0.2
    # Keep a fixed file order: with the default shuffle=True, predictions made
    # from this generator would not line up with validation_generator.classes
    # when computing metrics such as classification_report / accuracy_score.
    shuffle=False
)

Found 200 images belonging to 10 classes.


### Define a simple Convolutional Neural Network (CNN) model

In [18]:
def create_model(num_classes=None, input_shape=None):
    """Build and compile a small CNN image classifier.

    The network is two Conv2D + MaxPooling2D stages (32 then 64 filters,
    3x3 kernels, ReLU), followed by Flatten, a 128-unit ReLU Dense layer and
    a softmax output layer with one unit per class.

    Args:
        num_classes: Number of output classes. When None (the default), it is
            taken from ``len(train_generator.class_indices)``, so the training
            generator must already exist in the notebook namespace.
        input_shape: Shape of a single input image as (height, width, channels).
            When None (the default), ``(IMG_HEIGHT, IMG_WIDTH, 3)`` is used.

    Returns:
        A ``tf.keras.Sequential`` model compiled with the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    # Fall back to the notebook-level globals only when the caller gave no
    # explicit value, so create_model() with no arguments behaves as before.
    if num_classes is None:
        num_classes = len(train_generator.class_indices)
    if input_shape is None:
        input_shape = (IMG_HEIGHT, IMG_WIDTH, 3)

    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=input_shape),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')  # one softmax unit per class
    ])

    # Categorical cross-entropy matches the labels produced by class_mode='categorical'
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model