In [1]:
import os

import boto3
import numpy as np
import pandas as pd
import sagemaker

In [None]:
import sagemaker
import boto3

# S3 bucket the SageMaker session uses for uploaded data, models and logs.
# SageMaker creates this bucket automatically if it does not exist.
sagemaker_session_bucket = 'bucket_name'

# Hand the bucket to the session; otherwise the variable above is never used
# and the session silently falls back to its own default bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

try:
    role = sagemaker.get_execution_role()
except ValueError:
    # No execution role could be resolved (presumably because the notebook is
    # running outside SageMaker), so look the role up by name through IAM.
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='role_name')['Role']['Arn']

print(f"sagemaker role arn: {role}")
print(f"sagemaker session region: {sess.boto_region_name}")

## prepare data

`!mkdir -p train2500/real train2500/fake`

`!mkdir -p test1000/real test1000/fake`

`!ls train/REAL | sort -R | tail -1250 | while read file; do cp train/REAL/$file train2500/real; done`

`!ls train/FAKE | sort -R | tail -1250 | while read file; do cp train/FAKE/$file train2500/fake; done`

`!ls test/REAL | sort -R | tail -500 | while read file; do cp test/REAL/$file test1000/real; done`

`!ls test/FAKE | sort -R | tail -500 | while read file; do cp test/FAKE/$file test1000/fake; done`


### image processing

In [4]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image_dataset_from_directory

In [6]:
# Fix the global random seed so repeated runs of the notebook are comparable.
RANDOM_SEED = 42
keras.utils.set_random_seed(RANDOM_SEED)

In [7]:
# Directories holding the sampled images, one sub-folder per class.
train_path = "train2500/"
test_path = "test1000/"

# Both splits are loaded with the same settings: 32x32 images, batches of 128.
loader_options = {"image_size": (32, 32), "batch_size": 128}

train_ds = image_dataset_from_directory(train_path, **loader_options)
test_ds = image_dataset_from_directory(test_path, **loader_options)

Found 2500 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.


In [8]:
# Show the class names discovered in the training directory.
train_ds.class_names

['fake', 'real']

In [9]:
# Show the dataset's element spec (image batch and label batch).
train_ds

<_BatchDataset element_spec=(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

In [13]:
# Pull a single batch from the dataset and print only its labels.
_, first_batch_labels = next(iter(train_ds))
print(first_batch_labels)

tf.Tensor(
[0 0 1 1 0 1 1 1 0 0 0 1 0 0 1 1 0 1 1 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0
 1 0 1 0 1 0 1 1 0 0 0 1 1 0 0 1 0 0 1 1 1 0 1 0 0 0 1 0 0 0 1 0 1 0 1 0 0
 1 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 1 1 1 0 1 1 1 0 0 1
 1 1 0 1 0 0 0 1 1 1 1 0 0 0 0 1 1], shape=(128,), dtype=int32)


In [15]:
def dataset_to_tensors(dataset):
    """Materialise a batched ``(images, labels)`` dataset into two tensors.

    Images and labels are collected during ONE pass over the dataset. Iterating
    the dataset twice (once for images, once for labels) is not safe here: the
    batches come back in a different order on each pass (compare the labels
    printed for the first batch with the concatenated labels), which would
    pair every image with some other image's label.

    Args:
        dataset: Iterable yielding ``(image_batch, label_batch)`` pairs.

    Returns:
        Tuple ``(images, labels)`` with all batches concatenated along axis 0.
    """
    image_batches, label_batches = [], []
    for image_batch, label_batch in dataset:
        image_batches.append(image_batch)
        label_batches.append(label_batch)
    return tf.concat(image_batches, axis=0), tf.concat(label_batches, axis=0)


x_train, y_train = dataset_to_tensors(train_ds)
x_test, y_test = dataset_to_tensors(test_ds)

# Every image must have exactly one label.
assert x_train.shape[0] == y_train.shape[0]
assert x_test.shape[0] == y_test.shape[0]

In [17]:
# Inspect the concatenated training labels.
y_train

<tf.Tensor: shape=(2500,), dtype=int32, numpy=array([1, 0, 1, ..., 1, 1, 1], dtype=int32)>

In [None]:
# np.savez does not create missing directories, so make sure the target exists.
os.makedirs("processed_data", exist_ok=True)

# Each archive stores the images under "image" and the labels under "label".
np.savez("processed_data/train", image=x_train, label=y_train)  # -> processed_data/train.npz
np.savez("processed_data/test", image=x_test, label=y_test)  # -> processed_data/test.npz

In [19]:
# Recursively copy the local processed_data/ directory to s3://bucket_name/prefix.
!aws s3 cp processed_data s3://bucket_name/prefix --recursive

test.npz  train.npz


Training job layout: `training_job/` -> `code/` -> `train.py` (entry point), `model.py` (model definition) and `requirements.txt` (the libraries the training script needs).

In [None]:
# NOTE(review): %cd changes the kernel's working directory for every later cell, so the
# `%%writefile` targets below resolve relative to code/. A later cell writes to
# `code/train.py` and the estimator uses source_dir="code" -- confirm those paths still
# resolve as intended after this cd (they would point at code/code/).
%cd code

In [None]:
%%writefile model.py

import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.layers import (
    Dense,
    Dropout,
    LayerNormalization,
)
from tensorflow.keras.layers.experimental.preprocessing import Rescaling


class MultiHeadSelfAttention(tf.keras.layers.Layer):
    """Multi-head scaled dot-product self-attention.

    The input is projected to queries, keys and values of width ``embed_dim``,
    split into ``num_heads`` heads of width ``embed_dim // num_heads``,
    attended per head, merged back and passed through a final dense layer.

    Args:
        embed_dim: Width of the projections and of the output.
        num_heads: Number of attention heads; must divide ``embed_dim``.

    Raises:
        ValueError: If ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads=8):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        # Width handled by each individual head.
        self.projection_dim = embed_dim // num_heads
        self.query_dense = Dense(embed_dim)
        self.key_dense = Dense(embed_dim)
        self.value_dense = Dense(embed_dim)
        self.combine_heads = Dense(embed_dim)

    def attention(self, query, key, value):
        """Return ``(attended values, attention weights)`` for the given heads."""
        # Scale the raw scores by sqrt of the key's last dimension before the softmax.
        key_depth = tf.cast(tf.shape(key)[-1], tf.float32)
        logits = tf.matmul(query, key, transpose_b=True) / tf.math.sqrt(key_depth)
        weights = tf.nn.softmax(logits, axis=-1)
        return tf.matmul(weights, value), weights

    def separate_heads(self, x, batch_size):
        """Reshape to (batch, -1, heads, projection_dim) and move heads to axis 1."""
        per_head = tf.reshape(
            x, (batch_size, -1, self.num_heads, self.projection_dim)
        )
        return tf.transpose(per_head, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Apply self-attention to ``inputs`` and return the combined heads."""
        batch_size = tf.shape(inputs)[0]

        # Project, then split each projection into heads (query, key, value order).
        query, key, value = (
            self.separate_heads(projection(inputs), batch_size)
            for projection in (self.query_dense, self.key_dense, self.value_dense)
        )

        attended, _ = self.attention(query, key, value)

        # Undo the head split: heads back next to the feature axis, then flatten.
        attended = tf.transpose(attended, perm=[0, 2, 1, 3])
        merged = tf.reshape(attended, (batch_size, -1, self.embed_dim))
        return self.combine_heads(merged)


class TransformerBlock(tf.keras.layers.Layer):
    """Pre-norm transformer encoder block: self-attention followed by an MLP.

    Each sub-layer is applied to a layer-normalised input, passed through
    dropout and added back to its un-normalised input (residual connection).

    Args:
        embed_dim: Width of the token embeddings (input and output).
        num_heads: Number of attention heads.
        mlp_dim: Hidden width of the feed-forward network.
        dropout: Dropout rate used inside and after each sub-layer.
    """

    def __init__(self, embed_dim, num_heads, mlp_dim, dropout=0.1):
        super().__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)

        # Feed-forward network: expand to mlp_dim with GELU, project back to embed_dim.
        feed_forward_layers = [
            Dense(mlp_dim, activation=tfa.activations.gelu),
            Dropout(dropout),
            Dense(embed_dim),
            Dropout(dropout),
        ]
        self.mlp = tf.keras.Sequential(feed_forward_layers)

        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

    def call(self, inputs, training):
        """Run the block on ``inputs``; ``training`` is forwarded to the dropouts."""
        # Attention sub-layer with residual connection.
        attended = self.att(self.layernorm1(inputs))
        after_attention = self.dropout1(attended, training=training) + inputs

        # Feed-forward sub-layer with residual connection.
        fed_forward = self.mlp(self.layernorm2(after_attention))
        return self.dropout2(fed_forward, training=training) + after_attention


class VisionTransformer(tf.keras.Model):
    """Vision Transformer classifier.

    Images are rescaled by 1/255, cut into non-overlapping square patches,
    linearly projected to ``d_model``, prefixed with a learned class token,
    offset by learned position embeddings and passed through ``num_layers``
    transformer blocks. The class token's final state feeds an MLP head that
    outputs ``num_classes`` values (no softmax is applied here).

    Args:
        image_size: Side length of the (square) input images.
        patch_size: Side length of each square patch.
        num_layers: Number of transformer blocks.
        num_classes: Size of the output layer.
        d_model: Token embedding width.
        num_heads: Attention heads per block.
        mlp_dim: Hidden width of the block MLPs and of the head.
        channels: Number of image channels.
        dropout: Dropout rate.
    """

    def __init__(
        self,
        image_size,
        patch_size,
        num_layers,
        num_classes,
        d_model,
        num_heads,
        mlp_dim,
        channels=3,
        dropout=0.1,
    ):
        super().__init__()
        patches_per_side = image_size // patch_size
        num_patches = patches_per_side ** 2

        self.patch_size = patch_size
        self.d_model = d_model
        self.num_layers = num_layers
        # Number of values in one flattened patch.
        self.patch_dim = channels * patch_size ** 2

        self.rescale = Rescaling(1.0 / 255)
        # One position embedding per patch plus one for the class token.
        self.pos_emb = self.add_weight(
            name="pos_emb", shape=(1, num_patches + 1, d_model)
        )
        self.class_emb = self.add_weight(name="class_emb", shape=(1, 1, d_model))
        self.patch_proj = Dense(d_model)

        encoder_blocks = []
        for _ in range(num_layers):
            encoder_blocks.append(
                TransformerBlock(d_model, num_heads, mlp_dim, dropout)
            )
        self.enc_layers = encoder_blocks

        head_layers = [
            LayerNormalization(epsilon=1e-6),
            Dense(mlp_dim, activation=tfa.activations.gelu),
            Dropout(dropout),
            Dense(num_classes),
        ]
        self.mlp_head = tf.keras.Sequential(head_layers)

    def extract_patches(self, images):
        """Split ``images`` into flattened, non-overlapping patches.

        Returns a tensor reshaped to ``(batch, -1, patch_dim)``.
        """
        # Same window for size and stride -> patches do not overlap.
        window = [1, self.patch_size, self.patch_size, 1]
        patches = tf.image.extract_patches(
            images=images,
            sizes=window,
            strides=window,
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        return tf.reshape(patches, [tf.shape(images)[0], -1, self.patch_dim])

    def call(self, x, training):
        """Return the classification head's output for a batch of images ``x``."""
        batch_size = tf.shape(x)[0]

        tokens = self.patch_proj(self.extract_patches(self.rescale(x)))

        # Prepend one copy of the class token per example, then add positions.
        class_tokens = tf.broadcast_to(
            self.class_emb, [batch_size, 1, self.d_model]
        )
        tokens = tf.concat([class_tokens, tokens], axis=1) + self.pos_emb

        for encoder in self.enc_layers:
            tokens = encoder(tokens, training)

        # Only the first token (the class token) is used for classification.
        return self.mlp_head(tokens[:, 0])

In [None]:
%%writefile requirements.txt

# NOTE(review): version is unpinned -- consider pinning to a release that matches the
# TensorFlow version of the training image (the estimator requests framework_version 2.9).
tensorflow_addons

In [None]:
%%writefile code/train.py

import os
import numpy as np
from argparse import ArgumentParser

import tensorflow as tf
import tensorflow_addons as tfa

from model import VisionTransformer

def parse_args(argv=None):
    """Parse hyperparameters and data/model directories from the command line.

    Args:
        argv: Optional list of command-line tokens. ``None`` makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace``. Options that are not recognised are
        reported on stdout and otherwise ignored.
    """
    parser = ArgumentParser()

    # Directory defaults come from the SM_* environment variables. Using .get()
    # avoids a KeyError at start-up when they are not set; main() checks that
    # every directory ended up with a value.
    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN"))
    parser.add_argument(
        "--validation", type=str, default=os.environ.get("SM_CHANNEL_VALIDATION")
    )
    parser.add_argument("--image-size", type=int, default=32)
    parser.add_argument("--patch-size", type=int, default=4)
    parser.add_argument("--num-layers", type=int, default=4)
    parser.add_argument("--num-classes", type=int, default=2)
    parser.add_argument("--d-model", type=int, default=64)
    parser.add_argument("--num-heads", type=int, default=4)
    parser.add_argument("--mlp-dim", type=int, default=128)
    # Accept both spellings so a caller passing either "learning-rate" or
    # "learning_rate" is honoured instead of silently getting the default.
    parser.add_argument(
        "--learning-rate", "--learning_rate", type=float, default=3e-4
    )
    parser.add_argument("--weight-decay", type=float, default=1e-4)
    parser.add_argument("--batch-size", type=int, default=256)
    parser.add_argument("--epochs", type=int, default=30)

    args, unknown = parser.parse_known_args(argv)
    if unknown:
        # Make dropped options visible: a misspelt hyperparameter would
        # otherwise fall back to its default without any trace.
        print(f"ignoring unrecognised arguments: {unknown}")
    return args


def load_npz_split(directory, filename):
    """Load the ``image`` and ``label`` arrays from one ``.npz`` archive.

    Args:
        directory: Directory containing the archive.
        filename: Archive file name, e.g. ``"train.npz"``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays.
    """
    # Open the archive once and close it as soon as both arrays are read.
    with np.load(os.path.join(directory, filename)) as archive:
        return archive["image"], archive["label"]


def main():
    """Train the Vision Transformer and save it under ``<model_dir>/1``."""
    args = parse_args()

    missing = [
        name for name in ("model_dir", "train", "validation")
        if getattr(args, name) is None
    ]
    if missing:
        raise SystemExit(
            "missing required directories (pass them as arguments or set the "
            f"SM_* environment variables): {missing}"
        )

    x_train, y_train = load_npz_split(args.train, "train.npz")
    x_test, y_test = load_npz_split(args.validation, "test.npz")

    model = VisionTransformer(
        image_size=args.image_size,
        patch_size=args.patch_size,
        num_layers=args.num_layers,
        num_classes=args.num_classes,
        d_model=args.d_model,
        num_heads=args.num_heads,
        mlp_dim=args.mlp_dim,
        channels=3,
        dropout=0.1,
    )
    model.compile(
        # The model outputs raw scores, hence from_logits=True.
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=tfa.optimizers.AdamW(
            learning_rate=args.learning_rate, weight_decay=args.weight_decay
        ),
        metrics=["accuracy"],
    )

    model.fit(
        x_train,
        y_train,
        validation_data=(x_test, y_test),
        # Previously parsed but never used, so fit() ran with its own default.
        batch_size=args.batch_size,
        epochs=args.epochs,
        verbose=2,
    )

    # Saved in a numbered sub-directory of the model directory; presumably the
    # contents of that directory end up in model.tar.gz -- confirm.
    version = "1"
    model.save(os.path.join(args.model_dir, version))


if __name__ == "__main__":
    main()

In [20]:
# S3 location the processed .npz files were uploaded to with `aws s3 cp`.
# It must be a full s3:// URI, not just the bucket name.
dataset_uri = "s3://bucket_name/prefix"

In [None]:
from sagemaker.tensorflow import TensorFlow

# Each key is handed to code/train.py as a `--<key> <value>` command-line
# option, so it has to match an argparse flag there character for character;
# train.py ignores options it does not recognise and falls back to defaults.
# Note the flags are hyphenated except `--learning_rate`.
hyperparameters = {
    "image-size": 32,
    "patch-size": 4,
    "num-layers": 4,
    "d-model": 64,
    "num-heads": 4,
    "mlp-dim": 128,
    "learning_rate": 3e-4,
    "weight-decay": 1e-4,
    "batch-size": 256,
    "epochs": 20,
}

estimator = TensorFlow(
    entry_point="train.py",
    source_dir="code",
    role=role,
    framework_version="2.9",
    py_version="py39",
    instance_type="ml.p3.2xlarge",
    instance_count=1,
    hyperparameters=hyperparameters,
)

# Both channels point at the same location: train.py reads train.npz from the
# "train" channel and test.npz from the "validation" channel.
inputs = {"train": dataset_uri, "validation": dataset_uri}
estimator.fit(inputs)

In [None]:
# S3 location of the model.tar.gz archive produced by the training job.
estimator.model_data