#  Export the Model

In [None]:
from pathlib import Path
# Export under <model_name>/<model_version> so each export gets its own
# versioned sub-directory.
model_name = "my_mnist_model"
model_version = "0001"
model_path = Path(model_name, model_version)
# NOTE(review): `model` is not created in the visible cells — it must already
# exist (trained) when this cell runs.
model.save(model_path, save_format="tf")

# Inspect the SavedModel

In [None]:
saved_model_cli show --dir my_mnist_model/0001 --tag_set serve


#  Install TensorFlow Serving

In [None]:
!apt update -q && apt-get install -y tensorflow-model-server
%pip install -q -U tensorflow-serving-api


# Start the TF Serving Server
## Start the server specifying:

- Model name and base path.
- Ports for gRPC (8500) and REST API (8501).

In [None]:
%%bash --bg
# Cell magic: run this cell as a background shell script. A plain Python cell
# cannot parse the command, and a foreground server would block every later cell.
# --port is the gRPC port, --rest_api_port the REST port.
# NOTE(review): "/path/to/my_mnist_model" is a placeholder — replace it with the
# absolute path of the directory that contains the version sub-directory.
tensorflow_model_server \
  --port=8500 \
  --rest_api_port=8501 \
  --model_name=my_mnist_model \
  --model_base_path="/path/to/my_mnist_model"


# Query the Model via REST API

In [None]:
import json
# REST payload: the first three rows of X_test, converted to plain lists so they
# are JSON-serializable, addressed to the default serving signature.
request_json = json.dumps(
    dict(signature_name="serving_default", instances=X_test[:3].tolist())
)


Send Request: Use the requests library to make a POST request to the server

In [None]:
import numpy as np
import requests

# POST the JSON payload to the REST predict endpoint (port 8501 is the server's
# --rest_api_port). The timeout makes an unreachable or stalled server fail this
# cell instead of blocking the kernel forever; 10 s matches the gRPC call.
server_url = "http://localhost:8501/v1/models/my_mnist_model:predict"
response = requests.post(server_url, data=request_json, timeout=10.0)
response.raise_for_status()  # raise on HTTP errors rather than parsing an error body
# Predictions come back as nested lists; round to 2 decimals for display.
y_proba = np.array(response.json()["predictions"]).round(2)


# Querying TensorFlow Serving via gRPC API
## Setup and Request Creation:

Use PredictRequest from tensorflow_serving.apis.predict_pb2 to create a request.
Specify the model name, signature, and input data using tf.make_tensor_proto.


In [None]:
from tensorflow_serving.apis.predict_pb2 import PredictRequest
# Build the gRPC prediction request: target model, serving signature, then inputs.
request = PredictRequest()
request.model_spec.name = model_name
request.model_spec.signature_name = "serving_default"
# The input array is converted to a TensorProto and copied into the request's
# input map under `input_name`.
# NOTE(review): `input_name`, `X_new` and `tf` are not defined in the visible
# cells — confirm an earlier cell provides them.
request.inputs[input_name].CopyFrom(tf.make_tensor_proto(X_new))


## Send Request and Receive Response:

Establish a gRPC channel using grpc.
Send the request with a timeout and handle the response.



In [None]:
import grpc
from tensorflow_serving.apis import prediction_service_pb2_grpc
# Open an insecure channel to the gRPC port (8500) and wrap it in the
# PredictionService client stub.
channel = grpc.insecure_channel('localhost:8500')
predict_service = prediction_service_pb2_grpc.PredictionServiceStub(channel)
# Send the previously built `request`; timeout=10.0 bounds how long the call may wait.
response = predict_service.Predict(request, timeout=10.0)


# Process the Response:

Convert the response to a tensor using tf.make_ndarray.

In [None]:
# Convert the TensorProto stored under `output_name` in the gRPC response to an ndarray.
# NOTE(review): `output_name` is not defined in the visible cells — confirm it
# is set to the model's output key before running.
y_proba = tf.make_ndarray(response.outputs[output_name])


# Vertex AI Setup
## Prerequisites:

Set up a Google Cloud account with billing enabled.
Create a GCP project and activate necessary APIs (e.g., Cloud Storage, Vertex AI).
## Authentication:

Use google.colab.auth for OAuth2 authentication in Colab.



In [None]:
from google.colab import auth
# Run the Colab authentication flow for the current user. `auth` comes from
# `google.colab`, so this cell only works inside Google Colab.
auth.authenticate_user()


# Create a Google Cloud Storage Bucket:

Use the google-cloud-storage library to store your SavedModels.

In [None]:
from google.cloud import storage
# Create a Cloud Storage client for the project, then create the bucket in the
# requested location.
# NOTE(review): `project_id`, `bucket_name` and `location` are not defined in the
# visible cells — set them before running this cell.
storage_client = storage.Client(project=project_id)
bucket = storage_client.create_bucket(bucket_name, location=location)


## Running Models in Web Pages with TensorFlow.js (TFJS)
### Use Cases
- **Offline Accessibility:** Ideal for web apps with intermittent connectivity (e.g., hiking apps).
- **Low Latency:** Reduces delay in real-time applications like online games.
- **Privacy Preservation:** Keeps user data local for private predictions.

### Implementation Example
Use TensorFlow.js to load and run models directly in the browser:

In [None]:
// Browser-side JavaScript (meant for a web page's module script), not Python.
// The two imports load TensorFlow.js and the pre-trained MobileNet wrapper.
import "https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@latest";
import "https://cdn.jsdelivr.net/npm/@tensorflow-models/mobilenet@1.0.0";

// Classify the page element with id "image" and log every predicted class
// with its probability as a percentage (one decimal).
const image = document.getElementById("image");
mobilenet
  .load()
  .then((model) => model.classify(image))
  .then((predictions) => {
    for (const { className, probability } of predictions) {
      console.log(`${className}: ${(probability * 100).toFixed(1)}%`);
    }
  });


## Configuring TensorFlow to Use GPUs
List Available GPUs

In [None]:
import tensorflow as tf
# Physical GPUs visible to TensorFlow; `physical_gpus` is reused by the
# memory-configuration cells below. An empty list means no GPU was detected.
physical_gpus = tf.config.list_physical_devices('GPU')
print(physical_gpus)


# Control GPU Memory Usage:

Limit memory per GPU:

In [None]:
# Give every physical GPU a single logical device capped at memory_limit=2048
# (megabytes, per the TensorFlow API documentation).
for gpu in physical_gpus:
    capped_device = tf.config.LogicalDeviceConfiguration(memory_limit=2048)
    tf.config.set_logical_device_configuration(gpu, [capped_device])


Enable dynamic memory allocation:



In [None]:
# Turn on memory growth for every GPU so TensorFlow allocates GPU memory as
# needed rather than reserving it up front.
# NOTE(review): this looks like an alternative to the fixed `memory_limit`
# configuration — confirm TensorFlow accepts both settings on the same GPU
# before running the two cells in one session.
for gpu in physical_gpus:
    tf.config.experimental.set_memory_growth(gpu, True)


Alternatively, enable dynamic memory allocation by setting the `TF_FORCE_GPU_ALLOW_GROWTH` environment variable:

In [None]:
export TF_FORCE_GPU_ALLOW_GROWTH=true


# Cluster Specification
You start by defining the cluster specification (cluster_spec), which includes the job types and task addresses for each machine involved. Here's an example of a cluster with two workers and one parameter server:

In [None]:
# Job name -> list of task addresses. A task's index is its position in the
# list, so entry i of a job is addressed as /job:<name>/task:i.
cluster_spec = dict(
    worker=[
        "machine-a.example.com:2222",  # /job:worker/task:0
        "machine-b.example.com:2222",  # /job:worker/task:1
    ],
    ps=["machine-a.example.com:2221"],  # /job:ps/task:0
)


# Starting a TensorFlow Cluster
The TF_CONFIG environment variable is used to specify the configuration for each task (worker or parameter server). For example, to configure the first worker, you can set:

In [None]:
import os
import json

# Describe this process to TensorFlow: the shared cluster layout plus this
# task's own identity (worker #0), serialized as JSON into TF_CONFIG.
task_identity = dict(type="worker", index=0)
os.environ["TF_CONFIG"] = json.dumps(dict(cluster=cluster_spec, task=task_identity))


# Training with MultiWorkerMirroredStrategy
When using multiple workers for training, you can use TensorFlow’s MultiWorkerMirroredStrategy for synchronous training across multiple devices. This ensures that each worker performs the same computation in parallel and updates the model synchronously.

In [None]:
import tensorflow as tf
import tempfile

# Create the distribution strategy first, before any other TensorFlow call in this cell.
# NOTE(review): TensorFlow presumably requires this strategy to be instantiated
# at program start — confirm before reordering anything here.
strategy = tf.distribute.MultiWorkerMirroredStrategy()  # synchronous multi-worker strategy

resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver()  # exposes this task's type and id
print(f"Starting task {resolver.task_type} #{resolver.task_id}")

# Build, compile and train the model inside the strategy scope.
# NOTE(review): X_train / y_train / X_valid / y_valid are not loaded in this
# cell — they must be defined by an earlier cell.
with strategy.scope():
    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=[28, 28]),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dense(10, activation="softmax")
    ])  # 28x28 input -> 128 ReLU units -> 10-way softmax

    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    # Train for 10 epochs, evaluating on the validation set after each epoch
    model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=10)

# Every task calls save(): task 0 writes to the final location, all other tasks
# write to a throwaway directory that is deleted immediately afterwards.
# NOTE(review): only task_id is checked, not task_type — confirm no other job
# type with index 0 runs this code.
if resolver.task_id == 0:  # task 0 keeps its copy
    model.save("my_mnist_multiworker_model", save_format="tf")
else:
    tmpdir = tempfile.mkdtemp()  # scratch directory for this task's discarded copy
    model.save(tmpdir, save_format="tf")
    tf.io.gfile.rmtree(tmpdir)  # remove the scratch copy


# Communication Options
TensorFlow uses different communication algorithms (e.g., ring or NCCL) for synchronizing worker computations during training. You can explicitly set the communication strategy to use NCCL if needed for better performance on multi-GPU setups:

In [None]:
# Same multi-worker strategy, but with NCCL requested explicitly as the
# collective-communication implementation.
nccl_implementation = tf.distribute.experimental.CollectiveCommunication.NCCL
nccl_options = tf.distribute.experimental.CommunicationOptions(
    implementation=nccl_implementation
)
strategy = tf.distribute.MultiWorkerMirroredStrategy(communication_options=nccl_options)


# Running on TPUs
If you have access to TPUs (e.g., via Google Cloud), you can use the TPUStrategy:

In [None]:
# Resolve the TPU, initialize it, then build the strategy from the same resolver.
# Note: this rebinds `resolver` and `strategy`, replacing the multi-worker
# objects created in earlier cells.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.tpu.experimental.initialize_tpu_system(resolver)  # Initialize the TPU system
strategy = tf.distribute.TPUStrategy(resolver)


# Running on Google Cloud (Vertex AI)
For large-scale training, you can use Vertex AI to run distributed training jobs on Google Cloud. This can be done using the aiplatform.CustomTrainingJob API to create and run custom training jobs on multiple workers with GPUs.

In [None]:
from google.cloud import aiplatform

# Define the training job: which script to run, in which training container,
# and which serving image to attach to the resulting model.
# NOTE(review): `server_image` and `bucket_name` are not defined in the visible
# cells — they must be set by earlier cells.
training_image_uri = "gcr.io/cloud-aiplatform/training/tf-gpu.2-4:latest"
staging_bucket_uri = f"gs://{bucket_name}/staging"
custom_training_job = aiplatform.CustomTrainingJob(
    display_name="my_custom_training_job",
    script_path="my_vertex_ai_training_task.py",
    container_uri=training_image_uri,
    model_serving_container_image_uri=server_image,
    requirements=["gcsfs==2022.3.0"],  # Example
    staging_bucket=staging_bucket_uri,
)

# Launch the job on 2 replicas, each an n1-standard-4 machine with 2 K80 GPUs.
worker_pool = dict(
    machine_type="n1-standard-4",
    replica_count=2,
    accelerator_type="NVIDIA_TESLA_K80",
    accelerator_count=2,
)
mnist_model = custom_training_job.run(**worker_pool)


# Hyperparameter Tuning with Vertex AI
For optimizing hyperparameters, Vertex AI provides a hyperparameter tuning service that uses Bayesian optimization to efficiently search the hyperparameter space. You can pass hyperparameters as command-line arguments and use them in your training script:

In [None]:
import argparse
import tensorflow as tf

# Hyperparameters are passed as command-line flags; each flag has a default so
# the code also runs when none are given.
parser = argparse.ArgumentParser()
parser.add_argument("--n_hidden", type=int, default=2)  # number of hidden Dense layers
parser.add_argument("--n_neurons", type=int, default=256)  # units per hidden layer
parser.add_argument("--learning_rate", type=float, default=1e-2)
parser.add_argument("--optimizer", default="adam")  # name resolved by tf.keras.optimizers.get
# parse_known_args() instead of parse_args(): inside a Jupyter/Colab kernel
# sys.argv carries the launcher's own arguments (e.g. "-f <connection-file>"),
# which parse_args() rejects with SystemExit. Unrecognized arguments are
# collected in `_unknown_args` and ignored; the declared flags parse as before.
args, _unknown_args = parser.parse_known_args()

def build_model(args):
    """Build and compile a 28x28-input, 10-class classifier from hyperparameters.

    Args:
        args: Object exposing `n_hidden` (number of hidden Dense layers),
            `n_neurons` (units per hidden layer), `optimizer` (identifier passed
            to `tf.keras.optimizers.get`) and `learning_rate`.

    Returns:
        The compiled `tf.keras.Sequential` model, created inside a
        `tf.distribute.MirroredStrategy` scope.
    """
    mirrored = tf.distribute.MirroredStrategy()
    with mirrored.scope():
        # Same layer creation order as adding them one by one:
        # Flatten, then the hidden Dense layers, then the softmax output.
        layers = [tf.keras.layers.Flatten(input_shape=[28, 28], dtype=tf.uint8)]
        layers += [
            tf.keras.layers.Dense(args.n_neurons, activation="relu")
            for _ in range(args.n_hidden)
        ]
        layers.append(tf.keras.layers.Dense(10, activation="softmax"))
        model = tf.keras.Sequential(layers)

        # Resolve the optimizer by name, then override its learning rate.
        optimizer = tf.keras.optimizers.get(args.optimizer)
        optimizer.learning_rate = args.learning_rate
        model.compile(
            optimizer=optimizer,
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"],
        )
    return model

# Build the model from the parsed hyperparameters and train it for 10 epochs.
# NOTE(review): X_train / y_train are not defined in this cell — confirm they
# are loaded beforehand.
model = build_model(args)
model.fit(X_train, y_train, epochs=10)
