-
Notifications
You must be signed in to change notification settings - Fork 178
/
start_agent_and_inf_server.sh
68 lines (63 loc) · 2.09 KB
/
start_agent_and_inf_server.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Install NVIDIA driver and create GPU utilization service.
#
# The GPU agent steps below each depend on the previous one, so the script
# aborts at the first failure instead of running pip/cp from the wrong
# directory.

# Install NVIDIA driver
# NOTE(review): the installer's exit-status contract is not visible here, so a
# non-zero status is reported but not treated as fatal -- confirm.
if ! sh /opt/deeplearning/install-driver.sh; then
  printf 'WARNING: /opt/deeplearning/install-driver.sh exited non-zero\n' >&2
fi

# GPU Agent
# Reuse an existing checkout so that re-running this script from the same
# directory does not fail with "destination path already exists".
if [[ ! -d ml-on-gcp/.git ]]; then
  git clone https://github.com/GoogleCloudPlatform/ml-on-gcp.git || {
    printf 'ERROR: failed to clone ml-on-gcp\n' >&2
    exit 1
  }
fi
cd ml-on-gcp/dlvm/gcp-gpu-utilization-metrics || {
  printf 'ERROR: cannot enter ml-on-gcp/dlvm/gcp-gpu-utilization-metrics\n' >&2
  exit 1
}

# Install Python dependencies.
pip install -r ./requirements.txt || {
  printf 'ERROR: failed to install GPU agent Python dependencies\n' >&2
  exit 1
}

# /root/report_gpu_metrics.py is the path the gpu_utilization_agent service
# executes, so a failed copy must not go unnoticed.
cp ./report_gpu_metrics.py /root/report_gpu_metrics.py || {
  printf 'ERROR: failed to copy report_gpu_metrics.py to /root\n' >&2
  exit 1
}
# Write the systemd unit that keeps the GPU utilization agent running.
# The heredoc delimiter is quoted so the unit text is emitted verbatim.
gpu_agent_unit=/lib/systemd/system/gpu_utilization_agent.service
cat > "${gpu_agent_unit}" <<'EOH'
[Unit]
Description=GPU Utilization Metric Agent
[Service]
Type=simple
PIDFile=/run/gpu_agent.pid
ExecStart=/bin/bash --login -c '/usr/bin/python /root/report_gpu_metrics.py'
User=root
Group=root
WorkingDirectory=/
Restart=always
[Install]
WantedBy=multi-user.target
EOH

# Have systemd re-read its unit files so the new unit is known.
systemctl daemon-reload

# Enable the agent for subsequent boots and start it right away (--now).
# --no-reload suppresses the additional daemon reload that "enable" would
# otherwise perform after making its changes.
systemctl enable --now --no-reload "${gpu_agent_unit}"
# Generate the TensorFlow Serving service.
# The heredoc delimiter is quoted so the unit text is written verbatim.
cat <<'EOH' > /lib/systemd/system/tfserve.service
[Unit]
Description=Inf Logic
[Service]
Type=simple
PIDFile=/run/tfserve_agent.pid
ExecStart=/bin/bash --login -c '/usr/local/bin/tensorflow_model_server --model_base_path=/root/resnet_v2_int8_NCHW/ --rest_api_port=8888'
User=root
Group=root
WorkingDirectory=/
Restart=always
[Install]
WantedBy=multi-user.target
EOH

# Fetch and unpack the model before the service is started. The unit uses
# Restart=always, so starting it without a model would leave it crash-looping;
# abort with a diagnostic instead.
gsutil cp gs://cloud-samples-data/dlvm/t4/model.tar.gz /root/model.tar.gz || {
  printf 'ERROR: failed to download model.tar.gz\n' >&2
  exit 1
}
tar -xzvf /root/model.tar.gz -C /root || {
  printf 'ERROR: failed to extract /root/model.tar.gz\n' >&2
  exit 1
}

# The service's --model_base_path points at this directory.
# NOTE(review): presumably the archive provides it -- confirm against the
# tarball contents.
if [[ ! -d /root/resnet_v2_int8_NCHW ]]; then
  printf 'ERROR: /root/resnet_v2_int8_NCHW not found after extraction\n' >&2
  exit 1
fi

# Reload systemd manager configuration
systemctl daemon-reload

# Enable the tfserve service for subsequent boots and start it now (--now).
systemctl --no-reload --now enable /lib/systemd/system/tfserve.service