# Training Models on Colab

Upgrade pip and protobuf, then select TensorFlow 1.x and install NumPy

In [9]:
# Upgrade pip itself first, then upgrade the protobuf Python package.
!pip install --upgrade pip
!pip install --upgrade protobuf 

Collecting pip
[?25l  Downloading https://files.pythonhosted.org/packages/bd/b1/56a834acdbe23b486dea16aaf4c27ed28eb292695b90d01dff96c96597de/pip-20.2.1-py2.py3-none-any.whl (1.5MB)
[K     |▏                               | 10kB 17.4MB/s eta 0:00:01[K     |▍                               | 20kB 4.0MB/s eta 0:00:01[K     |▋                               | 30kB 4.1MB/s eta 0:00:01[K     |▉                               | 40kB 4.5MB/s eta 0:00:01[K     |█                               | 51kB 4.9MB/s eta 0:00:01[K     |█▎                              | 61kB 5.3MB/s eta 0:00:01[K     |█▌                              | 71kB 5.6MB/s eta 0:00:01[K     |█▊                              | 81kB 5.9MB/s eta 0:00:01[K     |██                              | 92kB 5.8MB/s eta 0:00:01[K     |██▏                             | 102kB 6.1MB/s eta 0:00:01[K     |██▍                             | 112kB 6.1MB/s eta 0:00:01[K     |██▋                             | 122kB 6.1MB/s eta 0:00:

In [2]:
%tensorflow_version 1.15
import tensorflow as tf
print(tf.__version__)

!pip install numpy

`%tensorflow_version` only switches the major version: 1.x or 2.x.
You set: `1.15`. This will be interpreted as: `1.x`.


TensorFlow 1.x selected.
1.15.2


Check GPU status

In [3]:
# Ask TensorFlow for the name of the GPU device it can see and stop the
# notebook early when it is not the first GPU.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [4]:
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm() 

Collecting gputil
  Downloading https://files.pythonhosted.org/packages/ed/0e/5c61eedde9f6c87713e89d794f01e378cfd9565847d4576fa627d758c554/GPUtil-1.4.0.tar.gz
Building wheels for collected packages: gputil
  Building wheel for gputil (setup.py) ... [?25l[?25hdone
  Created wheel for gputil: filename=GPUtil-1.4.0-cp36-none-any.whl size=7413 sha256=c020ca26e42b8b6e99f7fca3fae563a7a8e69e3682697fcf0886151c9fd18cbf
  Stored in directory: /root/.cache/pip/wheels/3d/77/07/80562de4bb0786e5ea186911a2c831fdd0018bda69beab71fd
Successfully built gputil
Installing collected packages: gputil
Successfully installed gputil-1.4.0
Gen RAM Free: 12.6 GB  | Proc size: 421.0 MB
GPU RAM Free: 11372MB | Used: 69MB | Util   1% | Total 11441MB


Mount Google Drive folder

In [5]:
# Mount Google Drive at /content/gdrive; this step asks for an authorization
# code interactively before the mount completes.
from google.colab import drive
drive.mount('/content/gdrive')

# change to working tensorflow directory on the drive
# (the path is quoted because "My Drive" contains a space)
%cd '/content/gdrive/My Drive/tensorflow/models/'

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive
/content/gdrive/My Drive/tensorflow/models


Install the protobuf compiler, compile the Object Detection protos, and build and install the package with setup.py

In [None]:
!apt-get install protobuf-compiler python-pil python-lxml python-tk
!pip install Cython
%cd /content/gdrive/My Drive/tensorflow/models/research/
!protoc object_detection/protos/*.proto --python_out=.

import os
os.environ['PYTHONPATH'] += ':/content/gdrive/My Drive/tensorflow/models/research/:/content/gdrive/My Drive/tensorflow/models/research/slim'

!python setup.py build
!python setup.py install
!pip install pycocotools

Reading package lists... Done
Building dependency tree       
Reading state information... Done
protobuf-compiler is already the newest version (3.0.0-9.1ubuntu1).
python-lxml is already the newest version (4.2.1-1ubuntu0.1).
python-pil is already the newest version (5.1.0-1ubuntu0.3).
python-tk is already the newest version (2.7.17-1~18.04).
The following package was automatically installed and is no longer required:
  libnvidia-common-440
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
/content/gdrive/My Drive/tensorflow/models/research
running build
running build_py
copying object_detection/__init__.py -> build/lib/object_detection
copying object_detection/eval.py -> build/lib/object_detection
copying object_detection/evaluator.py -> build/lib/object_detection
copying object_detection/trainer_test.py -> build/lib/object_detection
copying object_detection/trainer.py -> build/lib/object_detection
copying object_detection/video.py -> b

Check remaining session time (estimated from VM uptime against a 12-hour limit)

In [None]:
import time, psutil

# Session budget in seconds (12 hours) — assumed to be the runtime limit for
# this Colab session; adjust if the limit differs.
SESSION_LIMIT_SECONDS = 12*3600

# Seconds elapsed since the machine booted.
uptime_seconds = time.time() - psutil.boot_time()
# Whatever is left of the budget; reported in hours.
seconds_left = SESSION_LIMIT_SECONDS - uptime_seconds
print('Time remaining for this session is: ', seconds_left/3600)

Time remaining for this session is:  11.780559118853676


Start training

In [None]:
!pip install tf_slim
%cd /content/gdrive/My Drive/tensorflow/models/research/object_detection
os.environ['PYTHONPATH'] += ':/content/gdrive/My Drive/tensorflow/models/research/:/content/gdrive/My Drive/tensorflow/models/research/slim'

!python train.py --train_dir=training/ --pipeline_config_path=training/ssd_mobilenet_v1_pets.config --logtostderr

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
I0806 00:15:22.896668 140067333384064 learning.py:507] global step 5003: loss = 1.4378 (0.361 sec/step)
INFO:tensorflow:global step 5004: loss = 1.1616 (0.357 sec/step)
I0806 00:15:23.255597 140067333384064 learning.py:507] global step 5004: loss = 1.1616 (0.357 sec/step)
INFO:tensorflow:global step 5005: loss = 1.0593 (0.355 sec/step)
I0806 00:15:23.612529 140067333384064 learning.py:507] global step 5005: loss = 1.0593 (0.355 sec/step)
INFO:tensorflow:global step 5006: loss = 1.1683 (0.366 sec/step)
I0806 00:15:23.980054 140067333384064 learning.py:507] global step 5006: loss = 1.1683 (0.366 sec/step)
INFO:tensorflow:global step 5007: loss = 0.7183 (0.363 sec/step)
I0806 00:15:24.345002 140067333384064 learning.py:507] global step 5007: loss = 0.7183 (0.363 sec/step)
INFO:tensorflow:global step 5008: loss = 0.9311 (0.351 sec/step)
I0806 00:15:24.696920 140067333384064 learning.py:507] global step 5008: loss = 0.9311 (0.

Export inference graph

In [12]:
#  .ckpt needs to be updated every time to match last .ckpt generated
#  .config needs to be updated when changing model
!python export_inference_graph.py --input_type image_tensor --pipeline_config_path training/ssd_mobilenet_v1_pets.config --trained_checkpoint_prefix training/model.ckpt-69431 --output_directory new_graph

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



W0806 07:00:02.685358 140333095344000 module_wrapper.py:139] From export_inference_graph.py:145: The name tf.gfile.GFile is deprecated. Please use tf.io.gfile.GFile instead.


W0806 07:00:03.044758 140333095344000 module_wrapper.py:139] From /content/gdrive/My Drive/tensorflow/models/research/object_detection/exporter.py:402: The name tf.gfile.MakeDirs is deprecated. Please use tf.io.gfile.makedirs instead.


W0806 07:00:03.047240 140333095344000 module_wrapper.py:139] From /content/gdrive/My Drive/tensorflow/models/research/object_detection/exporter.py:121: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeh

Zip the exported graph directory in Google Drive

In [13]:
# Recursively archive the new_graph directory into model_graph.zip in the
# current working directory.
!zip -r model_graph.zip new_graph

  adding: new_graph/ (stored 0%)
  adding: new_graph/model.ckpt.data-00000-of-00001 (deflated 7%)
  adding: new_graph/model.ckpt.index (deflated 67%)
  adding: new_graph/checkpoint (deflated 42%)
  adding: new_graph/model.ckpt.meta (deflated 93%)
  adding: new_graph/frozen_inference_graph.pb (deflated 9%)
  adding: new_graph/saved_model/ (stored 0%)
  adding: new_graph/saved_model/variables/ (stored 0%)
  adding: new_graph/saved_model/saved_model.pb (deflated 9%)
  adding: new_graph/pipeline.config (deflated 69%)
