In [9]:
import os

# Project root used as the working directory for the rest of the notebook
# (presumably so the local modules imported in the next cell resolve — TODO confirm).
PROJECT_DIR = 'C:/Users/Abhilash/PycharmProjects/Instance segmentation'
os.chdir(PROJECT_DIR)

# Exported here, ahead of the TensorFlow import that happens in a later cell.
os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'

Import libraries

In [2]:
import tensorflow as tf
import numpy as np
from DataPipeline import get_dataset
from model import DeepMask
from Loss  import Joint_loss
from train import train
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.regularizers import L1L2
from tensorflow.train import Checkpoint
from datetime import datetime
# Ask TensorFlow to grow GPU memory on demand for the first visible GPU.
# This stays best-effort: a failure is reported but never aborts the notebook.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except (ValueError, RuntimeError) as err:
        # Invalid device or cannot modify virtual devices once initialized.
        print('Could not enable GPU memory growth:', err)
else:
    # Previously an IndexError on physical_devices[0] was silently swallowed here.
    print('No GPU found; skipping memory growth configuration.')

Create the model, loss, and optimizer objects

In [3]:
# Shapes handed to the model constructor and (output_shape) to the data pipeline.
input_shape = (224, 224, 3)
output_shape = (224, 224)

# Kernel regularizer with only the l2 coefficient set.
kernel_regularizer = L1L2(l2=5e-5)

model = DeepMask(
    input_shape,
    output_shape,
    kernel_regularizer=kernel_regularizer,
)

# SGD with momentum, plus the joint loss used by the training loop.
sgd = SGD(learning_rate=0.001, momentum=0.9)
loss_func = Joint_loss()

Get the training and validation datasets

In [4]:
Batch_size = 10
datadir   = 'F:/datasets/COCO 2017'

# Training split: shuffled, repeated indefinitely and batched.
train_dataset = get_dataset(datadir,'train',output_shape)
batched_train_dataset = train_dataset.shuffle(10).repeat().batch(Batch_size)

# Validation split, loaded with use_aug = False.
val_dataset = get_dataset(datadir,'val',output_shape,use_aug = False)
# Fix: batch the validation split itself. The original built this pipeline from
# train_dataset, so the reported validation loss was computed on training data.
batched_val_dataset = val_dataset.shuffle(10).repeat().batch(Batch_size)

In [5]:
# Where checkpoints live and the filename prefix used when saving them.
checkpoint_dir = 'F:/datasets/COCO 2017/checkpoint'
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt')

# Track the model, the optimizer and an epoch counter in a single checkpoint object.
checkpoint = Checkpoint(
    model=model,
    optimizer=sgd,
    epoch=tf.Variable(0),
)

# Restore from the most recent checkpoint found in checkpoint_dir.
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1c5fc3b1e88>

In [6]:
# One timestamped log directory per run, with separate train/test sub-directories.
current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
run_log_dir = 'logs/stats/' + current_time
train_log_dir = run_log_dir + '/train'
test_log_dir = run_log_dir + '/test'

# Summary writers passed to train() for the training and validation scalars.
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

In [7]:
import tensorflow as tf
import time
import sys
def get_data_string(steps,current_step,loss, current_time):
    """Build a one-line text progress bar for the training loop.

    Args:
        steps: total number of steps in the epoch.
        current_step: step reached so far (0 <= current_step <= steps).
        loss: value printed after ``Loss:`` (converted with ``str``).
        current_time: elapsed seconds, printed rounded to 3 decimals.

    Returns:
        A string of the form ``'<current>/<steps> :<bar> Loss: <loss> Time: <t>'``
        where the bar is made of ``=``, an optional ``>`` head, and ``.`` padding.
    """
    total_bar = 50
    # Number of steps represented by one bar segment. Clamp to 1 so that short
    # epochs (steps < 25) no longer cause a modulo/division by zero.
    increase_step = max(1, int(steps/total_bar + 0.5))

    # Closed form of the original loop: the bar is drawn for the largest multiple
    # of increase_step that does not exceed current_step.
    dashes = current_step // increase_step
    reached = dashes * increase_step

    bar = '=' * (dashes - 1)
    # The '>' head is omitted at the very start and once the epoch is complete.
    if reached != steps and reached != 0:
        bar += '>'
    bar += '.' * (total_bar - dashes - 1)

    data_String = '{0}/{1} :'.format(current_step,steps) + bar
    data_String += ' Loss: ' + str(loss) + ' Time: ' + str(round(current_time,3))
    return data_String

@tf.function
def train_one_step(inp, y_true,loss_func,optimizer,apply_regularization = False):
    """Run one optimisation step on the notebook-level ``model``.

    Args:
        inp: batch of model inputs.
        y_true: targets handed to ``loss_func`` together with the prediction.
        loss_func: callable invoked as ``loss_func(y_true, y_pred)``.
        optimizer: optimizer whose ``apply_gradients`` updates the model.
        apply_regularization: when true, the sum of ``model.losses`` is added
            to the loss before differentiation.

    Returns:
        The loss value that was differentiated for this step.
    """
    # Forward pass and loss are recorded on the tape.
    with tf.GradientTape() as tape:
        prediction = model(inp)
        loss = loss_func(y_true, prediction)
        if apply_regularization:
            loss += tf.reduce_sum(model.losses)

    # Read the variable list only after the forward pass, as the original did.
    weights = model.trainable_variables
    gradients = tape.gradient(loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))

    return loss


def train(train_dataset, val_dataset, epochs, steps_per_epoch, val_steps, checkpoint, checkpoint_prefix, loss_func, optimizer,train_writer,test_writer, save_after = None,apply_regularization = False):
    """Custom training loop with per-epoch validation, logging and checkpointing.

    Training resumes from ``checkpoint.epoch`` and runs until ``epochs`` is
    reached, or stops early if the running training loss becomes NaN.

    Args:
        train_dataset: dataset yielding ``(img, mask, score)`` training batches.
        val_dataset: dataset yielding ``(img, mask, score)`` validation batches.
        epochs: epoch index at which training stops.
        steps_per_epoch: number of training batches taken per epoch.
        val_steps: number of validation batches taken per epoch.
        checkpoint: checkpoint object exposing an ``epoch`` variable and ``save``.
        checkpoint_prefix: path prefix passed to ``checkpoint.save``.
        loss_func: callable invoked as ``loss_func(y_true, y_pred)``.
        optimizer: optimizer forwarded to ``train_one_step``.
        train_writer: summary writer receiving the training loss.
        test_writer: summary writer receiving the validation loss.
        save_after: if not None, save a checkpoint every ``save_after`` epochs.
        apply_regularization: forwarded to ``train_one_step``.

    Returns:
        The notebook-level ``model`` object (it is not passed in as an argument).
    """
    epoch = checkpoint.epoch.numpy()

    while epoch < epochs:
        tf.profiler.experimental.start('profile-logs')
        # try/finally guarantees the profiler is stopped on the NaN early exit and
        # on interruption; otherwise the next start() call would fail.
        try:
            step = 0
            avg_loss = 0.
            # Pre-initialised so the epoch summary still works if no batch is yielded.
            string = ''
            print('Epoch :' + str(epoch))
            now = time.time()
            for img, mask, score in train_dataset.take(steps_per_epoch):
                y_true = {'mask':mask, 'score':score}
                loss = train_one_step(img,y_true,loss_func,optimizer,apply_regularization)
                loss = float(loss)
                # Running mean of the loss over the steps completed so far.
                avg_loss = (avg_loss*step + loss)/(step + 1)
                step += 1

                # Report the number of completed steps so the bar ends at N/N.
                string = get_data_string(steps_per_epoch, step, avg_loss, time.time() - now)
                sys.stdout.write('\r' + string)
                # Flush instead of sleeping 10 ms per step to refresh the line.
                sys.stdout.flush()

            if tf.math.is_nan(avg_loss):
                # Log the last training batch for inspection, then stop training.
                with train_writer.as_default():
                    tf.summary.image('image',img,epoch,max_outputs =10)
                break

            epoch += 1
            checkpoint.epoch.assign_add(1)

            val_step = 0
            val_avg_loss = 0.
            for img, mask, score in val_dataset.take(val_steps):
                y_true = {'mask': mask, 'score': score}
                y_pred = model(img)
                loss = float(loss_func(y_true,y_pred))
                val_avg_loss = (val_avg_loss * val_step + loss) / (val_step + 1)
                val_step += 1

            string += ' Val Loss: ' + str(val_avg_loss)
            after = time.time()
            string += ' Time Taken: {0}'.format(round(after - now,3))
            sys.stdout.write('\r' + string)
            print()
            # NOTE(review): assumes a device named 'GPU:0' exists — confirm on CPU-only hosts.
            print(tf.config.experimental.get_memory_usage('GPU:0'))

            with train_writer.as_default():
                tf.summary.scalar('Loss',avg_loss,epoch)
            with test_writer.as_default():
                tf.summary.scalar('Val_Loss',val_avg_loss,epoch)

            if save_after is not None:
                if epoch%save_after == 0:
                    checkpoint.save(checkpoint_prefix)
        finally:
            tf.profiler.experimental.stop()

    return model

In [11]:
# Training schedule: target epoch count, batches per epoch, validation batches per epoch.
epochs = 12
steps_per_epoch = 5000
val_steps = 50
#tf.profiler.experimental.server.start(6009)
#tf.profiler.experimental.client.trace('grpc://localhost:6009','logs', 20000)

# Launch training; checkpoints are written every 10 epochs and the
# regularisation losses are added to the training loss.
model = train(
    train_dataset=batched_train_dataset,
    val_dataset=batched_val_dataset,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    val_steps=val_steps,
    checkpoint=checkpoint,
    checkpoint_prefix=checkpoint_prefix,
    loss_func=loss_func,
    optimizer=sgd,
    train_writer=train_summary_writer,
    test_writer=test_summary_writer,
    save_after=10,
    apply_regularization=True,
)

Epoch :10
1076711680
Epoch :11
136/5000 :>................................................ Loss: 19.475798027358792 Time: 87.948

KeyboardInterrupt: 

In [12]:
# Manually write a checkpoint; the returned save path is shown as the cell output.
checkpoint.save(file_prefix = checkpoint_prefix)

'F:/datasets/COCO 2017/checkpoint\\ckpt-7'

In [None]:
# `output` was never defined in this notebook and `img` only exists inside train(),
# so this cell failed on a fresh kernel. Run the model on one validation batch
# (batches are (img, mask, score) triples) and keep the predicted masks.
img, true_mask, true_score = next(iter(batched_val_dataset))
output = model(img)
masks = output['mask']

In [None]:
import matplotlib.pyplot as plt
# Show each image next to its mask, with the mask binarised at a 0.1 threshold.
for image, mask in zip(img, masks):
    # Values below 0.1 become 0.0, everything else becomes 1.0.
    mask = np.where(mask < 0.1, 0., 1.0)

    plt.subplot(1, 2, 1)
    plt.imshow(image)

    plt.subplot(1, 2, 2)
    plt.imshow(mask, cmap='gray')

    plt.show()
    

In [None]:
# NOTE(review): unlike the tf.summary.image call inside train(), this one passes no
# arguments — looks like an unfinished scratch cell; TODO confirm, then complete or remove.
tf.summary.image()