In [1]:
import import_ipynb
from VGGModel import vgg
import tensorflow as tf
import os
import time
import glob
import random

importing Jupyter notebook from VGGModel.ipynb


In [2]:
os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"  # os.environ[“CUDA_DEVICE_ORDER”] = “PCI_BUS_ID” # 按照PCI_BUS_ID顺序从0开始排列GPU设备
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'        # 设置当前使用的GPU设备仅为0号设备  设备名称为'/gpu:0'

In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
logical_gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu,True)
    except RuntimeError as e:
        print(e)
        exit(-1)
logical_gpus

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

In [8]:
data_root = os.path.abspath(os.path.join(os.getcwd(),"../../datasets"))
image_path = data_root+"/flower_data/"
train_dir = image_path+'train'
validation_dir = image_path+'val'

In [9]:
im_height = 224
im_width = 224
batch_size = 32
epochs = 10

In [10]:
if not os.path.exists('save_weights'):
    os.makedirs("save_weights")

In [11]:
# class dict
data_class = [cla for cla in os.listdir(train_dir) if '.txt' not in cla]
class_num = len(data_class)
class_dict = dict((value,index) for index,value in enumerate(data_class))
inverse_dict = dict((value,key) for key,value in class_dict.items())
inverse_dict

{0: 'dandelion', 1: 'daisy', 2: 'roses', 3: 'tulips', 4: 'sunflowers'}

In [12]:
# load train images list
train_image_list = glob.glob(train_dir+"/*/*.jpg")
random.shuffle(train_image_list)
train_num = len(train_image_list)
train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]

In [13]:
# load validation images list
val_image_list = glob.glob(validation_dir+"/*/*.jpg")
random.shuffle(val_image_list)
val_num = len(val_image_list)
val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]

In [14]:
def process_path(img_path,label):
    label = tf.one_hot(label,depth=class_num)
    image = tf.io.read_file(img_path)
    image = tf.image.decode_jpeg(image)
    image = tf.image.convert_image_dtype(image,tf.float32)
    image = tf.image.resize(image,[im_height,im_width])
    return image,label

In [15]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
AUTOTUNE

-1

In [16]:
# batch_size_per_replica = 32
# # Global batch size
# GLOBAL_BATCH_SIZE = batch_size_per_replica * strategy.num_replicas_in_sync
# # Buffer size for data loader
# BUFFER_SIZE = batch_size_per_replica * strategy.num_replicas_in_sync * 16

In [22]:
# load train dataset
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list,train_label_list))
    train_dataset = train_dataset.shuffle(buffer_size=train_num)\
                                .map(process_path,num_parallel_calls=AUTOTUNE)\
                                .repeat().batch(batch_size).prefetch(AUTOTUNE)
    
    
    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list,val_label_list))
    val_dataset = val_dataset.map(process_path,num_parallel_calls=AUTOTUNE).repeat().batch(batch_size)
    
    train_dataset_distribute = strategy.experimental_distribute_dataset(train_dataset)
    val_dataset_distribute = strategy.experimental_distribute_dataset(val_dataset)

In [23]:

with strategy.scope():
    model=vgg('vgg16',224,224,5)
    model.summary()
    
    # use keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')
    
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')
    

    def train_step(images,labels):
        with tf.GradientTape() as tape:
            predictions = model(images,training=True)
            loss = loss_object(labels,predictions)
        gradients = tape.gradient(loss,model.trainable_variables)
        optimizer.apply_gradients(zip(gradients,model.trainable_variables))
        
        train_loss(loss)
        train_accuracy(labels,predictions)
        

    def test_step(images,labels):
        predictions = model(images,training=False)
        t_loss = loss_object(labels,predictions)
        
        test_loss(t_loss)
        test_accuracy(labels,predictions)
        
    best_test_loss = float('inf')
    train_step_num = train_num//batch_size
    val_step_num = val_num//batch_size
    for epoch in range(epochs+1):
        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()
        
        t1 = time.perf_counter()
        for index,(images,labels) in enumerate(train_dataset):
            train_step(images,labels)
            if index+1 == train_step_num:
                break
        print(time.perf_counter-t1)
        
        for index,(images,labels) in enumerate(val_dataset):
            test_step(images,labels)
            if index+1 == val_step_num:
                break
        template = 'Epoch {},loss: {},Accuracy: {},Test Loss: {}'
        print(template.format(epoch,
                             train_loss.result(),
                             train_accuracy.result()*100,
                             test_loss.result(),
                             test_accuracy.result()*100))
        if test_loss.result() < best_test_loss:
            model.save_weights('./save_weights/myVGG_{}.ckpt'.format(epoch),save_format='tf')
            

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
feature (Sequential)         (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_4 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 25088)             0         
_________________________________________________________________
dense_12 (Dense)             (None, 2048)              51382272  
_________________________________________________________________
dropout_9 (Dropout)          (None, 2048)              0         
_________________________________________________________________
dense_13 (Dense)             (None, 2048)              4196

ValueError: Please use `tf.keras.losses.Reduction.SUM` or `tf.keras.losses.Reduction.NONE` for loss reduction when losses are used with `tf.distribute.Strategy` outside of the built-in training loops. You can implement `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE` using global batch size like:
```
with strategy.scope():
    loss_obj = tf.keras.losses.CategoricalCrossentropy(reduction=tf.keras.losses.reduction.NONE)
....
    loss = tf.reduce_sum(loss_obj(labels, predictions)) * (1. / global_batch_size)
```
Please see https://www.tensorflow.org/alpha/tutorials/distribute/training_loops for more details.