# Delf Train Tutorial

By following this notebook you can learn how to run DELF training from a Jupyter notebook.

In [1]:
import os

# Set this before TensorFlow is imported so the native (C++) log filter is
# already in place while the library loads; setting it afterwards can be too
# late to hide messages emitted during import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf

# Only show ERROR-level messages from TensorFlow's Python-side logger.
tf.logging.set_verbosity(tf.logging.ERROR)

from delf_train import Config, DelfTrainerV1

resnet_v1_50.ckpt download is completed


#### Make config object

- data_path    :  path to your dataset. The dataset directory must be split into "/train" and "/test" subdirectories, laid out as follows.
    - data_path/train  
        - /dog
            - /img1
            - /img2
        - /mug
        - /cat
    - data_path/test   
        - test_img1
        - test_img2

- train_step   :  "resnet_finetune" or "att_learning".   
- save_name    :  name of the checkpoint file to save.   
- nb_epoch     :  total number of epochs.  
- fc_epoch     :  when fine-tuning ResNet, the number of epochs for training the fully connected layer. Usually 10 epochs is enough. 
- restore_file :  path of the checkpoint file to restore.   
- ckpt_type    :  "resnet_ckpt" or "attention_ckpt". This value controls which variables are loaded from restore_file.   

#### finetune config

In [2]:
# Configuration for the first training step: fine-tuning ResNet.
ft_config = Config()
# Dataset root; it must contain the "train" and "test" subdirectories.
ft_config.data_path = "/home/soma03/data/paris"
ft_config.train_step = "resnet_finetune"
# The other accepted train_step value is "att_learning" (attention training).
# Checkpoint name to save under.
ft_config.save_name = 'local_ckpt/ft'
# Pretrained ResNet checkpoint to start from.
ft_config.restore_file = 'resnet_v1_50.ckpt'
# Presumably the learning rate used for the fully connected layer -- confirm in delf_train.
ft_config.fc_learning_rate = 0.0001
# Total number of epochs, and how many of them train the fully connected layer.
ft_config.nb_epoch = 3
ft_config.fc_epoch = 2
# restore_file is a ResNet checkpoint, so load it as one.
ft_config.ckpt_type = 'resnet_ckpt'
# The other accepted ckpt_type value is 'attention_ckpt'.

In [3]:
# Build the fine-tuning trainer; the log printed below reports the dataset
# checks, dataset loading, ResNet construction and weight loading.
trainer = DelfTrainerV1(ft_config)

[ check_train_dataset ] function : 0.001 s
[ check_infer_dataset ] function : 0.001 s
[ _label_to_int ] function : 0.000 s
[ load_dataset ] function : 0.014 s
[ number of file names ] = 6392
[ classes ] = 12
[ _parse_function ] function : 0.004 s
[ _parse_function ] function : 0.003 s
[ build_resnet ] function : 1.617 s
=== weights loaded === 


In [4]:
# Start fine-tuning; batch-level and epoch-level accuracy/loss are printed below.
trainer.run()

= batches =: 89
Starting epoch 1 / 3
0/89 batch_acc : 0.078125, batch_loss : 3.058543
10/89 batch_acc : 0.078125, batch_loss : 2.752277
20/89 batch_acc : 0.140625, batch_loss : 2.569234
30/89 batch_acc : 0.234375, batch_loss : 2.412732
40/89 batch_acc : 0.500000, batch_loss : 2.158345
50/89 batch_acc : 0.390625, batch_loss : 2.243457
60/89 batch_acc : 0.468750, batch_loss : 2.146315
70/89 batch_acc : 0.406250, batch_loss : 2.004700
80/89 batch_acc : 0.453125, batch_loss : 1.990900
epoch: 1, train_loss: 2.296337, train_acc: 0.337079, val_acc: 0.517188
Training time for one epoch : 16.1
Starting epoch 2 / 3
0/89 batch_acc : 0.500000, batch_loss : 1.881639
10/89 batch_acc : 0.593750, batch_loss : 1.695434
20/89 batch_acc : 0.531250, batch_loss : 1.783291
30/89 batch_acc : 0.609375, batch_loss : 1.742352
40/89 batch_acc : 0.625000, batch_loss : 1.761551
50/89 batch_acc : 0.578125, batch_loss : 1.671657
60/89 batch_acc : 0.578125, batch_loss : 1.649040
70/89 batch_acc : 0.578125, batch_loss

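#### Frequent error
If you get an error message like   
"InvalidArgumentError: Expected image (JPEG, PNG, or GIF), got unknown format starting with '<!DOCTYPE HTML P'"    
it usually means there is a junk file (for example, an HTML page saved with an image name) in your data_path directory.   
The command below finds and removes such files. Note that it deletes every file smaller than roughly 10 KB, so first check that none of your valid images are that small.   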


```bash
# in your data_path directory type this.
find ./ -type f -size -20 -exec rm {} \;
```

In [5]:
# Clear TensorFlow's default graph so the next trainer is built in a fresh graph.
tf.reset_default_graph()

#### set attention config

In [6]:
# Configuration for the second training step: attention learning.
att_config = Config()
# Dataset root; it must contain the "train" and "test" subdirectories.
att_config.data_path = "/home/soma03/data/paris"
# The other accepted train_step value is "resnet_finetune".
att_config.train_step = "att_learning"
# Checkpoint name to save under.
att_config.save_name = 'local_ckpt/att'
# Presumably the epoch-3 checkpoint written by the fine-tuning run
# (save_name 'local_ckpt/ft', nb_epoch 3) -- adjust if your run differs.
att_config.restore_file = 'local_ckpt/ft_3'
# Must be set on att_config: assigning it to ft_config (as this cell did
# before) leaves the attention run on the default learning rate.
att_config.att_learning_rate = 0.0001
att_config.nb_epoch = 5
# The restore file comes from the ResNet fine-tuning step, so load it as a
# ResNet checkpoint.
att_config.ckpt_type = 'resnet_ckpt'
# Use 'attention_ckpt' instead when restoring from an attention checkpoint.

In [7]:
# Build the attention trainer; the log printed below reports the dataset
# checks, dataset loading, attention model construction and weight loading.
att_trainer = DelfTrainerV1(att_config)

[ check_train_dataset ] function : 0.001 s
[ check_infer_dataset ] function : 0.001 s
[ _label_to_int ] function : 0.001 s
[ load_dataset ] function : 0.024 s
[ number of file names ] = 6392
[ classes ] = 12
[ _parse_function ] function : 0.005 s
[ _parse_function ] function : 0.004 s
[ build_attention ] function : 1.331 s
=== weights loaded === 


In [8]:
# Start attention training; batch-level and epoch-level accuracy/loss are printed below.
att_trainer.run()

= batches =: 89
Starting epoch 1 / 5
0/89 batch_acc : 0.062500, batch_loss : 2.690763
10/89 batch_acc : 0.093750, batch_loss : 2.681996
20/89 batch_acc : 0.125000, batch_loss : 2.673540
30/89 batch_acc : 0.187500, batch_loss : 2.654790
40/89 batch_acc : 0.218750, batch_loss : 2.583465
50/89 batch_acc : 0.156250, batch_loss : 2.566812
60/89 batch_acc : 0.140625, batch_loss : 2.521439
70/89 batch_acc : 0.140625, batch_loss : 2.482640
80/89 batch_acc : 0.109375, batch_loss : 2.495832
epoch: 1, train_loss: 2.585510, train_acc: 0.162746, val_acc: 0.181250
Training time for one epoch : 14.6
Starting epoch 2 / 5
0/89 batch_acc : 0.187500, batch_loss : 2.389693
10/89 batch_acc : 0.234375, batch_loss : 2.357510
20/89 batch_acc : 0.406250, batch_loss : 2.254015
30/89 batch_acc : 0.281250, batch_loss : 2.327265
40/89 batch_acc : 0.250000, batch_loss : 2.322549
50/89 batch_acc : 0.281250, batch_loss : 2.422411
60/89 batch_acc : 0.328125, batch_loss : 2.300453
70/89 batch_acc : 0.328125, batch_loss

In [9]:
# Clear TensorFlow's default graph so the next trainer is built in a fresh graph.
tf.reset_default_graph()

#### continue attention learning 

In [10]:
# Configuration for resuming attention learning from an attention checkpoint.
cont_att_config = Config()
# Dataset root; it must contain the "train" and "test" subdirectories.
cont_att_config.data_path = "/home/soma03/data/paris"
# The other accepted train_step value is "resnet_finetune".
cont_att_config.train_step = "att_learning"
# Checkpoint name to save under.
cont_att_config.save_name = 'local_ckpt/cont_att'
# Presumably the epoch-5 checkpoint written by the attention run
# (save_name 'local_ckpt/att', nb_epoch 5) -- adjust if your run differs.
cont_att_config.restore_file = 'local_ckpt/att_5'
cont_att_config.nb_epoch = 3
# The other accepted ckpt_type value is 'resnet_ckpt'; the restore file here
# is an attention checkpoint, so 'attention_ckpt' is used.
cont_att_config.ckpt_type = 'attention_ckpt'

In [11]:
# Build the trainer that resumes attention learning; the log printed below
# reports the dataset checks, attention model construction and weight loading.
cont_att_trainer = DelfTrainerV1(cont_att_config)

[ check_train_dataset ] function : 0.001 s
[ check_infer_dataset ] function : 0.000 s
[ _label_to_int ] function : 0.001 s
[ load_dataset ] function : 0.015 s
[ number of file names ] = 6392
[ classes ] = 12
[ _parse_function ] function : 0.005 s
[ _parse_function ] function : 0.004 s
[ build_attention ] function : 1.393 s
=== weights loaded === 


In [None]:
# Resume attention training; batch-level accuracy/loss are printed below.
cont_att_trainer.run()

= batches =: 89
Starting epoch 1 / 3
0/89 batch_acc : 0.531250, batch_loss : 1.711880
10/89 batch_acc : 0.484375, batch_loss : 1.713839
20/89 batch_acc : 0.546875, batch_loss : 1.635017
30/89 batch_acc : 0.453125, batch_loss : 1.899761
40/89 batch_acc : 0.468750, batch_loss : 1.828340
50/89 batch_acc : 0.562500, batch_loss : 1.572291
60/89 batch_acc : 0.500000, batch_loss : 1.726810
70/89 batch_acc : 0.578125, batch_loss : 1.638733
80/89 batch_acc : 0.546875, batch_loss : 1.568192
