# Experiment template. 
## Note: 
template for experiments

In [1]:
# description = {here could be a short description of the experiment}

# specific name of the experiment
eval_name = 'allInoc_adadelta_8nfilters_debug'

if eval_name is None:
    with open(path_to_dir+'eval_name.txt') as data_file:    
        eval_name = json.load(data_file)
print "eval_name is", eval_name 

eval_name is allInoc_adadelta_8nfilters_debug


## The task is to predict the number of lesions in the photo
***
### Content:
* [Settings and experiment parameters](#sep)
* [Load Data](#ld)
* [Learning and visualizing results](#lav)
* [Conclusions](#c)
***

#### Toggle ON/OFF the raw code

In [2]:
from IPython.display import HTML

# Markup rendered into the notebook page: the script hides every `div.input`
# once the document is ready and flips the visibility each time the link
# below it is clicked.
toggle_markup = '''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
CODE WAS HIDDEN. TO TOGGLE ON/OFF THE RAW CODE, CLICK
<a href="javascript:code_toggle()">here</a>.'''

# Keep this as the last expression so the cell displays the HTML object.
HTML(toggle_markup)

---
<a name="sep"/>
# Settings and experiment parameters
</a>

In [2]:
# Export THEANO_FLAGS through the %env magic; this runs before the theano import below.
%env THEANO_FLAGS="device=gpu2"

### Check theano ####
# The import reports which device is in use (see the cell output).
import theano

env: THEANO_FLAGS="device=gpu2"


Using gpu device 2: Tesla K40m (CNMeM is disabled, cuDNN 5005)


### Global variables and paths 
* $\textbf{Add the main directory '.../code' to   sys.path}$. 

The following directory was added:

In [3]:
#### Add the main dir to sys ####

import os, sys

# '__file__' is a plain string here, so abspath() resolves it against the
# current working directory; stripping three path components therefore yields
# the directory two levels above the working directory.
placeholder_path = os.path.abspath('__file__')
working_dir = os.path.dirname(placeholder_path)
parentdir = os.path.dirname(os.path.dirname(working_dir))
print(parentdir)

# Put the project root first so its packages win over same-named ones.
sys.path.insert(0, parentdir)

/slowhome/makarova/columbia/code


In [4]:
# Load IPython's autoreload extension and select mode 2.
%load_ext autoreload
%autoreload 2

* $\textbf{Set each parameter to a specific value or to None}$ (if None, use run.ipynb to set it from outside).

Note: If you set a parameter to None, the files with parameters should be in the same directory as this .ipynb file 
(otherwise change path_to_dir to the directory you want).


In [5]:
# True: train the network; False: try to load previously saved weights instead
TRAIN_NN = True

# Network size (passed to uNet as n_filters)
N_FILTERS = 4
# Side length of the square crops fed to the network
PATCH_SIZE = 256

# Training schedule
N_EPOCHS = 3000
BATCH_SIZE = 20
N_BATCHES_PER_EPOCH = 10
N_BATCHES_PER_EPOCH_valid = 10

In [6]:
# Read global params from files
import json

print "N_FILTERS = ", N_FILTERS
print "BATCH_SIZE = ", BATCH_SIZE
print "N_EPOCHS = ", N_EPOCHS
print "PATCH SIZE = ", PATCH_SIZE

N_FILTERS =  4
BATCH_SIZE =  20
N_EPOCHS =  3000
PATCH SIZE =  256


In [7]:
# List files for the three splits; every line pairs a segmentation image with
# its input image
txt_train = '/home/makarova/columbia/data/inoculated_1/gt_img_train_ellimg_small.txt'
txt_valid = '/home/makarova/columbia/data/inoculated_1/gt_img_valid_ellimg_small.txt'
txt_test = '/home/makarova/columbia/data/inoculated_1/gt_img_test_ellimg_small.txt'

from config import results_path
# Everything produced by THIS experiment is stored under its own sub-directory
results_eval_path = results_path + eval_name + "/"

for list_file in (txt_train, txt_valid, txt_test):
    print(list_file)

print(results_path)

/home/makarova/columbia/data/inoculated_1/gt_img_train_ellimg_small.txt
/home/makarova/columbia/data/inoculated_1/gt_img_valid_ellimg_small.txt
/home/makarova/columbia/data/inoculated_1/gt_img_test_ellimg_small.txt
/home/makarova/columbia/code/results/


In [8]:
##### IMPORTS ####
import matplotlib
# Select the 'Pdf' backend before pyplot is imported.
# NOTE(review): the %matplotlib inline magic below selects the inline backend
# afterwards -- confirm this call is still needed.
matplotlib.use('Pdf')

import numpy as np 
import matplotlib.pyplot as plt
%matplotlib inline

---
<a name='ld'/>
# Load Data
</a>

* Load and visualize data

In [9]:
from PIL import Image

def load_data(txt, img_size=None):
    """Load the images and ground-truth masks listed in a text file.

    Every non-blank line of `txt` must hold two space-separated paths: the
    ground-truth mask first and the input image second. Pairs are ordered by
    image path; image and mask are sorted together so that a mask can never
    be attached to the wrong image.

    Parameters
    ----------
    txt : str
        Path of the list file.
    img_size : tuple of int, optional
        (height, width) shared by all images. When omitted it is taken from
        the first image; an empty list falls back to (2000, 3000).

    Returns
    -------
    data : numpy.ndarray, uint8, shape (n, 3, height, width)
        Input images with the channel axis moved to the front.
    target : numpy.ndarray, uint8, shape (n, 1, height, width)
        Masks divided by 255 and stored as uint8, so only pixels equal to 255
        end up as 1; everything else is truncated to 0.

    Raises
    ------
    IndexError
        If a non-blank line does not contain two space-separated fields.
    """
    pairs = []
    with open(txt, 'r') as fin:
        for line in fin.read().splitlines():
            if not line.strip():
                # tolerate blank lines (e.g. a trailing newline at the end)
                continue
            fields = line.split(' ')
            pairs.append((fields[1], fields[0]))
    # sort (image, mask) tuples as a unit to keep the pairing intact
    pairs.sort()

    def allocate(height, width):
        # zero-filled containers for all images / masks of one split
        images = np.zeros((len(pairs), 3, height, width), dtype=np.uint8)
        masks = np.zeros((len(pairs), 1, height, width), dtype=np.uint8)
        return images, masks

    data = target = None
    for idx, (im_path, gt_path) in enumerate(pairs):
        image = plt.imread(im_path)
        if data is None:
            height, width = img_size if img_size is not None else image.shape[:2]
            data, target = allocate(height, width)
        # (H, W, C) -> (C, H, W)
        data[idx] = image.transpose((2, 0, 1))
        # The second positional argument is kept exactly as before; matplotlib
        # receives it as `format`.
        # NOTE(review): it looks like a grayscale flag -- confirm the intent.
        mask = plt.imread(gt_path,0)
        target[idx, 0] = mask/255.

    if data is None:
        # empty list file: keep the historical default image size
        height, width = img_size if img_size is not None else (2000, 3000)
        data, target = allocate(height, width)
    return data, target

In [10]:
# Load the three splits in the order train, validation, test
(X_train, Y_train), (X_valid, Y_valid), (X_test, Y_test) = map(
    load_data, (txt_train, txt_valid, txt_test))

In [11]:
# Sanity check: dimensions of the training images and smallest mask value
print(X_train.shape)
print(Y_train.min())

(400, 3, 2000, 3000)
0


In [12]:
# Sanity check: largest pixel value in the training images
print(X_train.max())

255


In [13]:
# Sanity check: dimensions of the training masks
print(Y_train.shape)

(400, 1, 2000, 3000)


---
<a name='lav'/>
# Learning and Visualizing
</a>

In [14]:
from utils.generator import batch_generator, random_crop_generator, threaded_generator

* $ \textbf{ Characteristics of input images and the input layer}$

In [15]:
#### PREPARE DATA FOR NN ####

import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer

nmb_channels, inp_shape = X_train[0].shape[0], X_train[0].shape[1:]
print 'Image shape', inp_shape
print "Number of channels:", nmb_channels

X_inp = T.tensor4('X_inp')
X_layer = InputLayer([BATCH_SIZE, nmb_channels, PATCH_SIZE, PATCH_SIZE], \
                     input_var=X_inp,name='input')

print "Input layer shape:", X_layer.output_shape

Image shape (2000, 3000)
Number of channels: 3
Input layer shape: [20, 3, 256, 256]


# NN

In [16]:
from models.unet import uNet

* $ \textbf{ Characteristics of NN}$

In [17]:
#### LOAD NN ####
# Build the U-Net on top of the input layer with two output classes
cnn = uNet(
    X_layer,
    n_filters=N_FILTERS,
    nmb_out_classes=2,
)

self.net['deconv1'].shape, self.net['contr_4_2'].shape (20, 64, 32, 32) (20, 32, 32, 32)
self.net['output_segmentation'] (20, 2, 256, 256)
self.net['dimshuffle'] (2, 20, 256, 256)
self.net['reshapeSeg'] (2, 1310720)
self.net['dimshuffle2'] (1310720, 2)
self.net['output_flattened'] (2, 1310720)
(20, 2, 256, 256) (2, 1310720)


In [18]:
### WEIGHTS sanity check ###
total_weights = int(T.sum([T.prod(w.shape) for w in cnn.weights]).eval())
print "Total weights:", total_weights

Total weights: 123030


* $ \textbf{Objective loss, updates, train and eval functions}$

In [19]:
#### GROUND TRUTH and CLASS_WEIGHTS theano vectors ####
# Symbolic inputs: an integer target vector and a float vector of loss weights
Y_gt = T.ivector('Target Y integer')
weights = T.vector('Loss weights')

# The model returns three components; the first two are summed into the
# objective that is optimised
loss_components = cnn.get_loss_components(Y_gt, weights)
ce, reg_l2, acc = loss_components
loss = ce + reg_l2

In [20]:
# Symbolic prediction exposed by the model; printing shows the theano variable
prediction_train = cnn.pred_y
print(prediction_train)

Softmax.0


In [21]:
import lasagne

# Adadelta update rules for all network weights with respect to the objective
updates = lasagne.updates.adadelta(
    loss,
    cnn.weights,
)

In [22]:
# create a convenience function to get the segmentation
# deterministic=True builds the inference-time graph (stochastic layers such as
# dropout are switched off), consistent with how prediction_test is built.
seg_scores = lasagne.layers.get_output(cnn.outlayer_seg, X_inp, deterministic=True)
# index of the highest-scoring entry along axis 1
seg_output = seg_scores.argmax(1)

In [23]:
# Deterministic output of the layer the loss is computed on
prediction_test = lasagne.layers.get_output(
    cnn.outlayer_for_loss,
    X_inp,
    deterministic=True,
)

In [24]:
# Compiled training step: returns [ce, reg_l2, acc] and applies the updates
train_func = theano.function(
    [X_inp, Y_gt, weights],
    [ce, reg_l2, acc],
    updates=updates,
)

In [25]:
#### FOR DEBUG ####
# from theano.compile.debugmode import DebugMode
# theano.config.exception_verbosity='high'
# T.cmp_sloppy=1
# train_func = theano.function([X_inp,Y_gt], [loss, acc_train], updates=updates, mode=DebugMode(check_isfinite=0))

In [26]:
# Same inputs and outputs as train_func, but without parameter updates
eval_func = theano.function(
    [X_inp, Y_gt, weights],
    [ce, reg_l2, acc],
)

In [27]:
# Compiled function mapping a batch of inputs to seg_output
get_segmentation = theano.function(
    [X_inp],
    seg_output,
)

In [28]:
#### Note: Set weights for classes here ####
# Loss weight applied to target elements below 0.5 (non-lesion) ...
nonlesion_weight = 0.1
# ... and to all remaining target elements (lesion)
lesion_weight = 1.

* $ \textbf{Train NN}$

In [30]:
import time, datetime, pytz
# Wildcard import, stated once (it was duplicated before).
# NOTE(review): presumably the source of save()/load() used in this notebook -- confirm.
from utils.persistence import *
from visualizers.metrics import Metrics
# Container the training loop writes its per-epoch values into
metrics = Metrics()
from datetime import datetime as dt

def train_nn (N_EPOCHS):
    
    with open(logs_path, "a+") as logs:
                cur_time = dt.now(pytz.timezone('US/Eastern')).time()
                logs.write("\n\nEval {}, \nCurrent time {} \n"\
               .format(eval_name, cur_time.strftime("%Y-%m-%d %H:%M")))
                
    losses = []    
    epoch = 1

    t_start = time.time()
    
    for i in range(N_EPOCHS):
        print "epoch", epoch
        n_batches = 0
        t0 = time.time()
        for j in range(N_BATCHES_PER_EPOCH):
            data, target = train_generator.next()
            target_flat = target.flatten()
            if (i==0 and j==0): print '0 {}, 1 {}'.format(np.sum(target==0), np.sum(target==1))
            # make a binary vector of weights for target elements
            weights_target = np.where(target_flat<0.5, nonlesion_weight, lesion_weight).astype(np.float32)
            ce_i, reg_i, acc_i = train_func(data.astype(np.float32),target_flat,weights_target)   
            print 'ce_i, reg_i, acc_i', np.round(ce_i,3), np.round(reg_i,3), np.round(acc_i,3)
            loss_i = ce_i+reg_i
            
            metrics["train loss"][epoch] = ce_i
            metrics["train full objective"][epoch] = loss_i
            metrics["train reg"][epoch] = reg_i
            losses.append(loss_i)   
            n_batches += 1
            if n_batches > N_BATCHES_PER_EPOCH:
                break

        if epoch%10==0:
            data, target = validation_generator.next()
            target_flat = target.flatten()
            weights_target = np.where(target_flat<0.5, nonlesion_weight, lesion_weight).astype(np.float32)
            ce_i, reg_i, acc_i = eval_func(data.astype(np.float32), target_flat, weights_target)
#             print 'eval: ce_i, reg_i, acc_i', ce_i, reg_i, acc_i
            loss_i = ce_i+reg_i
            
            metrics["test loss"][epoch] = ce_i
            metrics["test full objective"][epoch] = loss_i
            metrics["test reg"][epoch] = reg_i 


        if epoch%10==0:
            print "epoch:",epoch
            print 'mean loss for the last 10 epochs:', np.round(np.mean(losses[-10:]),3)
            with open(logs_path, "a+") as logs:
                cur_time = dt.now(pytz.timezone('US/Eastern'))
                logs.write("""\nEpoch {}, \nCurrent time {} \nmean loss for the last 10 epochs:{}"""\
               .format(epoch,cur_time.strftime("%Y-%m-%d %H:%M"), np.round(np.mean(losses[-10:]),3)))

        if epoch%100==0:
            # plot and save metrics
            fig = plt.figure(figsize=[15,5])
            path_save_plot = "MetricsEpoch{}.png".format(epoch)
            metrics.plot(save=True, path_to_save= path_save_plot)
            # weights snapshort
            plt.close()
            file_weights_path = eval_name + '_weights{}epoch'.format(epoch) + '.pickle'
            save(cnn.outlayer_for_loss, file_weights_path)
            
            plot_some_results(validation_generator, test_gen, BATCH_SIZE, 
                              info=True, n_images=4,path_to_save='.')

        epoch+=1
        print 'time for epoch: {} mins'\
        .format(round((time.time() - t0)/60.0, 3))
    print 'Overall time: {} mins'.format(round((time.time() - t_start)/60.0, 3))

In [31]:
# Batches of the training set, reduced to PATCH_SIZE crops
train_batches = batch_generator(X_train, Y_train, BATCH_SIZE)
train_crops = random_crop_generator(train_batches, info=True, crop_size=PATCH_SIZE)
# Wrapped by threaded_generator with num_cached=20
train_generator = threaded_generator(train_crops, num_cached=20)

In [32]:
# obtain images of the same size for validation
validation_generator = random_crop_generator(batch_generator(X_valid, Y_valid, BATCH_SIZE), 
                                             info=True, crop_size=PATCH_SIZE)
validation_generator = threaded_generator(validation_generator, num_cached=20)

In [None]:
#### TRAIN NN ####
logs_path = './logs.txt'
from utils.persistence import *
import time

print "N_EPOCHS = ", N_EPOCHS
nn_weights = results_eval_path + eval_name + "_weights.pickle"
TRAIN_NN=True
if TRAIN_NN:
    train_nn(N_EPOCHS) 
else:
    try:
        a = load(cnn.outlayer_for_loss, nn_weights)
    except:
        print "problem with weights loading, nn is being trained"
        train_nn(N_EPOCHS)

N_EPOCHS =  3000
epoch 1
0 1232132, 1 78588
ce_i, reg_i, acc_i 0.886 0.004 0.879
ce_i, reg_i, acc_i 0.735 0.004 0.76
ce_i, reg_i, acc_i 0.745 0.004 0.292
ce_i, reg_i, acc_i 0.726 0.004 0.303
ce_i, reg_i, acc_i 1.241 0.004 0.521


* $ \textbf{Save weights}$ 

In [None]:
from utils.persistence import *

# Create the per-experiment results directory on first use
results_dir_missing = not os.path.exists(results_eval_path)
if results_dir_missing:
    os.makedirs(results_eval_path)

# Final weights are written to <results_eval_path><eval_name>_weights.pickle
weights_file_name = eval_name + '_weights' + '.pickle'
file_path = results_eval_path + weights_file_name
save(cnn.outlayer_for_loss, file_path)

In [None]:
#### TEST SAVING ####
try:
    a = load(cnn.outlayer_for_loss, nn_weights)
except:
    print "The problem occured. Weights were not saved"
else: print 'Weights were successfully saved to the file: ', file_path

* $ \textbf{Visualizations}$ 

In [None]:

def plot_some_results(pred_fn, test_generator, BATCH_SIZE,path_to_save,  n_images=10, 
                    info = False, info_threshold=0.05):
    
    def plot(d,s,r):
        fig = plt.figure(figsize=(12, 6))
            
        ax1 = fig.add_subplot(131)
        ax1.imshow(d.transpose(1,2,0))
        ax1.set_title('input')

        ax2 = fig.add_subplot(132)
        ax2.imshow(s[0])
        ax2.set_title('gt')

        ax3 = fig.add_subplot(133)
        ax3.imshow(r)
        ax3.set_title('prediction')

        plt.savefig(path_to_save+"{}.png".format(fig_ctr))
        plt.close()
            
    fig_ctr = 0
    for data, seg in test_generator:
        res = pred_fn(data)
        for d, s, r in zip(data, seg, res):
            print 'np.sum(s>0),np.sum(s==0), np.sum(r>0),np.sum(r==0)', \
            np.sum(s>0),np.sum(s==0), np.sum(r>0),np.sum(r==0)
            if info:
                info_percent = np.sum(s > 0)*1./ np.size(s.ravel())
                if info_percent < info_threshold:
                    pass
                 
                else: 
                    print 'info_percent', info_percent
                    plot(d,s,r)
                    fig_ctr += 1
                    print 'done:', fig_ctr
            else:
                    plot(d,s,r)
                    fig_ctr += 1
                    print 'done:', fig_ctr
        if fig_ctr > n_images:
            break

In [None]:
# create some png files showing (raw image, ground truth, prediction)
test_gen = random_crop_generator(batch_generator(X_test, Y_test, BATCH_SIZE), PATCH_SIZE)
path_to_save = os.path.join(results_eval_path + eval_name)
print 'plots are saved to:', path_to_save
plot_some_results(get_segmentation, test_gen, BATCH_SIZE, info=True, path_to_save=path_to_save)

In [None]:
import os

# Gather the png files of this experiment and keep at most the first 30
files = []
for f in os.listdir(results_eval_path):
    if f.endswith('.png'):
        files.append(os.path.join(results_eval_path, f))
files = files[:30]
print(files)

# One figure per stored image
for f in files:
    plt.figure()
    plt.imshow(plt.imread(f))

---
<a name='c'/>
# Conclusions
</a>