In [None]:
# Setup and Imports

# %reload_ext : Reloads an IPython extension by its module name.
%reload_ext autoreload
# %autoreload 2 : Reloads all modules (except those excluded by %aimport)  
#  every time before executing the Python code typed.
%autoreload 2

# %TEXT : code in this format is called a "magic function" 
#  https://stackoverflow.com/questions/43027980/purpose-of-matplotlib-inline/43028034

import os
import sys
import time

import matplotlib.pyplot as plt
# Sets backend of matplotlib to the 'inline' backend
%matplotlib inline
import numpy as np

sys.path.append(os.path.join(os.environ['REPO_DIR'], 'utilities'))
from utilities2015 import *
from metadata import *
from data_manager import *
from learning_utilities import *


# Name of the brain stack processed by every cell in this notebook.
stack = 'MD662'
# Sanity check that metadata for this stack is available: show its first valid filename.
# A single pre-formatted argument is used so the output is the same plain line
# under both Python 2 (where print('a', b) would display a tuple) and Python 3.
print('testing metadata: %s' % metadata_cache['valid_filenames_all'][stack][0])

# STEP 1
### raw (.jp2) -> raw (.tif)

In [None]:
# Preview of the input/output paths used by the JP2 -> TIFF conversion step.
# Nothing is converted here; the cell only builds the path lists and shows
# the first entry of each so they can be checked by eye.

# Directory holding the original lossless JPEG2000 scans.
# NOTE(review): absolute, machine-specific path that also hard-codes the stack name.
in_dir = '/media/alexn/BstemAtlasDataBackup/MD662/'
# Make sure the directory for the raw images exists and keep the value returned by the helper.
output_dir = create_if_not_exists(DataManager.get_image_dir_v2(stack=stack, prep_id=None, resol='raw'))

# NOTE(review): image_names_all_data_dirs_flattened is not defined in any visible cell;
# presumably it comes from one of the star imports or an earlier session - confirm.

# INPUT TEST
in_list = [os.path.join(in_dir, img_name + '_lossless.jp2')
           for img_name in list(image_names_all_data_dirs_flattened)]
# OUTPUT TEST
out_list = [DataManager.get_image_filepath_v2(stack=stack, prep_id=None,
                                              resol='raw', version=None, fn=img_name)
            for img_name in list(image_names_all_data_dirs_flattened)]

# Single formatted argument: prints a plain line on Python 2 and Python 3 alike
# (print('a', b) would display a tuple under Python 2).
print('first input file: %s' % in_list[0])
print('first output file: %s' % out_list[0])

In [None]:
# Decode every lossless JPEG2000 scan into a raw TIFF using kdu_expand.
# Author's notes: multiple core processing on every individual file (takes about 4 minutes each);
# in total will take about 30 hours.
# Creates new files at /CSHL_data_processed/MD662/MD662_raw/*_raw.tif
t = time.time()

# The %-format below fills in the kdu paths immediately; the doubled %% leaves literal
# %(in_fp)s / %(out_fp)s placeholders in the template, matching the kwargs_list keys.
kdu_cmd_template = 'export LD_LIBRARY_PATH=%(kdu_dir)s:$LD_LIBRARY_PATH; %(kdu_bin)s -i \"%%(in_fp)s\" -o \"%%(out_fp)s\"' % \
    {'kdu_bin': KDU_EXPAND_BIN, 'kdu_dir': os.path.dirname(KDU_EXPAND_BIN)}

# One input path and one output path per image name.
# NOTE(review): image_names_all_data_dirs_flattened is not defined in any visible cell - confirm its origin.
jp2_in_fps = [os.path.join(in_dir, img_name + '_lossless.jp2')
              for img_name in list(image_names_all_data_dirs_flattened)]
raw_out_fps = [DataManager.get_image_filepath_v2(stack=stack, prep_id=None,
                                                 resol='raw', version=None, fn=img_name)
               for img_name in list(image_names_all_data_dirs_flattened)]

run_distributed(kdu_cmd_template,
                kwargs_list={'in_fp': jp2_in_fps, 'out_fp': raw_out_fps},
                argument_type='single',
                jobs_per_node=1, # Use single process
                local_only=True, # Run local
                use_aws=False)   # Run local

# print() with one formatted argument works on Python 2 and 3; the former print statement
# was a SyntaxError on Python 3 and inconsistent with the print() calls in other cells.
print('done in %s seconds' % (time.time() - t)) # 2252 seconds full stack

# STEP 2
### raw_Ntb -> thumbnail_Ntb
rescale

# & STEP 3
### thumbnail_Ntb -> thumbnail_NtbNormalized
normalize_intensity

In [None]:
# Ensure the directory that will receive the raw_Ntb images exists before the conversion runs.
raw_ntb_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=None, resol='raw', version='Ntb')
create_if_not_exists(raw_ntb_dir)

In [None]:
# raw -> raw_Ntb
# Runs ImageMagick `convert ... -channel B -separate` on every raw TIFF, i.e. writes the
# blue channel of each image as its own file (the 'Ntb' version).
# Author's notes: multiple core processing on every individual file (takes about 4 minutes each);
# in total will take about 20 hours.
# Creates new files at CSHL_data_processed/MD662/MD662_raw_Ntb/*_raw_Ntb.tif
t = time.time()

# One input path (raw) and one output path (raw, version 'Ntb') per image name.
# NOTE(review): image_names_all_data_dirs_flattened is not defined in any visible cell - confirm its origin.
raw_in_fps = [DataManager.get_image_filepath_v2(stack=stack, prep_id=None,
                                                resol='raw', version=None, fn=img_name)
              for img_name in list(image_names_all_data_dirs_flattened)]
ntb_out_fps = [DataManager.get_image_filepath_v2(stack=stack, prep_id=None,
                                                 resol='raw', version='Ntb', fn=img_name)
               for img_name in list(image_names_all_data_dirs_flattened)]

# The %(in_fp)s / %(out_fp)s placeholders in the template match the kwargs_list keys.
run_distributed('convert \"%(in_fp)s\" -channel B -separate \"%(out_fp)s\"',
                kwargs_list={'in_fp': raw_in_fps, 'out_fp': ntb_out_fps},
                argument_type='single',
                jobs_per_node=1,
                local_only=True,
                use_aws=False)
print('finished generating raw_Ntb files')
# print() with one formatted argument works on Python 2 and 3 (the print statement did not).
# NOTE(review): the "2252 seconds" figure is identical to the JP2 step's note - possibly copied; confirm.
print('done in %s seconds' % (time.time() - t)) # 2252 seconds full stack

# For every valid filename of the stack:
#   1. raw_Ntb -> thumbnail_Ntb: keep every `thumbnail_downscale_factor`-th pixel along
#      the first two axes of the image.
#   2. thumbnail_Ntb -> thumbnail_NtbNormalized: ImageMagick `convert -normalize -depth 8`.
thumbnail_downscale_factor = 32
tb_resol = 'thumbnail'

# Takes about 1 minute per file, in total about 8-10 hours

# Will create new files at the filepaths /CSHL_data_processed/MD662/MD662_thumbnail_Ntb/*_thumbnail_Ntb.tif
#  and /CSHL_data_processed/MD662/MD662_thumbnail_NtbNormalized/*_thumbnail_NtbNormalized.tif
# Only 108 files in each directory though?

# `i` ends up as the number of filenames visited (stays 0 if the list is empty).
i = 0
# Inputs that could not be read; reported after the loop to help explain missing outputs.
problem_files = []

for i, img_name in enumerate(metadata_cache['valid_filenames_all'][stack], 1):
    print('\n\n' + img_name + '\n')

    # ---- Step 1: rescale (raw_Ntb -> thumbnail_Ntb) ----
    t = time.time()

    in_fp = DataManager.get_image_filepath_v2(stack=stack, prep_id=None, resol='raw', version='Ntb',
                                              fn=img_name)
    out_fp = DataManager.get_image_filepath_v2(stack=stack, prep_id=None, resol=tb_resol, version='Ntb',
                                               fn=img_name)
    create_parent_dir_if_not_exists(out_fp)

    try:
        img = imread(in_fp)
    except IndexError:
        # Report the input that failed to load and move on to the next image.
        # Without the `continue`, the code below would either hit a NameError (first image)
        # or save the PREVIOUS image's pixels under this image's filename.
        print('Problematic file detected\n\n' + in_fp + '\n\n')
        problem_files.append(in_fp)
        continue

    print(out_fp)
    # Plain strided subsampling (no interpolation).
    img_tb = img[::thumbnail_downscale_factor, ::thumbnail_downscale_factor]
    imsave(out_fp, img_tb)

    # Alternative: ImageMagick introduces an artificial noisy stripe in the output image.
    #     cmd = 'convert %(in_fp)s -scale 3.125%% %(out_fp)s' % {'in_fp': in_fp, 'out_fp': out_fp}
    #     execute_command(cmd)

    sys.stderr.write("Rescale: %.2f seconds.\n" % (time.time() - t)) # ~20s / image

    # ---- Step 2: intensity normalization (thumbnail_Ntb -> thumbnail_NtbNormalized) ----
    t = time.time()

    in_fp = DataManager.get_image_filepath_v2(stack=stack, prep_id=None, resol=tb_resol, version='Ntb',
                                              fn=img_name)
    out_fp = DataManager.get_image_filepath_v2(stack=stack, prep_id=None, resol=tb_resol, version='NtbNormalized',
                                               fn=img_name)
    create_parent_dir_if_not_exists(out_fp)

    print(out_fp)
    cmd = """convert "%(in_fp)s" -normalize -depth 8 "%(out_fp)s" """ % {'in_fp': in_fp, 'out_fp': out_fp}
    execute_command(cmd)

    sys.stderr.write("Intensity normalize: %.2f seconds.\n" % (time.time() - t))

print(i)
if problem_files:
    print('Unreadable input files (%d):\n%s' % (len(problem_files), '\n'.join(problem_files)))