In [1]:
import diff_classifier.knotlets as kn


# Experiment Initialization

First, I define the nomenclature I use to name my files, as well as specify exceptions (files that weren't generated or are missing and will be skipped in the analysis).  In this case, I was analyzing data collected in tissue slices.  Videos are named according to the pup number, the slice number, the hemisphere, and the video number.

In [2]:
# Run configuration for this experiment.
start_knot = 2  # Must be a unique number for every run on Cloudknot.
remote_folder = 'test_files'  # Folder in AWS S3 containing the files to be analyzed
bucket = 'nancelab.publicfiles'  # S3 bucket used together with remote_folder
vids = 2  # Number of videos to analyze

# Starts out empty; a later cell rebinds this name to the result of knot.map.
result_futures = {}

# Video prefixes 'test_01' ... 'test_<vids>': numbered from 1, zero-padded to two digits.
to_track = ['test_{:02d}'.format(num) for num in range(1, vids + 1)]

The videos used with this analysis are fairly large (2048 x 2048 pixels and 651 frames), and in cases like this, the tracking algorithm can quickly eat up RAM. In this case, we chose to crop the videos to 512 x 512 images such that we can run our jobs on smaller EC2 instances with 8GB of RAM. 

Note that larger jobs can be built from user-defined functions so that splitting isn't necessary -- or, with an intermediate amount of memory, a single job could run the splitting, tracking, and MSD calculation functions all on one EC2 instance.

The compiled functions in the knotlets module require access to buckets on AWS. In this case, we will be using a public (read-only) bucket. Users who want to run this notebook on their own will have to transfer the files from nancelab.publicfiles to their own bucket, because the analysis requires writing to S3 buckets.

In [None]:
# Split every video listed in to_track; kn.split is called once per prefix and is
# pointed at the same remote folder that holds the input files.
for video_prefix in to_track:
    kn.split(video_prefix, remote_folder=remote_folder)

In [None]:
import os
import diff_classifier.imagej as ij
import boto3
import os.path as op
import diff_classifier.aws as aws
import diff_classifier.knotlets as kn
import numpy as np
from sklearn.externals import joblib

In [None]:
tnum = 3  # number of training datasets

# Tile prefixes: one '<video>_<row>_<col>' entry for each cell of the 4 x 4 grid of every video.
pref = [
    "{}_{}_{}".format(video, row, col)
    for video in to_track
    for row in range(0, 4)
    for col in range(0, 4)
]

# Manually calculated quality cutoffs used as the regression targets.
y = np.array([9.4, 7.7, 7.4])

# Creates the regression object based on a training dataset composed of input images and
# the quality cutoffs calculated manually from tracking with the GUI interface.
# Read up on how regress_sys works before running.
regress = ij.regress_sys(remote_folder, pref, y, tnum,
                         have_output=True, bucket_name=bucket)

In [None]:
# Pickle the regression object to a local file.
filename = 'regress.obj'
with open(filename, 'wb') as handle:
    joblib.dump(regress, handle)

import boto3
s3 = boto3.client('s3')  # NOTE(review): this client is not referenced again within this cell

# Upload the pickled file into the remote folder, keeping the same file name.
remote_key = '{}/{}'.format(remote_folder, filename)
aws.upload_s3(filename, remote_key, bucket_name=bucket)

In [3]:
# Tracking parameters; this one dict is handed to kn.tracking and is also placed in
# every entry of all_maps.
tparams = dict(
    radius=5.0,
    threshold=0.0,
    do_median_filtering=False,
    quality=10.0,
    xdims=(0, 511),
    ydims=(1, 511),
    median_intensity=300.0,
    snr=0.0,
    linking_max_distance=15.0,
    gap_closing_max_distance=20.0,
    max_frame_gap=5,
    track_duration=20.0,
)

In [None]:
import cloudknot as ck
import os.path as op

# Repository URL (with its '@docs' suffix) passed to cloudknot as github_installs.
# This is a plain string: the original parentheses did not make it a tuple.
github_installs = 'https://github.com/ccurtis7/diff_classifier.git@docs'
my_image = ck.DockerImage(func=kn.tracking, base_image='arokem/python3-fiji:0.3',
                          github_installs=github_installs)

# Read the Dockerfile at my_image.docker_path; the context manager closes the handle
# even if the read raises.
with open(my_image.docker_path) as docker_file:
    docker_string = docker_file.read()

# requirements.txt is looked up in the same directory as the Dockerfile.
req_path = op.join(op.split(my_image.docker_path)[0], 'requirements.txt')
with open(req_path) as req:
    req_string = req.read()

# Overwrite the last four characters of the first line with '5.28' and keep the rest
# of the file unchanged. partition() also copes with a file that has no newline.
# NOTE(review): presumably this pins a package version -- confirm which requirement
# is listed first before relying on it.
first_line, newline, remainder = req_string.partition('\n')
new_req = first_line[:-4] + '5.28' + newline + remainder
with open(req_path, 'w') as req_overwrite:
    req_overwrite.write(new_req)

In [None]:
# Build the image defined by my_image under the name "test_image".
# NOTE(review): the first positional argument ("0.1") is presumably the image tag --
# confirm against the cloudknot DockerImage.build documentation.
my_image.build("0.1", image_name="test_image")

In [4]:
# One '<prefix>_<row>_<col>' name for each cell of the 4 x 4 grid of every video.
names = [
    '{}_{}_{}'.format(prefix, i, j)
    for prefix in to_track
    for i in range(0, 4)
    for j in range(0, 4)
]
n_tiles = len(names)

# Parallel per-tile argument lists; every entry repeats the same value
# (tparams_l holds references to the single shared tparams dict).
tparams_l = [tparams] * n_tiles
folder_l = [remote_folder] * n_tiles
bucket_l = [bucket] * n_tiles
regress_l = ['regress.obj'] * n_tiles
rows_l = [4] * n_tiles
cols_l = [4] * n_tiles
ires_l = [(512, 512)] * n_tiles

# Same information packed as one argument tuple per tile.
all_maps = [
    (name, remote_folder, bucket, 'regress.obj', 4, 4, (512, 512), tparams)
    for name in names
]

In [7]:
# Display the argument tuples assembled for every tile.
all_maps

[('test_01_0_0',
  'test_files',
  'nancelab.publicfiles',
  'regress.obj',
  4,
  4,
  (512, 512),
  {'do_median_filtering': False,
   'gap_closing_max_distance': 20.0,
   'linking_max_distance': 15.0,
   'max_frame_gap': 5,
   'median_intensity': 300.0,
   'quality': 7.6513562333555605,
   'radius': 5.0,
   'snr': 0.0,
   'threshold': 0.0,
   'track_duration': 20.0,
   'xdims': (0, 511),
   'ydims': (1, 511)}),
 ('test_01_0_1',
  'test_files',
  'nancelab.publicfiles',
  'regress.obj',
  4,
  4,
  (512, 512),
  {'do_median_filtering': False,
   'gap_closing_max_distance': 20.0,
   'linking_max_distance': 15.0,
   'max_frame_gap': 5,
   'median_intensity': 300.0,
   'quality': 7.6513562333555605,
   'radius': 5.0,
   'snr': 0.0,
   'threshold': 0.0,
   'track_duration': 20.0,
   'xdims': (0, 511),
   'ydims': (1, 511)}),
 ('test_01_0_2',
  'test_files',
  'nancelab.publicfiles',
  'regress.obj',
  4,
  4,
  (512, 512),
  {'do_median_filtering': False,
   'gap_closing_max_distance': 20

In [None]:
# Cloudknot job definition. The knot name embeds start_knot, which must be a unique
# number for every run on Cloudknot.
knot_name = 'download_and_track_{}_c{}'.format('test', start_knot)
knot = ck.Knot(
    name=knot_name,
    docker_image=my_image,
    memory=16000,
    resource_type="SPOT",
    bid_percentage=100,
    image_id='ami-0e00afdf500081a0d',  # May need to change this line
    pars_policies=('AmazonS3FullAccess',),
)

In [None]:
# Inspect the argument tuples again before they are passed to knot.map.
all_maps

In [None]:
# Hand all argument tuples to the knot and keep whatever knot.map returns.
# NOTE(review): starmap=True presumably unpacks each tuple into positional arguments
# of the function the Docker image was built from -- confirm in the cloudknot docs.
result_futures = knot.map(all_maps, starmap=True)

In [None]:
# NOTE(review): clobber() presumably tears down the AWS resources created for this knot --
# confirm in the cloudknot docs, and run it only once the submitted jobs are no longer needed.
knot.clobber()

In [6]:
# Call kn.tracking directly from this kernel on the second entry of names, using the
# same remote folder and tracking parameters as the batch jobs.
kn.tracking(names[1], remote_folder=remote_folder, tparams=tparams)

/home/ubuntu/Fiji.app/ImageJ-linux64 --ij2 --headless --run /tmp/tmpzo6peasa.py
Done with tracking.  Should output file of name test_files/Traj_test_01_0_1.csv


In [8]:
# Print every argument tuple on its own line.
for job_args in all_maps:
    print(job_args)

('test_01_0_0', 'test_files', 'nancelab.publicfiles', 'regress.obj', 4, 4, (512, 512), {'radius': 5.0, 'threshold': 0.0, 'do_median_filtering': False, 'quality': 7.6513562333555605, 'xdims': (0, 511), 'ydims': (1, 511), 'median_intensity': 300.0, 'snr': 0.0, 'linking_max_distance': 15.0, 'gap_closing_max_distance': 20.0, 'max_frame_gap': 5, 'track_duration': 20.0})
('test_01_0_1', 'test_files', 'nancelab.publicfiles', 'regress.obj', 4, 4, (512, 512), {'radius': 5.0, 'threshold': 0.0, 'do_median_filtering': False, 'quality': 7.6513562333555605, 'xdims': (0, 511), 'ydims': (1, 511), 'median_intensity': 300.0, 'snr': 0.0, 'linking_max_distance': 15.0, 'gap_closing_max_distance': 20.0, 'max_frame_gap': 5, 'track_duration': 20.0})
('test_01_0_2', 'test_files', 'nancelab.publicfiles', 'regress.obj', 4, 4, (512, 512), {'radius': 5.0, 'threshold': 0.0, 'do_median_filtering': False, 'quality': 7.6513562333555605, 'xdims': (0, 511), 'ydims': (1, 511), 'median_intensity': 300.0, 'snr': 0.0, 'link