In [2]:
import os
import time

from CAJAL.lib import run_gw, sample_mesh

# Run GW on sampled points using a SciPy distance metric (usually Euclidean)

In [3]:
# Sample 50 points from every cell in the obj files (each obj file contains
# multiple cells) and save the samples under `outfolder`.
# NOTE(review): disconnect=True presumably makes CAJAL treat each connected
# component of a file as a separate cell -- confirm against the CAJAL docs.
infolder = "../CAJAL/data/obj_files"
outfolder = "../CAJAL/data/sampled_pts/obj_sampled_50"
# perf_counter() is a monotonic, high-resolution clock, so the reported
# duration cannot be skewed by system clock adjustments as time.time() can.
start = time.perf_counter()
sample_mesh.save_sample_from_obj_parallel(infolder, outfolder, n_sample=50, disconnect=True, num_cores=8)
print('Total time elapsed: {:.3f}s'.format(time.perf_counter() - start))

Total time elapsed: 4.758s


In [4]:
# Parameters for the Euclidean GW run
# Worker count and output-name prefix handed to run_gw below.
num_cores = 12
file_prefix = "obj_euclidean"

# Optional file-name prefix (None here). Not required, but helpful when
# another file (like python_list_dir.txt) sits in the same folder as the data.
data_prefix = None

# Output folder for GW results and input folder of sampled points, both
# resolved to absolute paths relative to the current working directory.
gw_results_dir = os.path.abspath('../CAJAL/data/gw_results')
data_dir = os.path.abspath('../CAJAL/data/sampled_pts/obj_sampled_50/')

In [5]:
# Compute distances between the sampled points of each cell (Euclidean, per
# the section heading), then compute the GW distance between cells.
# Each stage is timed separately with perf_counter(), a monotonic clock that
# is unaffected by system clock adjustments (unlike time.time()).
t1 = time.perf_counter()
print("Calculating distances between points in each dataset")
dist_mat_list = run_gw.get_distances_all(data_dir=data_dir, data_prefix=data_prefix)
t2 = time.perf_counter()
print("Calculating GW distances between datasets")
# NOTE(review): save_mat=False -- presumably skips writing some intermediate
# matrix output; confirm what is still saved under gw_results_dir.
run_gw.save_dist_mat_preload_global(dist_mat_list, file_prefix, gw_results_dir, save_mat=False, num_cores=num_cores)
t3 = time.perf_counter()
print('Time to compute distances: {:.3f}s'.format(t2 - t1))
print('Time in GW calculation: {:.3f}s'.format(t3 - t2))
print('Total time elapsed: {:.3f}s'.format(t3 - t1))

Calculating distances between points in each dataset
Calculating GW distances between datasets
Time to compute distances: 0.671s
Time in GW calculation: 1.386s
Total time elapsed: 2.057s


# Run GW on precomputed distances (usually geodesic)

In [6]:
# Compute and save geodesic distances between 50 sampled points per cell,
# reading the cell meshes from the obj files.
# The folder name is spelled in lowercase to match the Euclidean sampling
# cell; the previous "OBJ_files" spelling differed only by case, which breaks
# on case-sensitive filesystems.
# NOTE(review): confirm the directory on disk is actually "obj_files".
infolder = "../CAJAL/data/obj_files"
outfolder = "../CAJAL/data/sampled_pts/obj_geodesic_50"
# perf_counter() is monotonic, so the elapsed time is not affected by system
# clock adjustments the way time.time() is.
start = time.perf_counter()
# NOTE(review): method="heat" and connect=False are passed straight through
# to CAJAL; their exact semantics are not visible here -- see the CAJAL docs.
sample_mesh.save_geodesic_from_obj_parallel(infolder, outfolder, n_sample=50, method="heat", connect=False, num_cores=8)
print('Total time elapsed: {:.3f}s'.format(time.perf_counter() - start))

Total time elapsed: 8.183s


In [7]:
# Parameters for the geodesic GW run
# Worker count and output-name prefix handed to run_gw below.
num_cores = 12
file_prefix = "obj_geodesic"

# Optional file-name prefix (None here). Not required, but helpful when
# another file (like python_list_dir.txt) sits in the same folder as the data.
data_prefix = None

# Output folder for GW results and input folder of precomputed distances,
# both resolved to absolute paths relative to the current working directory.
gw_results_dir = os.path.abspath('../CAJAL/data/gw_results')
distances_dir = os.path.abspath('../CAJAL/data/sampled_pts/obj_geodesic_50/')

In [8]:
# Load the precomputed geodesic distances, then compute the GW distance
# between cells.
# Each stage is timed separately with perf_counter(), a monotonic clock that
# is unaffected by system clock adjustments (unlike time.time()).
t1 = time.perf_counter()
# This stage only loads distances from disk, so the progress message says
# "Loading" rather than "Calculating" (matching the timing line below).
print("Loading distances between points in each dataset")
dist_mat_list = run_gw.load_distances_global(distances_dir=distances_dir, data_prefix=data_prefix)
t2 = time.perf_counter()
print("Calculating GW distances between datasets")
# NOTE(review): save_mat=False -- presumably skips writing some intermediate
# matrix output; confirm what is still saved under gw_results_dir.
run_gw.save_dist_mat_preload_global(dist_mat_list, file_prefix, gw_results_dir, save_mat=False, num_cores=num_cores)
t3 = time.perf_counter()
print('Time to load distances: {:.3f}s'.format(t2 - t1))
print('Time in GW calculation: {:.3f}s'.format(t3 - t2))
print('Total time elapsed: {:.3f}s'.format(t3 - t1))

Calculating distances between points in each dataset
Calculating GW distances between datasets
Time to load distances: 0.921s
Time in GW calculation: 1.441s
Total time elapsed: 2.362s


# Save python_list_dir
R and Python sort strings differently, so I save the Python ordering of the file names in case I need to load the distance matrix into R.

In [10]:
def pj(*paths):
    """Join the given path components and return the absolute path."""
    return os.path.abspath(os.path.join(*paths))


distances_dir = os.path.abspath("../CAJAL/data/sampled_pts/obj_sampled_50")
data_prefix = None
# Name of the listing file written into `distances_dir`.
list_dir_name = "python_list_dir.txt"

# Keep only the data files: apply the optional prefix filter and skip the
# listing file itself, which lives in the same folder and would otherwise be
# written into its own listing whenever this cell is re-run.
# Sort explicitly because os.listdir() returns entries in arbitrary order.
files_list = sorted(
    data_file
    for data_file in os.listdir(distances_dir)
    if data_file != list_dir_name
    and (data_prefix is None or data_file.startswith(data_prefix))
)

# One file name per line, in Python's sort order. The context manager closes
# the file even if a write fails.
with open(pj(distances_dir, list_dir_name), "w") as list_dir_file:
    list_dir_file.writelines(file_name + "\n" for file_name in files_list)