In [2]:
import os
import time

from CAJAL.lib import run_gw

# Compute Euclidean distances within each set of points, then run GW

In [7]:
# Configuration for the Euclidean example (consumed by the timing cell below).

# Forwarded as `num_cores` to the GW step.
num_cores = 12

# Prefix handed to the GW save call as `file_prefix`.
file_prefix = "example_euclidean"

# Optional filter on input file names. Not required, but useful when the data
# folder also holds unrelated files (e.g. python_list_dir.txt).
data_prefix = "a10_full_50"

# Output folder first, then the folder containing the sampled points.
gw_results_dir = os.path.abspath('../CAJAL/tests/gw_results')
data_dir = os.path.abspath('../CAJAL/tests/sampled_pts/example_euclidean/')

In [8]:
# Time the two stages separately: (1) computing the distance matrices from the
# point files in `data_dir`, (2) the GW computation over those matrices.
# time.perf_counter() is used instead of time.time() because it is a
# high-resolution clock meant for measuring intervals and is not affected by
# system clock adjustments.
t1 = time.perf_counter()
print("Calculating distances between points in each dataset")
dist_mat_list = run_gw.get_distances_all(data_dir=data_dir, data_prefix=data_prefix)
t2 = time.perf_counter()
print("Calculating GW distances between datasets")
# NOTE(review): save_mat=False is kept as in the original call; confirm in
# run_gw what is (and is not) written to gw_results_dir with this setting.
run_gw.save_dist_mat_preload_global(dist_mat_list, file_prefix, gw_results_dir, save_mat=False, num_cores=num_cores)
t3 = time.perf_counter()
# Per-stage and total wall time, in seconds.
print('Time to compute distances: {:.3f}s'.format(t2 - t1))
print('Time in GW calculation: {:.3f}s'.format(t3 - t2))
print('Total time elapsed: {:.3f}s'.format(t3 - t1))

Calculating distances between points in each dataset
Calculating GW distances between datasets
Time to compute distances: 2.279s
Time in GW calculation: 9.848s
Total time elapsed: 12.127s


# Run GW on precomputed distances (usually geodesic)

In [9]:
# Configuration for the precomputed-distance (geodesic) example, consumed by
# the timing cell below.

# Forwarded as `num_cores` to the GW step.
num_cores = 12

# Prefix handed to the GW save call as `file_prefix`.
file_prefix = "example_geodesic"

# Optional filter on input file names. Not required, but useful when the data
# folder also holds unrelated files (e.g. python_list_dir.txt).
data_prefix = "a10_full_50"

# Output folder first, then the folder containing the precomputed distances.
gw_results_dir = os.path.abspath('../CAJAL/tests/gw_results')
distances_dir = os.path.abspath('../CAJAL/tests/sampled_pts/example_geodesic/')

In [11]:
# Time the two stages separately: (1) loading the precomputed distance
# matrices from `distances_dir`, (2) the GW computation over those matrices.
# time.perf_counter() is used instead of time.time() because it is a
# high-resolution clock meant for measuring intervals and is not affected by
# system clock adjustments.
t1 = time.perf_counter()
# This stage loads distances rather than computing them, so the progress
# message says so (it previously repeated the Euclidean cell's wording).
print("Loading precomputed distance matrices")
dist_mat_list = run_gw.load_distances_global(distances_dir=distances_dir, data_prefix=data_prefix)
t2 = time.perf_counter()
print("Calculating GW distances between datasets")
# NOTE(review): save_mat=False is kept as in the original call; confirm in
# run_gw what is (and is not) written to gw_results_dir with this setting.
run_gw.save_dist_mat_preload_global(dist_mat_list, file_prefix, gw_results_dir, save_mat=False, num_cores=num_cores)
t3 = time.perf_counter()
# Per-stage and total wall time, in seconds.
print('Time to load distances: {:.3f}s'.format(t2 - t1))
print('Time in GW calculation: {:.3f}s'.format(t3 - t2))
print('Total time elapsed: {:.3f}s'.format(t3 - t1))

Calculating distances between points in each dataset
Calculating GW distances between datasets
Time to load distances: 2.835s
Time in GW calculation: 4.577s
Total time elapsed: 7.412s


# Save python_list_dir
R and Python sort strings differently, so I save the Python ordering in case I need to load the distance matrix into R.

In [6]:
# Write the sorted names of the data files in `distances_dir` to
# python_list_dir.txt (one name per line) inside that same folder.


def pj(*paths):
    """Join the given path components and return the absolute path."""
    return os.path.abspath(os.path.join(*paths))


# NOTE(review): this path is relative to a different working directory than the
# '../CAJAL/tests/...' paths used by the cells above; confirm where this cell
# is meant to be run from.
distances_dir = os.path.abspath("./sampled_pts/example_euclidean")

# None (the object, not the string "None") disables prefix filtering. With the
# string "None" the `is None` test below was never true, so only files whose
# names literally start with "None" were kept.
data_prefix = None

list_dir_name = "python_list_dir.txt"

# os.listdir() returns names in arbitrary order, so sort explicitly. The output
# file itself is skipped so that re-running the cell does not add it to its own
# listing.
files_list = sorted(
    data_file
    for data_file in os.listdir(distances_dir)
    if data_file != list_dir_name
    and (data_prefix is None or data_file.startswith(data_prefix))
)

# The context manager closes the file even if a write fails.
with open(pj(distances_dir, list_dir_name), "w") as list_dir_file:
    list_dir_file.writelines(file_name + "\n" for file_name in files_list)