In combine_results.ipynb, the MATLAB results are imported and combined with the geometry data into pickle files. Sometimes, however, the MATLAB results contain errors or are malformed. A list of these problem cases is saved to error.pkl, which is investigated here.

When needed, we list the failed trajectories and move the geometries to a new folder, which can be used to rerun the simulations in matlab.

# Imports & define paths

In [None]:
import pickle
import numpy as np
import os
import scipy.io as sio
import funcs_helpers as fh
import matplotlib.pyplot as plt
import helper_funcs as mf

In [None]:
# Folder locations used throughout the notebook (the values look like placeholders to fill in)
results_path = r"your_path_to_matlab_results"
geometries_path = r"your_path_to_geometries"
final_path = r"final_dataset"

# every entry of the geometries folder is counted as one geometry
geoms = os.listdir(geometries_path)
print(f'{len(geoms)} geometries found')


# Import & examine errors file

* (FIXED!) no bifurcation data
* (FIXED!) no simulations found > continue to next geom
* (FIXED!) missing time steps > continue to next geom
* (FIXED!) no bifurcation data in rerun
* (FIXED!) no simulations found in rerun > continue to next rerun
* (FIXED!) missing time steps in rerun > continue to next rerun
* trajectories of length 0 > remove those trajectories
* trajectories of length <= 2 after removing contact
* error remains after removing contact



In [None]:
# error file generated by combine_results.ipynb
path = r'path_to_errors.pkl'

# read the pickled list of error records
with open(path, 'rb') as f:
    errors = pickle.load(f)

print(f'Nr of errors: {len(errors)}')

# show the first few records
errors[:4]

In [None]:
# Summary table: for every distinct error message, the number of error records (one per
# geometry, judging by the "# Geoms" column) and the summed trajectory count.
# Each record is indexed as record[1] = error message, record[2] = number of trajectories.
total_trajs = 12240  # denominator of the percentage column (presumably the total number of trajectories -- confirm)

e, inv, c = np.unique([record[1] for record in errors], return_inverse=True, return_counts=True)
c_tr = np.array([record[2] for record in errors])
print(f'{"Error":50} {"# Geoms":>8} {"# Trajs":>8} {"Percentage"}')
for i, err in enumerate(e):
    # sum the trajectory counts of all records that carry this error message
    num = np.sum(c_tr[inv == i])
    # '.2f' prints two decimals; a bare '.2' means two significant digits and switches to
    # exponent notation for values >= 100
    print(f'{err:50} {c[i]:8} {num:8} {num/total_trajs*100:.2f}%')

In [None]:
# show all records with the 'No simulations found' message
[record for record in errors if record[1] == 'No simulations found']

In [None]:
# Check for overlap between errors: find geometries that occur in more than one error
# record and report trajectories that are named by more than one of those records.
geom_names = [record[0] for record in errors]
unique_geoms, inv = np.unique(geom_names, return_inverse=True)
error_geoms = np.array(geom_names)

for i, geom in enumerate(unique_geoms):
    # positions in `errors` of the records that involve this geometry
    inds = np.where(inv == i)[0]
    if len(inds) <= 1:
        continue

    print(f'\n{geom}')
    traj_inds = []
    for ind in inds:
        print(errors[ind])
        try:
            traj_inds.extend(errors[ind][3])
        except IndexError:
            # record carries no trajectory list: count it against trajectories 0..11
            traj_inds.extend(list(range(12)))

    # a repeated index means one trajectory is involved in multiple errors
    n_unique = len(np.unique(traj_inds))
    if len(traj_inds) == n_unique:
        continue

    print('traj_inds:', traj_inds,
          len(traj_inds),
          n_unique
          )

    for ind in inds:
        try:
            print(f'\t{ind:4} {errors[ind][1]:52} {errors[ind][3]}')
        except IndexError:
            print(f'\t{ind:4} {errors[ind][1]:52} {list(range(12))}')

# Handle 'Error remains after removing contact'

In [None]:
# keep the records that carry one of these two error messages
flag_messages = ('Error remains after removing contact', 'Trajectories of length 0 in data')
errorFlag_geoms = [record for record in errors if record[1] in flag_messages]

In [None]:
# flatten the records into (geometry, trajectory index) pairs;
# the last field of each record is iterated as the list of trajectory indices
errorFlaglist = [
    (record[0], traj_idx)
    for record in errorFlag_geoms
    for traj_idx in record[-1]
]


In [None]:
# number of (geometry, trajectory) pairs, before duplicates are removed
len(errorFlaglist)

In [None]:
# remove duplicate (geometry, trajectory) pairs; axis=0 compares whole rows
# NOTE(review): np.unique first converts the list of tuples to one array, so the trajectory
# indices presumably end up as strings -- the cells below convert them back with int(...)
values = np.unique(errorFlaglist, axis=0)
values.shape


In [None]:
# print geometry name and trajectory number (index + 1) on alternating lines
for geom_name, traj_idx in values:
    print(geom_name)
    print(int(traj_idx) + 1)

In [None]:
# print the geometry names as quoted entries followed by ', ...'
# (presumably for pasting into a MATLAB cell array -- confirm)
for geom_name, _ in values:
    print(f"'{geom_name}', ...")

In [None]:
# print the matching trajectory numbers (index + 1) in the same ', ...' layout
for _, traj_idx in values:
    print(f'{int(traj_idx)+1}, ...')

In [None]:
import shutil

## Copy geometries and previous results to new folders

In [None]:
# copy the geometries that have errors to a new folder

# first create the new folder if necessary
to_rerun_folder = r"geoms_to_rerun"

if not os.path.exists(to_rerun_folder):
    os.makedirs(to_rerun_folder)
else:
    # raising aborts the cell before anything is copied into an already existing folder
    raise Warning(f'{to_rerun_folder} already exists')

# copy the .mat files of the geometries that have errors to the new folder.
# `values` has one row per (geometry, trajectory) pair, so a geometry with several failed
# trajectories occurs several times; dict.fromkeys drops the repeats (order preserved) so
# that every geometry file is copied exactly once.
for geom in dict.fromkeys(val[0] for val in values):
    src = os.path.join(geometries_path, geom, f'{geom}_00.mat')
    dst = os.path.join(to_rerun_folder, f'{geom}_00.mat')
    print(f'Copying {src} to {dst}')

    # copy file from source to destination
    shutil.copyfile(src, dst)

In [None]:
# copy the previous results that have errors to a new folder

to_rerun_folder2 = r"matlab_results_to_rerun"

# first create the new folder if necessary
if not os.path.exists(to_rerun_folder2):
    os.makedirs(to_rerun_folder2)
else:
    # raising aborts the cell before anything is copied into an already existing folder
    raise Warning(f'{to_rerun_folder2} already exists')

# copy the result file of every affected geometry.
# `values` has one row per (geometry, trajectory) pair, so a geometry with several failed
# trajectories occurs several times; dict.fromkeys drops the repeats (order preserved) so
# that every result file is copied exactly once.
for geom in dict.fromkeys(val[0] for val in values):
    src = os.path.join(results_path, f'{geom}.mat')
    dst = os.path.join(to_rerun_folder2, f'{geom}.mat')
    print(f'Copying {src} to {dst}')

    # copy file from source to destination
    shutil.copyfile(src, dst)

# Handle 'No simulations found'

In [None]:
# names of the geometries whose record says 'No simulations found'
recompute_geoms = [record[0] for record in errors if record[1] == 'No simulations found']
recompute_geoms

In [None]:
# split every geometry name at '_' and keep the first two parts
# (presumably the symmetry group and the lattice shape, e.g. 'pmg' and 'square' -- confirm)
group_shape = [geom.split('_')[:2] for geom in recompute_geoms]

# zip(*[]) yields nothing, so unpacking it raises ValueError when there is no geometry to
# recompute; fall back to two empty tuples in that case
group, shape = zip(*group_shape) if group_shape else ((), ())
group, shape

In [None]:
# destination for the geometry folders that are moved away further down
geom_fail = r"all_trajs_fail_path"  # path to the folder where the geometries with only failed trajectories are stored

In [None]:
import shutil

In [None]:
# Move everything that belongs to the geometries without simulations out of the way:
# their result files go to <results_path>/all_trajs_fail, their geometry folders to `geom_fail`.
results_fail = os.path.join(results_path, 'all_trajs_fail')

for geom in recompute_geoms:
    # move the output files of this geometry to a new folder
    for file in os.listdir(results_path):
        if file.startswith(geom):
            source = os.path.join(results_path, file)
            target = os.path.join(results_fail, file)
            print(' ', source)
            print('>', target)
            # shutil.move does not create missing parent folders; create the folder on demand
            os.makedirs(results_fail, exist_ok=True)
            shutil.move(source, target)
    # move the geometry folder(s) of this geometry as well
    for folder in os.listdir(geometries_path):
        if folder.startswith(geom):
            source = os.path.join(geometries_path, folder)
            target = os.path.join(geom_fail, folder)
            print(' ', source)
            print('>', target)
            # same as above: make sure the destination folder exists before moving
            os.makedirs(geom_fail, exist_ok=True)
            shutil.move(source, target)

In [None]:
# count the .mat files in the results folder, leaving out the '*specialnodes.mat' files
asdf = [
    name
    for name in os.listdir(results_path)
    if name.endswith('.mat') and not name.endswith('specialnodes.mat')
]
print(len(asdf))

In [None]:
# number of error records per name prefix (the part of the geometry name before the first '_')
err_groups = [record[0].split('_')[0] for record in errors]
np.unique(err_groups, return_counts=True)

In [None]:
# all distinct geometry names that occur in the error list
error_geoms = [record[0] for record in errors]
np.unique(error_geoms)

# Handle 'Missing timesteps'

In [None]:
# Examine files with missing time steps
missing_timesteps_geoms = [record[0] for record in errors if record[1] == 'Missing time steps']
for geom in missing_timesteps_geoms:
    print(geom)
    matfile = os.path.join(results_path, geom + '.mat')
    data_from_mat = sio.loadmat(matfile)
    data_ts = data_from_mat['data_ts']

    # shapes of two of the per-time-step arrays, for comparison
    print(data_ts['F'][0,0].shape,
          data_ts['microfluctuation'][0,0].shape)

    # first column of the errorFlag array, then whether its sum is non-zero
    error_flags = data_from_mat['data_sim']['errorFlag'][0,0][:, 0]
    print(error_flags)
    print(np.sum(error_flags).astype(bool))

    # how many rows each value of the 'traj' field has (values shifted by one)
    print(np.bincount(data_ts['traj'][0,0].flatten()+1))

    # shape of every field in data_ts
    for key in data_ts.dtype.names:
        print(key, data_ts[key][0, 0].shape)
    print('')

In [None]:
# print the names as quoted entries followed by ', ...'
# (presumably for pasting into a MATLAB cell array -- confirm)
for geom_name in missing_timesteps_geoms:
    print(f"'{geom_name}', ...")

### Move matlab results (geometries are fine and can stay)

In [None]:
import shutil

In [None]:
# folder (inside the results folder) that receives the results with missing time steps
target_dir = os.path.join(results_path, 'missing_timesteps2')

# exist_ok=True makes this a no-op when the folder is already there, without the
# check-then-create gap of a separate os.path.exists test
os.makedirs(target_dir, exist_ok=True)

In [None]:
# Move every results file whose name starts with an affected geometry name into target_dir
for geom in missing_timesteps_geoms:
    for file in os.listdir(results_path):
        if not file.startswith(geom):
            continue
        source = os.path.join(results_path, file)
        target = os.path.join(target_dir, file)
        print(' ', source)
        print('>', target)
        shutil.move(source, target)


# Handle 'No bifurcation mode data found'

In [None]:
# names of the geometries whose record says 'No bifurcation mode data found'
no_bifurcMode_geoms = [record[0] for record in errors if record[1] == 'No bifurcation mode data found']

In [None]:
# number of records with the 'No bifurcation mode data found' message
len(no_bifurcMode_geoms)

In [None]:
# print the names as quoted entries followed by ', ...'
# (presumably for pasting into a MATLAB cell array -- confirm)
for geom_name in no_bifurcMode_geoms:
    print(f"'{geom_name}', ...")

### Move matlab results (geometries are fine and can stay)

In [None]:
import shutil

In [None]:
# folder (inside the results folder) that receives the results without bifurcation mode data
target_dir = os.path.join(results_path, 'no_bifurcMode')

# exist_ok=True makes this a no-op when the folder is already there, without the
# check-then-create gap of a separate os.path.exists test
os.makedirs(target_dir, exist_ok=True)

In [None]:
# Move every results file whose name starts with an affected geometry name into target_dir
for geom in no_bifurcMode_geoms:
    for file in os.listdir(results_path):
        if not file.startswith(geom):
            continue
        source = os.path.join(results_path, file)
        target = os.path.join(target_dir, file)
        print(' ', source)
        print('>', target)
        shutil.move(source, target)


# Handle traj length <= 2

In [None]:
# display the complete list of error records
errors

In [None]:
# keep the records that carry one of these two error messages
short_traj_messages = ('Trajectories of length <= 2 after removing contact', 'Rerun has 0 time steps')
contact_too_fast_geoms = [record for record in errors if record[1] in short_traj_messages]

In [None]:
# number of matching error records
len(contact_too_fast_geoms)

In [None]:
# flatten the records into (geometry, trajectory index) pairs;
# the last field of each record is iterated as the list of trajectory indices
contact_too_fast_list = [
    (record[0], traj_idx)
    for record in contact_too_fast_geoms
    for traj_idx in record[-1]
]


In [None]:
# number of (geometry, trajectory) pairs, before duplicates are removed
len(contact_too_fast_list)

In [None]:
# remove duplicate (geometry, trajectory) pairs; axis=0 compares whole rows
# NOTE(review): np.unique first converts the list of tuples to one array, so the trajectory
# indices presumably end up as strings -- the cells below convert them back with int(...)
values = np.unique(contact_too_fast_list, axis=0)
values.shape


In [None]:
# print the geometry names as quoted entries followed by ', ...'
# (presumably for pasting into a MATLAB cell array -- confirm)
for geom_name, _ in values:
    print(f"'{geom_name}', ...")

In [None]:
# print the matching trajectory numbers (index + 1) in the same ', ...' layout
for _, traj_idx in values:
    print(f"{int(traj_idx)+1}, ...")

In [None]:
# Write the geometries and trajectory indices to a file
# This file can be used to rerun the simulations for these geometries and trajectories
# Layout: two lines per pair -- the geometry name, then the trajectory number (index + 1)
with open(r'too_short_trajs.txt', 'w') as f:
    for val in values:
        f.writelines((f'{val[0]}\n', f'{int(val[1])+1}\n'))

## Copy geometries and previous results to new folders

In [None]:
# copy the geometries that have errors to a folder for all the reruns

to_rerun_folder = r"to_rerun"

# create the reruns folder if necessary
if not os.path.exists(to_rerun_folder):
    os.makedirs(to_rerun_folder)
else:
    # raising aborts the cell before anything is copied into an already existing folder
    raise Warning(f'{to_rerun_folder} already exists')

# copy the .mat files of the geometries that have errors to the new folder.
# `values` has one row per (geometry, trajectory) pair, so a geometry with several failed
# trajectories occurs several times; dict.fromkeys drops the repeats (order preserved) so
# that every geometry file is copied exactly once.
for geom in dict.fromkeys(val[0] for val in values):
    src = os.path.join(geometries_path, geom, f'{geom}_00.mat')
    dst = os.path.join(to_rerun_folder, f'{geom}_00.mat')
    print(f'Copying {src} to {dst}')

    # copy file from source to destination
    shutil.copyfile(src, dst)

In [None]:
# copy the previous results that have errors to a new folder

to_rerun_folder2 = r"results_to_rerun"

# first create the new folder if necessary
if not os.path.exists(to_rerun_folder2):
    os.makedirs(to_rerun_folder2)
else:
    # raising aborts the cell before anything is copied into an already existing folder
    raise Warning(f'{to_rerun_folder2} already exists')

# copy the result file of every affected geometry.
# `values` has one row per (geometry, trajectory) pair, so a geometry with several failed
# trajectories occurs several times; dict.fromkeys drops the repeats (order preserved) so
# that every result file is copied exactly once.
for geom in dict.fromkeys(val[0] for val in values):
    src = os.path.join(results_path, f'{geom}.mat')
    dst = os.path.join(to_rerun_folder2, f'{geom}.mat')
    print(f'Copying {src} to {dst}')

    # copy file from source to destination
    shutil.copyfile(src, dst)

# Investigate 'Trajectories of length 0 in data'
It turns out that these cases are just a subset of 'Error remains after removing contact'.

In [None]:
# full error records (not just the names) with the 'Trajectories of length 0 in data' message
trajlen0_geoms = [record for record in errors if record[1] == 'Trajectories of length 0 in data']
trajlen0_geoms

In [None]:
# For every such record, load the geometry's result file and print the errorFlag entries
# selected by the record's last field
for record in trajlen0_geoms:
    mat_path = os.path.join(results_path, record[0] + '.mat')
    with open(mat_path, 'rb') as f:
        data = sio.loadmat(f)
    print(data['data_sim']['errorFlag'][0,0][record[-1]])

# Look at example cases of errors

## No trajectories at all

In [None]:
import os
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# NOTE: these assignments overwrite results_path, geometries_path and final_path that were
# already set at the top of the notebook
results_path = r"matlab_results_path"  # path to the folder where the matlab results are stored

geometries_path = r"your_geometries_path_here"  # path to the folder where the geometries are stored

final_path = r"final_dataset"  # path to the folder where the final dataset will be stored

In [None]:
# case without any trajectories

# geom = 'p2_hexagonal_2024-12-18_15-27-32.525021'
geom = 'pmg_square_2025-01-08_16-06-43.347689'

# load data that was sent to matlab
path = os.path.join(geometries_path, geom, f'{geom}_00.mat')
data_to_mat = sio.loadmat(path)

# pull the individual arrays out of the loaded file
p, t, boundary_inds, inds_per_fd, volume_fraction = (
    data_to_mat[key]
    for key in ('p', 't', 'boundary_inds', 'inds_per_fd', 'volume_fraction')
)

# Create edges
# Every row of `t` contributes six node pairs, taken from the column pairs listed below
# (presumably corner/mid-side nodes of six-node triangles -- confirm). The pairs are stacked
# pair-by-pair, so duplicates are still present at this point.
node_pairs = ([0, 3], [3, 1], [1, 4], [4, 2], [2, 5], [5, 0])
edges = np.vstack([t[:, pair] for pair in node_pairs])


In [None]:
# sort the two node indices of every edge so that (a, b) and (b, a) become identical rows
edges2 = np.sort(edges, axis=-1)
# unique rows, plus for every original edge the index of its unique row (inv) and
# for every unique row the number of times it occurs (counts)
edges3, inv, counts = np.unique(edges2, axis=0, return_inverse=True, return_counts=True)

In [None]:
# which multiplicities occur, i.e. how often a single edge can appear in the edge list
np.unique(counts)

In [None]:
%matplotlib qt
# plt.scatter(*p.T, s=1, alpha=0.5, c='tab:blue', zorder=10)

# # plot edges
# x, y = np.transpose(p[edges.T], axes=[2,0,1])
# edges0 = plt.plot(x, y, c='tab:blue', zorder=10)

lv = data_to_mat['lattice_vectors']

colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red']
for color, [shift1, shift2] in zip(colors, [[0, 0]]): #, [2, 0], [0, 2], [2,2]]):
    p_temp = p + shift1*lv[0] + shift2*lv[1]
    plt.scatter(*p_temp.T, s=1, c='black')
    for c, color in zip(np.unique(counts), colors):
        bools = counts[inv] == c

        x, y = np.transpose(p_temp[edges[bools].T], axes=[2,0,1])
        edges0 = plt.plot(x, y, alpha=0.5, c=color, label=f'count={c}')
        plt.setp(edges0[1:], label="_")

plt.legend()
#label, legend...

    # # plot filled triangles
    # temp = p_temp[t]
    # temp = np.transpose(temp, axes=[0,2,1])
    # temp = temp.reshape(-1, temp.shape[-1])
    # temp = temp[..., [0,3,1,4,2,5]]
    # plt.fill(*temp)  #, alpha=0.5)

plt.gca().set_aspect('equal')

## Missing bifurcMode

In [None]:
# case with missing bifurcMode
geom = 'cmm_rhombic_2024-05-22_14-52-31.110014'

matfile = os.path.join(results_path, f'{geom}.mat')
data_from_mat = sio.loadmat(matfile)
data_ts = data_from_mat['data_ts']

# shape of every field in data_ts
for key in data_ts.dtype.names:
    print(key, data_ts[key][0, 0].shape)

# first column of the 'bifurc' field
print(data_ts['bifurc'][0,0][:, 0])


In [None]:
# case with missing bifurcMode
geom = 'cm_hexagonal1_2024-05-22_14-23-52.891252'

matfile = os.path.join(results_path, f'{geom}.mat')
data_from_mat = sio.loadmat(matfile)
data_ts = data_from_mat['data_ts']

# shape of every field in data_ts
for key in data_ts.dtype.names:
    print(key, data_ts[key][0, 0].shape)

# first column of the 'bifurc' field
print(data_ts['bifurc'][0,0][:, 0])


## Missing time step

In [None]:
# case with a missing time step
geom = 'cmm_rhombic_2024-05-22_14-52-17.337969'

matfile = os.path.join(results_path, f'{geom}.mat')
data_from_mat = sio.loadmat(matfile)
data_ts = data_from_mat['data_ts']

# shape of every field in data_ts; differing first dimensions would point at the missing step
for key in data_ts.dtype.names:
    print(key, data_ts[key][0, 0].shape)


In [None]:
# raw contents of the 'W' field of data_ts
print(data_from_mat['data_ts']['W'])

In [None]:
# try to find the missing time step (probably the last one)

plt.figure()
data_ts = data_from_mat['data_ts']

# norm (over the last two axes) of the change in 'microfluctuation' between consecutive rows
w = data_ts['microfluctuation'][0, 0]
diff = w[1:] - w[:-1]
diff_norm = np.linalg.norm(diff, axis=(1,2))
plt.plot(diff_norm)

# red dashed lines: rows where the first 'bifurc' column is non-zero
for step in np.where(data_ts['bifurc'][0,0][:, 0])[0]:
    plt.axvline(step, c='r', linestyle='--')

# blue dashed lines: rows after which the value in the 'traj' column increases
d_tr = np.diff(data_ts['traj'][0,0][:, 0])
asdf = np.where(d_tr > 0)[0]
for step in asdf:
    plt.axvline(step, c='b', linestyle='--')

# mean of 'microfluctuation' per row, rescaled to the height of the diff_norm curve
m = np.mean(w, axis=(1,2))
plt.scatter(np.arange(len(w)), m/np.max(m)*np.max(diff_norm))

# first column of 'W' for comparison
plt.plot(data_ts['W'][0,0][:, 0])

## Error remains after removing contact

In [None]:
# example geometry and trajectory index inspected in this section
# (traj_ind is compared as traj_ind + 1 against the 'traj' column further down)
geom = 'cmm_hexagonal_2024-05-22_14-51-10.878244'
traj_ind = 9

In [None]:

matfile = os.path.join(results_path, f'{geom}.mat')
data_from_mat = sio.loadmat(matfile)

# list the fields of both structs with the shape of their contents,
# separated by an empty line
for position, struct_name in enumerate(('data_ts', 'data_sim')):
    if position:
        print('')
    struct = data_from_mat[struct_name]
    for key in struct.dtype.names:
        print(key, struct[key][0, 0].shape)

In [None]:
# complete errorFlag array of this geometry
print(data_from_mat['data_sim']['errorFlag'][0,0])

In [None]:
# F_final entry of the inspected trajectory; indexed with traj_ind instead of a literal 9
# so the cell follows along when a different trajectory is selected
print(data_from_mat['data_sim']['F_final'][0,0][traj_ind])

In [None]:
# errorFlag entry of the inspected trajectory; indexed with traj_ind instead of a literal 9
# so the cell follows along when a different trajectory is selected
print(data_from_mat['data_sim']['errorFlag'][0,0][traj_ind])

In [None]:
# first column of the 'traj' field of data_ts (used below to select rows per trajectory)
traj = data_from_mat['data_ts']['traj'][0,0][:, 0]
traj

In [None]:
# 'Time' field of data_ts, flattened to 1-D
times = data_from_mat['data_ts']['Time'][0,0].flatten()
times

In [None]:
# time values of the rows that belong to the inspected trajectory
# NOTE(review): the + 1 suggests the 'traj' column is numbered from 1 -- confirm
times[traj == traj_ind + 1]

In [None]:
# F_final entry of the inspected trajectory; indexed with traj_ind instead of a literal 9
# so the cell follows along when a different trajectory is selected
data_from_mat['data_sim']['F_final'][0,0][traj_ind]

In [None]:
# path of the result file that was loaded for this geometry
print(matfile)

In [None]:
# path of the matching geometry input file (<geometries_path>/<geom>/<geom>_00.mat)
print(os.path.join(geometries_path, geom, geom + '_00.mat'))

## Error remains after rerun with reduced min time step

In [None]:
import os
import scipy.io as sio

In [None]:
geom = 'cm_hexagonal2_2024-05-22_14-27-49.234689'

# path with rerun results after running the simulations again
rerun_path = r'data_reruns'

# print the errorFlag array of every rerun .mat file that belongs to this geometry
for file in os.listdir(rerun_path):
    if not (file.startswith(geom) and file.endswith('.mat')):
        continue
    print(file)
    data = sio.loadmat(os.path.join(rerun_path, file))
    print(data['data_sim']['errorFlag'][0,0])

# set numpy print options to print to full precision
np.set_printoptions(precision=16)

# previous result: errorFlag array and the F_final row at index 8
with open(os.path.join(results_path, f'{geom}.mat'), 'rb') as f:
    data = sio.loadmat(f)
data_sim = data['data_sim']
print(data_sim['errorFlag'][0,0])
print(data_sim['F_final'][0,0][[8]])

## Rerun has 0 time steps

In [None]:
# records with the 'Rerun has 0 time steps' message (re-uses the name errorFlag_geoms)
errorFlag_geoms = [record for record in errors if record[1] == 'Rerun has 0 time steps']

In [None]:
# display the selected records
errorFlag_geoms