# Figure 2 - Analysis

In [None]:
import sys
sys.path.append("..")

from main import *

import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr

plt.rcParams['font.size'] = 12

# Use the first CUDA device when one is available, otherwise run on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(device)

#### Loading data

In [None]:
# Load the vertex coordinates and reorder them by increasing value of
# their third column; the eigenmodes are reordered with the same permutation
# so that rows of both arrays keep referring to the same vertex.
vertices = np.load('../Files/vertices_ellipse.npy').astype('float')
order = vertices[:, 2].argsort()
vertices = vertices[order]

eigenmodes = np.load('../Files/eigenmodes_ellipse.npy')
eigenmodes = eigenmodes[order]

# Evaluating the effect of connectivity radius $h$

Simulations were conducted externally on a computer cluster (`Compute Canada`) and the results are loaded here for analysis. See the script in the `ComputeCanada/kernel_size/` folder for a code example.

In [None]:
# Precomputed scores, indexed as [h value, simulation]: the cells below
# average over axis 1 and plot each column against `h_values`.
scores_per_h = np.load('../Results/scores_per_h_2500.npy') # 50 h values, 10 simulations per h value

In [None]:
# Grid of connectivity radii matching the first axis of `scores_per_h`.
h_values = np.linspace(0.025, 1, 50)

# h value whose score, averaged over simulations, is the highest.
best_index = np.mean(scores_per_h, axis=1).argmax()
print(h_values[best_index])

Plotting the average correlation between eigenmodes and gradients for varying $h$ values.

In [None]:
# Red line: score averaged over simulations; black dots: individual simulations.
fig, ax = plt.subplots(figsize=(5, 5))
mean_scores = np.mean(scores_per_h, axis=1)
ax.plot(h_values, mean_scores, color='red', linewidth=2)
for sim_scores in scores_per_h.T:  # One column of scores per simulation
    ax.scatter(h_values, sim_scores, color='black', s=5)

ax.spines[['top', 'right']].set_visible(False)
ax.set_ylabel('Average $|r|$ (50 modes)')
ax.set_xlabel('$h$')

### Fitting curves

Fitting a power-law function with a constant offset, $a / h^b + c$, to the decaying part of the previous average curve.

In [None]:
from scipy.optimize import curve_fit

def model(x, a, b, c):
    """Power-law decay with a constant offset: ``a / x**b + c``.

    Parameters
    ----------
    x : scalar or array
        Independent variable (must be non-zero wherever ``b > 0``).
    a : float
        Scale of the power-law term.
    b : float
        Exponent of the power-law term.
    c : float
        Constant offset added to the power-law term.

    Returns
    -------
    scalar or array
        ``a / x**b + c``, evaluated elementwise when ``x`` is an array.
    """
    power_term = x ** b
    return a / power_term + c

In [None]:
# Index of the first h value included in the fit; earlier h values (before
# the decaying part of the average curve) are left out.
cutoff = 6
x = h_values[cutoff:]
y = np.mean(scores_per_h, axis=1)[cutoff:]

# Starting point (a, b, c) for model(x, a, b, c) = a / x**b + c.
initial_guess = [0, 2, 0.05]
params, _ = curve_fit(model, x, y, p0=initial_guess, maxfev=5000)

fig, ax = plt.subplots(figsize=(5, 5))
# The points are the per-h means that were fitted, so they are drawn a single
# time; repeating the same scatter call only stacks identical markers and
# defeats the alpha setting.
ax.scatter(x, y, color='black', alpha=0.5)
ax.plot(x, model(x, *params), color='red', alpha=1, linewidth=2)
ax.spines[['top', 'right']].set_visible(False)
ax.set_xlabel('$h$')
ax.set_ylabel('Reconstruction quality')
plt.show()

# Fitted (a, b, c)
print(params)

#### Estimating exponent from bootstrap sampling

Randomly selecting one simulation for each $h$ value, then fitting the exponent for several arbitrary cutoff points (the index of the first $h$ value included in the fit).

In [None]:
# Bootstrap estimate of the fitted parameters: every iteration picks one
# simulation per h value at random and refits the model from several cutoffs.
# NOTE: no random seed is set in this cell, so the resulting distribution
# differs from run to run unless a seed is set elsewhere.
n_bootstrap = 10000
cutoffs = range(7, 10)  # Indices of the first h value included in each fit
n_h, n_sims = scores_per_h.shape

params_all = []

for _ in tqdm(range(n_bootstrap)):

    # Randomly subsampling: one simulation index per h value. randint draws
    # integers from [0, n_sims) directly, so an index can never reach n_sims
    # (truncating np.random.uniform samples does not give that guarantee).
    ids = np.random.randint(0, n_sims, size=n_h)
    scores = scores_per_h[np.arange(n_h), ids]

    # Fitting for different cutoff points
    for i in cutoffs:
        x = h_values[i:]
        y = scores[i:]
        initial_guess = [0, 3, 0.05]  # Starting (a, b, c) for the optimizer
        p, _ = curve_fit(model, x, y, p0=initial_guess, maxfev=5000)
        params_all.append(p)

# One row of fitted (a, b, c) per (bootstrap iteration, cutoff) pair
params = np.stack(params_all, axis=0)

Average exponent:

In [None]:
# Column 1 of `params` is the exponent b of model(x, a, b, c); this averages
# it over every bootstrap fit.
np.mean(params, axis=0)[1]

In [None]:
# Distribution of the fitted exponent (second fitted parameter) over all bootstrap fits.
exponents = params[:, 1]
plt.hist(exponents, bins=30)
plt.show()

In [None]:
# Despite the file name, this stores every fitted (a, b, c) triplet, one row
# per fit; the exponent is column 1.
np.save('../Results/exponents_distribution.npy', params)

#### Plotting a subset of the fitted power-law curves

In [None]:
fig, ax = plt.subplots(figsize=(5, 5))

# Black dots: individual simulations, starting from the h value at index 7.
x = h_values[7:]
for sim_scores in scores_per_h[7:].T:
    ax.scatter(x, sim_scores, color='black', s=5)

# Red curves: the first 100 bootstrap fits, all evaluated over the same h range.
for a, b, c in params[:100]:
    ax.plot(x, model(x, a, b, c), color='red', alpha=0.1, linewidth=1)
ax.set_xlim([0.12, 0.6])

ax.spines[['top', 'right']].set_visible(False)
ax.set_ylabel('Average $|r|$ (50 modes)')
ax.set_xlabel('$h$')

# Edge-swapping

#### Analyzing mode correlations vs # of edge swaps

Again, results were obtained externally on a computer cluster. See the code in the `ComputeCanada/edge_swapping/` folder.

In [None]:
# Average connection length after swaps
D_mean = np.load('../ComputeCanada/edge_swapping/avg_d_per_rho_swaps.npy')
# Eigenmode-gradient correlations after edge swaps
mode_similarities = np.load('../ComputeCanada/edge_swapping/mode_correlations_per_rho_swaps.npy')

# Fractions of swapped edges, used below as the x axis for the first
# dimension of `D_mean` and `scores`.
fractions = np.linspace(0, 0.99, 36, endpoint=True)

# Score of each correlation matrix: mean absolute value of its diagonal.
n_fractions, n_repeats = mode_similarities.shape[:2]
scores = np.zeros((n_fractions, n_repeats))
for i, j in np.ndindex(n_fractions, n_repeats):
    diagonal = np.diag(mode_similarities[i, j])
    scores[i, j] = np.mean(np.abs(diagonal))

#### Number of edge swaps vs average connection length

In [None]:
fig, ax = plt.subplots(figsize=(5, 5))

# Red line: mean over axis 1 of `D_mean`; black dots: its individual columns.
mean_distance = np.mean(D_mean, axis=1)
ax.plot(fractions, mean_distance, color='red', linewidth=2)
for run_distances in D_mean.T:
    ax.scatter(fractions, run_distances, color='black', s=5)

ax.spines[['top', 'right']].set_visible(False)
ax.set_ylabel('Average $d$')
ax.set_xlabel('Fraction of edge swaps')

#### Eigenmode-gradient correlations following edge swaps

In [None]:
fig, ax = plt.subplots(figsize=(5, 5))

# Red line: mean over axis 1 of `scores`; black dots: its individual columns.
mean_swap_scores = np.mean(scores, axis=1)
ax.plot(fractions, mean_swap_scores, color='red', linewidth=2)
for run_scores in scores.T:
    ax.scatter(fractions, run_scores, color='black', s=5)

ax.spines[['top', 'right']].set_visible(False)
ax.set_xlabel('Fraction of edges swapped')
ax.set_ylabel('Mode correspondence')

#### Comparison with $h$ variation

Measuring average distance for different $h$ values and using these distances to compare with edge swapping.

In [None]:
N = 2500      # Number of vertices drawn per run
N_runs = 10   # Independent random draws per h value

n_vertices = vertices.shape[0]

# For each h: mean of all pairwise distances <= h among N randomly chosen
# vertices, repeated N_runs times with a fresh random selection.
avg_distance = []
for h in tqdm(h_values):

    run_means = []

    for _ in range(N_runs):

        # Shuffled 0/1 list with exactly N ones, used as a selection mask
        random_ids = [1] * N + [0] * (n_vertices - N)
        np.random.shuffle(random_ids)
        coords = vertices[np.array(random_ids) == 1]

        # NOTE(review): d presumably contains the zero self-distances
        # (coords vs. coords), which would be included in the mean below;
        # confirm against compute_distances.
        d = compute_distances(coords, coords)
        within_radius = d <= h
        run_means.append(np.mean(d[within_radius]))

    avg_distance.append(run_means)

# Rows: h values, columns: runs
avg_distance = np.stack(avg_distance)

Plotting the gradient-eigenmode correlations for $h$ expansion (red) and edge swapping (blue)

In [None]:
# 8-bit RGB triplet divided by 255 so it can be passed to matplotlib as a
# color; the plotting cell below darkens it by multiplying by 0.75.
blue = np.array([66, 135, 245]) / 255

In [None]:
fig, ax = plt.subplots(figsize=(5, 5))

# Red: varying h, with each h value placed at its mean distance
# (averaged over axis 1 of `avg_distance`) instead of at h itself.
mean_distance_h = np.mean(avg_distance, axis=1)
for sim_scores in scores_per_h.T:
    ax.scatter(mean_distance_h, sim_scores, color='red', alpha=0.25, edgecolor='None')
ax.plot(mean_distance_h, np.mean(scores_per_h, axis=1), color='red', linewidth=3)

# Blue: edge swapping, each column of `scores` plotted against the matching
# column of `D_mean`, followed by the mean curve and its markers.
swap_color = 0.75 * blue
for i in range(scores.shape[1]):
    ax.scatter(D_mean[:, i], scores[:, i], color=swap_color, alpha=0.25, edgecolor='None')
mean_distance_swaps = np.mean(D_mean, axis=1)
mean_scores_swaps = np.mean(scores, axis=1)
ax.plot(mean_distance_swaps, mean_scores_swaps, color=swap_color, linewidth=3)
ax.scatter(mean_distance_swaps, mean_scores_swaps, color=swap_color)

ax.spines[['top', 'right']].set_visible(False)
ax.set_xlabel('Mean $d$')
ax.set_ylabel('Mode correspondence')

In [None]:
# Stores one value per h: `avg_distance` averaged over its second axis (the runs).
np.save('../Results/figure2_avg_distance.npy', np.mean(avg_distance, axis=1))