## Perturbative footprint study: footprints clustering

### Imports

In [None]:
# Import standard modules
import numpy as np
import pickle
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import hdbscan

# Nicer style
import seaborn as sns
sns.set_theme(style="whitegrid")


### Load footprints and plot a sample

In [None]:
# Read the pickled list of footprints
# NOTE: pickle.load can execute arbitrary code; only open files you trust
with open("output/l_fp.pkl", "rb") as f:
    l_fp = pickle.load(f)

# Read the pickled list of bunch numbers (one entry per footprint)
with open("output/l_bunch_nb.pkl", "rb") as f:
    l_bunch_nb = pickle.load(f)

# Flag the footprints that contain no NaN, then use the same flags to filter
# both lists so that footprints and bunch numbers stay aligned
is_clean = [not np.isnan(fp).any() for fp in l_fp]
l_bunch_nb = [bunch_nb for i, bunch_nb in enumerate(l_bunch_nb) if is_clean[i]]
l_fp = [fp for fp, keep in zip(l_fp, is_clean) if keep]


# Plot a sample of the footprints on a grid of subplots, one footprint per axis
sample_nb = 200
n_cols = 10
# Keep at least one row: plt.subplots cannot build a grid with zero rows
n_rows = max(1, int(np.ceil(len(l_fp[:sample_nb]) / n_cols)))
# squeeze=False always returns a 2D array of axes, so the [row, col] indexing
# below also works when the sample fits on a single row
fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols * 2.5, n_rows * 2), squeeze=False)
for i, fp in enumerate(l_fp[:sample_nb]):
    ax = axs[i // n_cols, i % n_cols]
    try:
        # fp[0] holds Qx and fp[1] holds Qy; plotting the arrays and their
        # transposes draws the footprint mesh along both directions
        ax.plot(fp[0], fp[1], color="C0")
        ax.plot(fp[0].T, fp[1].T, color="C0")
        ax.set_title(f"Bunch {l_bunch_nb[i]}")
    except (AttributeError, IndexError, TypeError, ValueError) as err:
        # Best effort: report the footprint that cannot be drawn and carry on,
        # instead of silently leaving an empty subplot
        print(f"Could not plot footprint {i}: {err!r}")
        continue
    ax.set_xlabel(r"$\mathrm{Q_x}$")
    ax.set_ylabel(r"$\mathrm{Q_y}$")
    #ax.set_aspect("equal")
    # Explicit True: a bare ax.grid() toggles the grid, which switches it off
    # when the active style (seaborn "whitegrid") already enables it
    ax.grid(True)

plt.tight_layout()
plt.show()

### Remove outliers in each figure

In [None]:
# TODO: remove the outliers in each footprint (not implemented yet)

### Reduce dimensionality with PCA

In [None]:
# Flatten every footprint into one row per tune plane (Qx and Qy are processed separately)
l_fp_flat = [[fp[0].flatten(), fp[1].flatten()] for fp in l_fp]

# Resulting shape: (number of footprints, 2, number of points per footprint)
array_fp_flat = np.array(l_fp_flat)
print(array_fp_flat.shape)

# Apply PCA to the data: each tune plane is projected onto n_components
# principal components, then mapped back to the original space to obtain the
# compressed (reconstructed) footprints
n_components = 3
# One estimator per plane, so that the Qx model is not overwritten by the Qy fit
pca_qx = PCA(n_components=n_components)
pca_qy = PCA(n_components=n_components)
#pca = umap.UMAP(random_state=42)
array_fp_flat_qx_ld = pca_qx.fit_transform(array_fp_flat[:, 0, :])
array_fp_flat_qx_compressed = pca_qx.inverse_transform(array_fp_flat_qx_ld)
array_fp_flat_qy_ld = pca_qy.fit_transform(array_fp_flat[:, 1, :])
array_fp_flat_qy_compressed = pca_qy.inverse_transform(array_fp_flat_qy_ld)
# Backward compatibility: a single "pca" estimator used to be fitted on Qx and
# then refitted on Qy, so the name keeps pointing to the Qy model
pca = pca_qy

# Reshape the compressed rows back to the 2D grid shape of the original footprints
# (taken from the first footprint; all footprints share the same flattened size)
qx_grid_shape = l_fp[0][0].shape
qy_grid_shape = l_fp[0][1].shape
array_fp_flat_qx_compressed = np.reshape(
    array_fp_flat_qx_compressed,
    (array_fp_flat_qx_compressed.shape[0], qx_grid_shape[0], qx_grid_shape[1]),
)
array_fp_flat_qy_compressed = np.reshape(
    array_fp_flat_qy_compressed,
    (array_fp_flat_qy_compressed.shape[0], qy_grid_shape[0], qy_grid_shape[1]),
)
# Stacked as (plane, footprint, rows, cols)
array_fp_flat_compressed = np.array([array_fp_flat_qx_compressed, array_fp_flat_qy_compressed])

# Swap axes 0 and 1 to get (footprint, plane, rows, cols), i.e. the same layout as l_fp
array_fp_compressed = np.swapaxes(array_fp_flat_compressed, 0, 1)

print(array_fp_compressed.shape)

In [None]:
# Overlay each PCA-compressed footprint (C0) on its original (C1, semi-transparent)
# on a grid of subplots, one footprint per axis
n_cols = 10
# Keep at least one row: plt.subplots cannot build a grid with zero rows
n_rows = max(1, int(np.ceil(len(l_fp[:sample_nb]) / n_cols)))
# squeeze=False always returns a 2D array of axes, so the [row, col] indexing
# below also works when the sample fits on a single row
fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols * 2.5, n_rows * 2), squeeze=False)
for i, (fp_compressed, fp_original) in enumerate(zip(array_fp_compressed[:sample_nb], l_fp[:sample_nb])):
    ax = axs[i // n_cols, i % n_cols]
    try:
        # Index 0 holds Qx and index 1 holds Qy; plotting the arrays and their
        # transposes draws the footprint mesh along both directions
        ax.plot(fp_compressed[0], fp_compressed[1], color="C0")
        ax.plot(fp_compressed[0].T, fp_compressed[1].T, color="C0")
        ax.plot(fp_original[0], fp_original[1], color="C1", alpha=0.5)
        ax.plot(fp_original[0].T, fp_original[1].T, color="C1", alpha=0.5)
        ax.set_title(f"Bunch {l_bunch_nb[i]}")
    except (AttributeError, IndexError, TypeError, ValueError) as err:
        # Best effort: report the footprint that cannot be drawn and carry on,
        # instead of silently leaving an empty subplot
        print(f"Could not plot footprint {i}: {err!r}")
        continue
    ax.set_xlabel(r"$\mathrm{Q_x}$")
    ax.set_ylabel(r"$\mathrm{Q_y}$")
    #ax.set_aspect("equal")
    # Explicit True: a bare ax.grid() toggles the grid, which switches it off
    # when the active style (seaborn "whitegrid") already enables it
    ax.grid(True)

plt.tight_layout()
plt.show()

### Cluster the resulting points

In [None]:
from sklearn.cluster import MeanShift, estimate_bandwidth

# Build the low-dimensional feature matrix: for every footprint, the PCA
# coordinates of Qx followed by the PCA coordinates of Qy
array_fp_ld = np.concatenate([array_fp_flat_qx_ld, array_fp_flat_qy_ld], axis=1)

# Cluster with mean shift, using a bandwidth estimated from all the samples
# (alternative tried previously: hdbscan.HDBSCAN(min_cluster_size=5, min_samples=10))
n_points = len(array_fp_ld)
bandwidth = estimate_bandwidth(array_fp_ld, quantile=0.12, n_samples=n_points)
clusterer = MeanShift(bandwidth=bandwidth, bin_seeding=True)
clusterer.fit(array_fp_ld)

# Display the cluster label assigned to each footprint
clusterer.labels_


In [None]:
# Plot the original footprints on a grid of subplots, coloured by cluster label
n_cols = 10
# Keep at least one row: plt.subplots cannot build a grid with zero rows
n_rows = max(1, int(np.ceil(len(l_fp[:sample_nb]) / n_cols)))
# squeeze=False always returns a 2D array of axes, so the [row, col] indexing
# below also works when the sample fits on a single row
fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols * 2.5, n_rows * 2), squeeze=False)
for i, fp in enumerate(l_fp[:sample_nb]):
    ax = axs[i // n_cols, i % n_cols]
    try:
        label = clusterer.labels_[i]
        # "C<n>" picks the n-th colour of the property cycle; a negative label
        # (used by some clusterers for noise points) is not a valid "C<n>"
        # colour, so it is drawn in grey instead
        color = f"C{label}" if label >= 0 else "0.5"
        # fp[0] holds Qx and fp[1] holds Qy; plotting the arrays and their
        # transposes draws the footprint mesh along both directions
        ax.plot(fp[0], fp[1], color=color)
        ax.plot(fp[0].T, fp[1].T, color=color)
        ax.set_title(f"Bunch {l_bunch_nb[i]}")
    except (AttributeError, IndexError, TypeError, ValueError) as err:
        # Best effort: report the footprint that cannot be drawn and carry on,
        # instead of silently leaving an empty subplot
        print(f"Could not plot footprint {i}: {err!r}")
        continue
    ax.set_xlabel(r"$\mathrm{Q_x}$")
    ax.set_ylabel(r"$\mathrm{Q_y}$")
    #ax.set_aspect("equal")
    # Explicit True: a bare ax.grid() toggles the grid, which switches it off
    # when the active style (seaborn "whitegrid") already enables it
    ax.grid(True)

plt.tight_layout()
plt.show()

#