In [1]:
import numpy as np
from sklearn.decomposition import KernelPCA

In [2]:
from soap import *

In [3]:
_, structure, species = read_cif("CIF_files/DB0-m1_o14_smb_repeat.cif")

In [4]:
soap_out = S(structure = structure, species = species)

In [6]:
soap_out.shape

(213, 20)

In [7]:
N_env, N_feat = soap_out.shape

In [8]:
# 1) Build the N_env × N_env kernel among environments.  
#    For example, use an RBF kernel with some gamma > 0.
def rbf_kernel(u, v, gamma=1e-3):
    diff = u - v
    return np.exp(-gamma * (diff @ diff))

K = np.zeros((N_env, N_env))
for i in range(N_env):
    for j in range(N_env):
        K[i, j] = rbf_kernel(soap_out[i], soap_out[j], gamma=1e-3)

# 2) Run Kernel PCA to extract exactly one principal component.
kpca = KernelPCA(n_components=1, kernel="precomputed")
#   - We pass `kernel="precomputed"` because we already built K.
#   - kpca.fit_transform(K) returns an array of shape (N_env, 1),
#     but the corresponding eigenvector α is available as kpca.alphas_.
Z = kpca.fit_transform(K)   # shape = (N_env, 1), equals K @ α / λ

# 3) The principal eigenvector α (length N_env) can be taken from kpca.alphas_.
#    Note: scikit-learn’s KernelPCA stores `alphas_` scaled so that each 
#    column is normalized, but the relative ratios α_i/α_j are what matter.
alpha = kpca.alphas_[:, 0]   # shape = (N_env,)

# 4) Form the weighted sum of S’s rows: d = α^T ⋅ S.
#    That yields a single row of length N_feat.
d = alpha.reshape(1, N_env) @ S   # shape = (1, N_feat)

TypeError: 'function' object is not subscriptable