In [9]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import numpy as np

# Load the activation matrix for conv1 (shape: 710016, 12)
layer_file_path = '/home/tpolklabuser/Desktop/Violet/DNN/ML_RR/DNN_Activation/conv1_average_activation_matrix.csv'
activation_matrix = np.loadtxt(layer_file_path, delimiter=',', skiprows=1)  # shape (710016, 12)

# Step 1: Apply PCA to reduce the dimensionality of each unit
n_pca_components = 10  # Number of PCA components for each unit
pca = PCA(n_components=n_pca_components)
reduced_activation_matrix = pca.fit_transform(activation_matrix)  # shape (710016, 10)
print(f"Reduced activation matrix shape (after PCA): {reduced_activation_matrix.shape}")

# Step 2: Apply K-Means to reduce the number of units
n_clusters = 1000  # Number of clusters (this will be the reduced number of units)
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
kmeans.fit(reduced_activation_matrix)

# Step 3: Use cluster centroids as the reduced representation of units
reduced_activation_matrix_final = kmeans.cluster_centers_  # shape (1000, 10)
print(f"Final reduced activation matrix shape (after clustering): {reduced_activation_matrix_final.shape}")

# Save the final reduced activation matrix
np.save(f'/home/tpolklabuser/Desktop/Violet/DNN/ML_RR/DNN_Activation/conv1_reduced_activation_matrix_clusters.npy', reduced_activation_matrix_final)


Reduced activation matrix shape (after PCA): (710016, 10)
Final reduced activation matrix shape (after clustering): (1000, 10)


In [3]:
print(activation_matrix.shape)

(710016, 12)


In [11]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Assuming 'pca' is the PCA model used earlier
# and 'kmeans' is the trained KMeans model
# Load original activation matrix
layer_file_path = '/home/tpolklabuser/Desktop/Violet/DNN/ML_RR/DNN_Activation/conv1_average_activation_matrix.csv'
activation_matrix = np.loadtxt(layer_file_path, delimiter=',', skiprows=1)  # Shape: (710016, 12)

# Step 1: Apply the same PCA transformation to the activation matrix
activation_matrix_pca = pca.transform(activation_matrix)  # Shape: (710016, 10)

# Step 2: Predict the closest centroids for each unit
closest_centroids = kmeans.predict(activation_matrix_pca)

# Step 3: Reconstruct the activation matrix using the closest centroids
reconstructed_matrix_pca = kmeans.cluster_centers_[closest_centroids]  # Shape: (710016, 10)

# Step 4: Inverse transform to get back to the original feature space
reconstructed_matrix = pca.inverse_transform(reconstructed_matrix_pca)  # Shape: (710016, 12)

# Step 5: Calculate the mean squared reconstruction error
reconstruction_error = mean_squared_error(activation_matrix, reconstructed_matrix)
print(f"Reconstruction Error (MSE): {reconstruction_error:.4f}")

# Step 6: Calculate explained variance as a measure of clustering performance
total_variance = np.var(activation_matrix)
explained_variance_ratio = 1 - (reconstruction_error / total_variance)
print(f"Explained Variance by Clustering: {explained_variance_ratio:.2%}")


Reconstruction Error (MSE): 3.3874
Explained Variance by Clustering: 99.09%
