In [8]:
import pandas as pd
import numpy as np

# Path to your activation data file
file_path = r'C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili\conv1_average_activation_matrix.csv'

# Load the data
data = pd.read_csv(file_path)

# Perform SVD
U, S, Vt = np.linalg.svd(data, full_matrices=False)

# Set the number of components to retain (e.g., 100)
n_components = 100

# Select the top components
U_reduced = U[:, :n_components]
S_reduced = np.diag(S[:n_components])
Vt_reduced = Vt[:n_components, :]

# Form the reduced matrix
reduced_data = np.dot(U_reduced, S_reduced)

# Save the reduced data
output_path = r'C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili\conv1_svd_100_components.csv'
pd.DataFrame(reduced_data).to_csv(output_path, index=False)

print(f"SVD completed for conv1, saved to {output_path}")


SVD completed for conv1, saved to C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili\conv1_svd_100_components.csv


In [9]:
# Calculate total variance
total_variance = np.sum(S**2)

# Calculate explained variance for the top 100 components
n_components = 100
explained_variance = np.sum(S[:n_components]**2)

# Calculate the explained variance ratio
explained_variance_ratio = explained_variance / total_variance
print(f"Explained Variance Ratio for top {n_components} components: {explained_variance_ratio:.2%}")

Explained Variance Ratio for top 100 components: 100.00%


In [11]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Assuming 'pca' is the PCA model used earlier
# and 'kmeans' is the trained KMeans model
# Load original activation matrix
layer_file_path = '/home/tpolklabuser/Desktop/Violet/DNN/ML_RR/DNN_Activation/conv1_average_activation_matrix.csv'
activation_matrix = np.loadtxt(layer_file_path, delimiter=',', skiprows=1)  # Shape: (710016, 12)

# Step 1: Apply the same PCA transformation to the activation matrix
activation_matrix_pca = pca.transform(activation_matrix)  # Shape: (710016, 10)

# Step 2: Predict the closest centroids for each unit
closest_centroids = kmeans.predict(activation_matrix_pca)

# Step 3: Reconstruct the activation matrix using the closest centroids
reconstructed_matrix_pca = kmeans.cluster_centers_[closest_centroids]  # Shape: (710016, 10)

# Step 4: Inverse transform to get back to the original feature space
reconstructed_matrix = pca.inverse_transform(reconstructed_matrix_pca)  # Shape: (710016, 12)

# Step 5: Calculate the mean squared reconstruction error
reconstruction_error = mean_squared_error(activation_matrix, reconstructed_matrix)
print(f"Reconstruction Error (MSE): {reconstruction_error:.4f}")

# Step 6: Calculate explained variance as a measure of clustering performance
total_variance = np.var(activation_matrix)
explained_variance_ratio = 1 - (reconstruction_error / total_variance)
print(f"Explained Variance by Clustering: {explained_variance_ratio:.2%}")


Reconstruction Error (MSE): 3.3874
Explained Variance by Clustering: 99.09%


In [6]:
# This codes using SVD instead of PCA, according to Hustin Zhang report, 
import numpy as np
from sklearn.decomposition import TruncatedSVD

# Load or create your activation matrix
file = r'C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili\conv1_both_combined_matrix.csv'
activation_matrix = np.loadtxt(file, delimiter=',') # Example loading step

# transform to .npy for futher use
np.save('activation_matrix_conv1.npy', activation_matrix)

# Define the number of components to keep (adjust based on the desired explained variance)
n_components = 100  # Choose an appropriate number based on variance explained or experiment needs


# Transpose the matrix to reduce along the units dimension (710061)
activation_matrix_T = activation_matrix.T  # Now shape is [120, 710061]


# Perform Truncated SVD
svd = TruncatedSVD(n_components=n_components)
reduced_matrix = svd.fit_transform(activation_matrix_T)


# Transpose back to retain the structure where rows represent units
reduced_matrix = reduced_matrix.T  # Final shape will be [n_components, 120]

# Print the shape of the reduced matrix to verify dimensions
print("Reduced matrix shape:", reduced_matrix.shape)

# Print the explained variance ratio to understand how much of the original data is captured in the reduced matrix.
print("Explained variance ratio:", svd.explained_variance_ratio_.sum())

# Optional: Inspect the singular values, which represent the importance of each reduced dimension.
# High singular values correlate with high variance directions, often capturing meaningful structures in the data.
singular_values = svd.singular_values_
print("Singular values:", singular_values)

# Save the reduced matrix for future analysis or model input
np.save('reduced_activation_matrix.npy', reduced_matrix)

Reduced matrix shape: (100, 120)
Explained variance ratio: 0.9563138547342261
Singular values: [175927.10872973  23455.35208629  18763.77234434  17525.40415304
  16478.5703798   14002.14858853  13368.28377158  13116.52421365
  11955.99936045  11851.88772349  11784.80962041  11468.27773259
  11112.05824488  10767.43831346  10639.27277937  10283.59845209
   9831.01489034   9796.16918934   9702.94765689   9527.76731141
   9385.7051551    9282.26367376   9228.59024133   9055.45420916
   8891.69612046   8781.28071      8698.41015543   8492.67436424
   8435.90255679   8411.36581999   8255.11426234   8163.80212242
   8102.22008208   7995.39542272   7957.55914337   7845.89053559
   7691.70818844   7659.39927169   7589.98522617   7502.44361049
   7365.8297676    7344.16192377   7294.10683456   7158.71667711
   7100.61957272   7030.50306231   6986.38985742   6959.33126507
   6847.68102135   6787.35976925   6768.96606304   6735.97536823
   6645.17233183   6566.27717321   6561.69787161   6488.8961