In [None]:
import pandas as pd
import numpy as np

# Path to your activation data file
file_path = r'C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili\conv1_average_activation_matrix.csv'

# Load the data
data = pd.read_csv(file_path)

# Perform SVD
U, S, Vt = np.linalg.svd(data, full_matrices=False)

# Set the number of components to retain (e.g., 100)
n_components = 100

# Select the top components
U_reduced = U[:, :n_components]
S_reduced = np.diag(S[:n_components])
Vt_reduced = Vt[:n_components, :]

# Form the reduced matrix
reduced_data = np.dot(U_reduced, S_reduced)

# Save the reduced data
output_path = r'C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili\conv1_svd_100_components.csv'
pd.DataFrame(reduced_data).to_csv(output_path, index=False)

print(f"SVD completed for conv1, saved to {output_path}")


In [None]:
# Calculate total variance
total_variance = np.sum(S**2)

# Calculate explained variance for the top 100 components
n_components = 100
explained_variance = np.sum(S[:n_components]**2)

# Calculate the explained variance ratio
explained_variance_ratio = explained_variance / total_variance
print(f"Explained Variance Ratio for top {n_components} components: {explained_variance_ratio:.2%}")

In [None]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Assuming 'pca' is the PCA model used earlier
# and 'kmeans' is the trained KMeans model
# Load original activation matrix
layer_file_path = '/home/tpolklabuser/Desktop/Violet/DNN/ML_RR/DNN_Activation/conv1_average_activation_matrix.csv'
activation_matrix = np.loadtxt(layer_file_path, delimiter=',', skiprows=1)  # Shape: (710016, 12)

# Step 1: Apply the same PCA transformation to the activation matrix
activation_matrix_pca = pca.transform(activation_matrix)  # Shape: (710016, 10)

# Step 2: Predict the closest centroids for each unit
closest_centroids = kmeans.predict(activation_matrix_pca)

# Step 3: Reconstruct the activation matrix using the closest centroids
reconstructed_matrix_pca = kmeans.cluster_centers_[closest_centroids]  # Shape: (710016, 10)

# Step 4: Inverse transform to get back to the original feature space
reconstructed_matrix = pca.inverse_transform(reconstructed_matrix_pca)  # Shape: (710016, 12)

# Step 5: Calculate the mean squared reconstruction error
reconstruction_error = mean_squared_error(activation_matrix, reconstructed_matrix)
print(f"Reconstruction Error (MSE): {reconstruction_error:.4f}")

# Step 6: Calculate explained variance as a measure of clustering performance
total_variance = np.var(activation_matrix)
explained_variance_ratio = 1 - (reconstruction_error / total_variance)
print(f"Explained Variance by Clustering: {explained_variance_ratio:.2%}")


In [32]:
# This codes using SVD instead of PCA, according to Justine Zhang report, 
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD

# Load or create your activation matrix
file = r'C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili\pool5_G_both_combined_matrix.csv'
activation_matrix = np.loadtxt(file, delimiter=',', skiprows=1) # Example loading step

# transform to .npy for futher use
# np.save('conv1_both_combined_matrix.npy', activation_matrix)

# Define the number of components to keep (adjust based on the desired explained variance)
n_components = 80  # Choose an appropriate number based on variance explained or experiment needs

activation_matrix_centered = activation_matrix - np.mean(activation_matrix, axis=0)

scaler = StandardScaler()
activation_matrix_standardized = scaler.fit_transform(activation_matrix_centered)

# Transpose the matrix to reduce along the units dimension (710061)
activation_matrix_T = activation_matrix_standardized.T  # Now shape is [120, 710061]


# Perform Truncated SVD
svd = TruncatedSVD(n_components=n_components)
reduced_matrix = svd.fit_transform(activation_matrix_T)


# Transpose back to retain the structure where rows represent units
reduced_matrix = reduced_matrix.T  # Final shape will be [n_components, 120]

# Print the shape of the reduced matrix to verify dimensions
print("Reduced matrix shape:", reduced_matrix.shape)

# Print the explained variance ratio to understand how much of the original data is captured in the reduced matrix.
print("Explained variance ratio:", svd.explained_variance_ratio_.sum())

# Optional: Inspect the singular values, which represent the importance of each reduced dimension.
# High singular values correlate with high variance directions, often capturing meaningful structures in the data.
singular_values = svd.singular_values_
print("Singular values:", singular_values)

# Save the reduced matrix for future analysis or model input
# np.save('reduced_activation_matrix.npy', reduced_matrix)

# np.save('C:\\Users\\quanz\\Documents\\UM\\Projects\\DNN_in_Neural_Dedifferentiation\\SVD_results\\reduced_activation_matrix_conv5_G.npy', reduced_matrix)


Reduced matrix shape: (80, 120)
Explained variance ratio: 0.8551802769260989
Singular values: [639.70775864 415.54301683 280.68413396 261.55004108 225.63190035
 221.77132014 211.94301011 202.81815524 193.31881494 184.95515144
 161.54688766 159.07377833 153.56660484 149.59882859 144.78090221
 143.23613521 142.45574147 137.20341162 135.77608254 133.4640699
 132.30314033 131.21407814 130.92991277 130.79617742 128.08121813
 127.72939914 126.58850306 125.98338387 125.58768852 124.65807729
 124.19321259 122.1503094  121.5848747  120.90303872 119.40566192
 118.917725   117.46694096 116.75134808 116.12525498 115.49945305
 115.24124712 114.56923533 114.01201535 113.3623525  112.61956307
 112.46778952 111.47100007 111.25858944 110.61980121 109.96072399
 109.30973278 108.08741678 107.89122056 107.13545779 106.6157933
 106.38528047 105.23605267 104.42344982 103.75568613 103.17074599
 102.83106847 101.85204226 101.69781394 101.37531056 100.82425433
 100.04470097  99.93869785  99.19405773  98.139930

In [12]:
import numpy as np

# Load the reduced activation matrix from the file
file_path = r'C:\Users\quanz\Documents\UM\Projects\DNN_in_Neural_Dedifferentiation\SVD_results\reduced_activation_matrix_rnorm1.npy'
reduced_activation_matrix = np.load(file_path)

# Print the shape of the loaded matrix
print(reduced_activation_matrix.shape)


(90, 120)


In [None]:
# This code will average the SVD matrix across time domain, so make the output from the previous chunck [100x120] to [100x6] condition-wise to prepare for risdge regression
import numpy as np
import glob

# Load all .npy files
file_paths = glob.glob(r'C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili\reduced_activation_matrix_*.npy')  # Adjust path as needed
activation_matrices = {file.split('\\')[-1].split('.')[0]: np.load(file) for file in file_paths}

# Function to average every 10 columns
def average_columns(matrix, group_size=10):
    return matrix.reshape(matrix.shape[0], -1, group_size).mean(axis=2)

# Apply column averaging to each matrix
reduced_matrices = {}
for name, matrix in activation_matrices.items():
    reduced_matrix = average_columns(matrix, group_size=10)  # Shape will be (100, 12)
    reduced_matrices[name] = reduced_matrix

# Optional: Save reduced matrices if you need them for later
for name, reduced_matrix in reduced_matrices.items():
    np.save(f"C:\\Users\\quanz\\Documents\\UM\\Projects\\GLX_Project\\DNN\\MiND_Stimili\\reduced_{name}.npy", reduced_matrix)



In [16]:
# This code help update the average across time matrix for a specific layer
import numpy as np

# Specify the file path for conv3
file_path = r'C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili\reduced_activation_matrix_conv5_G.npy'

# Load the specific file
activation_matrix = np.load(file_path)

# Function to average every 10 columns
def average_columns(matrix, group_size=10):
    return matrix.reshape(matrix.shape[0], -1, group_size).mean(axis=2)

# Apply column averaging
reduced_matrix = average_columns(activation_matrix, group_size=10)  # Shape will be (100, 12)

# Save the reduced matrix
output_path = r'C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili\reduced_reduced_activation_matrix_conv5_G.npy'
np.save(output_path, reduced_matrix)

print(f"Reduced matrix saved to: {output_path}")


Reduced matrix saved to: C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili\reduced_reduced_activation_matrix_conv5_G.npy


In [6]:
print(name)


NameError: name 'name' is not defined

In [None]:
# To see how much variance it explaines: 
total_variance_explained = svd.explained_variance_ratio_.sum()
print("Total variance explained:", total_variance_explained)

import matplotlib.pyplot as plt

cumulative_variance = np.cumsum(svd.explained_variance_ratio_)
plt.plot(cumulative_variance)
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('Explained Variance by Number of Components')
plt.show()