In [None]:
import scipy.io
import numpy as np
import glob
import os

# Path to subject folders.
# NOTE(review): absolute, machine-specific path -- consider moving it to a config constant.
# glob returns matches in a filesystem-dependent order, so the list is sorted to make
# "the first subject" (used for the shape check below) the same on every run.
subject_dirs = sorted(
    glob.glob(r'C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\betafiles\mindy*')
)
if not subject_dirs:
    # Fail early with a clear message instead of an IndexError further down.
    raise FileNotFoundError(
        "No subject folders matching 'mindy*' were found; check the betafiles path."
    )


def _load_hemisphere_betas(subject_dir, file_name):
    """Load one hemisphere's .mat file and return its beta matrix.

    Parameters
    ----------
    subject_dir : str
        Folder of a single subject.
    file_name : str
        Name of the .mat file inside ``subject_dir``.

    Returns
    -------
    numpy.ndarray
        The 'cond1' and 'cond2' variables of the file stacked column-wise
        (expected to be (19076, 12) per the original notes -- not verified here).

    Raises
    ------
    FileNotFoundError
        If the .mat file does not exist (raised by ``scipy.io.loadmat``).
    KeyError
        If the file lacks a 'cond1' or 'cond2' variable.
    """
    hem_data = scipy.io.loadmat(os.path.join(subject_dir, file_name))
    return np.hstack([hem_data['cond1'], hem_data['cond2']])


# Dictionaries to store left and right hemisphere beta data for each subject,
# keyed by the subject folder name.
left_hem_voxel_beta_values = {}
right_hem_voxel_beta_values = {}

# Load both hemisphere files for each subject
for subject_dir in subject_dirs:
    subject_id = os.path.basename(subject_dir)  # Folder name, e.g. 'mindyXXX', is the subject ID
    left_hem_voxel_beta_values[subject_id] = _load_hemisphere_betas(
        subject_dir, 'LeftHemUnsm12conds.mat'
    )
    right_hem_voxel_beta_values[subject_id] = _load_hemisphere_betas(
        subject_dir, 'RightHemUnsm12conds.mat'
    )

# Optional: Check shapes for one subject to confirm successful loading
example_subject = next(iter(left_hem_voxel_beta_values))
print("Example Subject Left Hemisphere Shape:", left_hem_voxel_beta_values[example_subject].shape)
print("Example Subject Right Hemisphere Shape:", right_hem_voxel_beta_values[example_subject].shape)



In [None]:
# prepare unit matrices for ridge regression
import numpy as np
import os

# DNN layers whose activation matrices are loaded, in the order they are processed
layer_names = [
    'conv1', 'rnorm1', 'pool1',
    'conv2', 'rnorm2', 'pool2',
    'conv3',
    'conv4_W', 'conv5_W', 'pool5_W',
    'conv4_G', 'conv5_G', 'pool5_G',
]

# Folder holding one .npy activation file per layer
activation_path = r'C:\Users\quanz\Documents\UM\Projects\GLX_Project\DNN\MiND_Stimili'

# layer name -> activation matrix of shape (100, 12)
activation_matrices = {}

for layer_name in layer_names:
    # File names follow the pattern reduced_reduced_activation_matrix_<layer>.npy
    file_path = os.path.join(activation_path, f'reduced_reduced_activation_matrix_{layer_name}.npy')
    layer_data = np.load(file_path)

    # Only two layouts are accepted: already (100, 12), or (100, 120)
    if layer_data.shape not in ((100, 12), (100, 120)):
        raise ValueError(f"Unexpected shape for {layer_name}: {layer_data.shape}")

    if layer_data.shape == (100, 120):
        # Collapse each run of 10 consecutive columns to its mean -> (100, 12)
        layer_data = layer_data.reshape(100, 12, 10).mean(axis=2)

    activation_matrices[layer_name] = layer_data

# Optional sanity check: report the final shape of every loaded layer
for layer_name in activation_matrices:
    data = activation_matrices[layer_name]
    print(f"{layer_name}: shape {data.shape}")


In [None]:
# this code is for ridge regression for all of the subjects:
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
import numpy as np

# Select a single subject ID (e.g., the first subject in your data)
# NOTE: not used in this cell (every subject is processed below); kept so the name stays defined.
single_subject_id = list(left_hem_voxel_beta_values.keys())[0]


def _ridge_r2_per_voxel(X, voxel_betas, alpha=1.0):
    """Fit a ridge model for every voxel at once and return one R² per voxel.

    A single multi-target ``Ridge`` fit with a scalar ``alpha`` solves the same
    problem as fitting each voxel separately, without a Python loop over voxels.

    Parameters
    ----------
    X : array of shape (n_conditions, n_units)
        Layer activations, one row per condition.
    voxel_betas : array of shape (n_voxels, n_conditions)
        Beta values, one row per voxel.
    alpha : float
        Ridge regularization strength.

    Returns
    -------
    numpy.ndarray of shape (n_voxels,)
        R² of the fitted model for each voxel.
        NOTE(review): the score is computed on the same conditions the model was
        fitted on, so it measures in-sample fit, not cross-validated prediction.
    """
    voxel_betas = np.asarray(voxel_betas)
    if voxel_betas.shape[0] == 0:
        # No voxels: nothing to fit; mirror the empty result of a per-voxel loop.
        return np.empty(0)
    Y = voxel_betas.T  # (n_conditions, n_voxels): one regression target per voxel
    ridge = Ridge(alpha=alpha)  # You can adjust alpha as needed
    ridge.fit(X, Y)
    Y_pred = ridge.predict(X)
    # 'raw_values' returns one score per target column instead of their average
    return r2_score(Y, Y_pred, multioutput='raw_values')


# Assuming activation_matrices is a dictionary with each DNN layer's data (shape: (100, 12))
# left_hem_voxel_beta_values and right_hem_voxel_beta_values contain the beta data for each
# hemisphere and subject
hemisphere_betas = {
    "left": left_hem_voxel_beta_values,
    "right": right_hem_voxel_beta_values,
}

# fit_scores[hemisphere][layer] collects one per-voxel score array per subject
fit_scores = {
    hemi: {layer_name: [] for layer_name in activation_matrices.keys()}
    for hemi in hemisphere_betas
}

# Run ridge regression for left and right hemispheres separately
for layer_name, X in activation_matrices.items():  # X has shape (100, 12)
    X = X.T  # Transpose to shape (12, 100), aligning with 12 conditions
    for hemi, betas_by_subject in hemisphere_betas.items():
        for subject_id, voxel_betas in betas_by_subject.items():
            fit_scores[hemi][layer_name].append(
                _ridge_r2_per_voxel(X, voxel_betas, alpha=1.0)
            )

# Convert scores to arrays of shape (n_subjects, n_voxels) for easier handling
for hemi in fit_scores:
    for layer_name in fit_scores[hemi]:
        fit_scores[hemi][layer_name] = np.array(fit_scores[hemi][layer_name])


In [None]:
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
import numpy as np

# Select a single subject ID: the first key (insertion order) of the left-hemisphere dict
single_subject_id = list(left_hem_voxel_beta_values.keys())[0]


def _standardized_ridge_r2_per_voxel(X, voxel_betas, alpha=10.0):
    """Ridge-fit every voxel at once on z-scored responses; return R² per voxel.

    Each voxel's responses are standardized across conditions, a single
    multi-target ``Ridge`` is fitted, predictions are mapped back to the
    original scale, and R² is computed there. This matches fitting each voxel
    separately with its own scaler, without a Python loop over voxels.

    Parameters
    ----------
    X : array of shape (n_conditions, n_units)
        (Standardized) layer activations, one row per condition.
    voxel_betas : array of shape (n_voxels, n_conditions)
        Beta values, one row per voxel.
    alpha : float
        Ridge regularization strength.

    Returns
    -------
    numpy.ndarray of shape (n_voxels,)
        R² for each voxel, computed on the same conditions used for fitting
        (in-sample fit, not cross-validated prediction).
    """
    voxel_betas = np.asarray(voxel_betas)
    if voxel_betas.shape[0] == 0:
        # No voxels: nothing to fit; mirror the empty result of a per-voxel loop.
        return np.empty(0)
    Y = voxel_betas.T  # (n_conditions, n_voxels): one regression target per voxel

    # StandardScaler works column-wise, so every voxel gets its own mean/std
    scaler_y = StandardScaler()
    Y_scaled = scaler_y.fit_transform(Y)

    ridge = Ridge(alpha=alpha)
    ridge.fit(X, Y_scaled)

    # Predict and inverse-transform to the original scale
    Y_pred = scaler_y.inverse_transform(ridge.predict(X))

    # 'raw_values' returns one score per target column instead of their average
    return r2_score(Y, Y_pred, multioutput='raw_values')


# Beta matrices of the selected subject, one per hemisphere (comments elsewhere give (19076, 12))
single_subject_betas = {
    "left": left_hem_voxel_beta_values[single_subject_id],
    "right": right_hem_voxel_beta_values[single_subject_id],
}

# Dictionary to store fit scores for each hemisphere and layer for the single subject
fit_scores_single_subject = {
    hemi: {layer_name: [] for layer_name in activation_matrices.keys()}
    for hemi in single_subject_betas
}

# Run ridge regression for each layer and each hemisphere for the single subject
for layer_name, X in activation_matrices.items():  # X has shape (100, 12)
    X = X.T  # Transpose to shape (12, 100) for compatibility with voxel data

    # Standardize the activations in X (if not already standardized)
    scaler_X = StandardScaler()
    X = scaler_X.fit_transform(X)  # Now X is standardized

    for hemi, voxel_betas in single_subject_betas.items():
        fit_scores_single_subject[hemi][layer_name] = _standardized_ridge_r2_per_voxel(
            X, voxel_betas, alpha=10.0
        )

# Optional: Convert scores to arrays for easier handling (no-op for values that are already arrays)
for hemi in fit_scores_single_subject:
    for layer_name in fit_scores_single_subject[hemi]:
        fit_scores_single_subject[hemi][layer_name] = np.array(fit_scores_single_subject[hemi][layer_name])

# Check scores for the single subject
print("Fit scores for the single subject:", fit_scores_single_subject)


In [None]:

import pandas as pd

# Flatten the nested {hemisphere: {layer: scores}} results into one record per voxel,
# tagging each score with its hemisphere, layer and position in the score sequence.
data_to_save = [
    {
        "Hemisphere": hemi,
        "Layer": layer_name,
        "Voxel_Index": voxel_idx,
        "R2_Score": score,
    }
    for hemi, layers in fit_scores_single_subject.items()
    for layer_name, scores in layers.items()
    for voxel_idx, score in enumerate(scores)
]

# Build the long-format table in one go from the collected records
df_scores = pd.DataFrame(data_to_save)

# Write the table to the working directory without the row index
df_scores.to_csv("fit_scores_single_subject.csv", index=False)


In [None]:
# Report, per hemisphere and layer, how many voxel scores were stored
for hemi in fit_scores_single_subject:
    layers = fit_scores_single_subject[hemi]
    print(f"Hemisphere: {hemi}")
    for layer_name in layers:
        scores = layers[layer_name]
        print(f"  Layer: {layer_name}, Number of voxels: {len(scores)}")