In [13]:
%run "./1. Data Loading.ipynb"

X: (800, 360, 25)
Y: (800,)


In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn import metrics
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import StratifiedKFold, train_test_split

In [15]:
random_seed = 42
np.random.seed(random_seed)
torch.manual_seed(random_seed)

seed_region = "L_pOFC"
target_network = "Visual2"

In [16]:
# print(list(zip(regions[0], regions[1])))
# print(set(regions[1]))
# list(regions[0][regions[1] == target_network])

In [17]:
# Helper function:
def find_indices_matching_string(arr, target_string):
    indices = []
    for index, item in enumerate(arr):
        if item == target_string:
            indices.append(index)
    return indices

In [18]:
# Average related conditions in each run.
averaged_data = {}
for subject_index, subject_id in enumerate(manifest):
    averaged_data[subject_id] = []

    for run_index, run in enumerate(manifest[subject_id]):

        neut_block = []
        fear_block = []

        for block_index, block_lookup in enumerate(run["condition_spans"]): 

            if (block_index+1 == 6):
                break
            
            start_index = block_lookup['Frames'][0]
            end_index = block_lookup['Frames'][1]
            condition = 0 if block_lookup['Condition'] == "Neut" else 1
            
            block_data = run["data"][:, start_index : end_index]

            if (condition == 0):
                neut_block.append(block_data)
            elif (condition == 1):
                fear_block.append(block_data)

        neut = np.mean(neut_block, axis=0)
        fear = np.mean(fear_block, axis=0)

        averaged_data[subject_id].append({
            "neut": neut,
            "fear": fear
        })

print(averaged_data["100307"][0]["neut"].shape)
print(averaged_data["100307"][0]["fear"].shape)

(360, 25)
(360, 25)


In [19]:
# Generate correlation matrix for our seed region against a target network.
seed_index = list(regions[0]).index(seed_region)
target_network_indices = find_indices_matching_string(regions[1], target_network)

# NOTE: seed_region must not be in target_region or logic error. 
if regions[1][seed_index] == target_network:
    print("Error: seed_region must not be in target_region.")
    quit()

for subject_index, subject_id in enumerate(averaged_data):
    for run_index, run in enumerate(averaged_data[subject_id]):
        neut_data = run["neut"]
        fear_data = run["fear"]

        neut_correlation_matrix = np.corrcoef(neut_data)
        fear_correlation_matrix = np.corrcoef(fear_data)

        neut_seed_matrix = neut_correlation_matrix[seed_index, target_network_indices]
        fear_seed_matrix = fear_correlation_matrix[seed_index, target_network_indices]

        averaged_data[subject_id][run_index]["neut_seed_matrix"] = neut_seed_matrix
        averaged_data[subject_id][run_index]["fear_seed_matrix"] = fear_seed_matrix

        if subject_index == 0 and run_index == 0:
            print(f"neut_seed_matrix: \n{list(zip(regions[0][target_network_indices], neut_seed_matrix))}\n")
            print(f"neut_seed_matrix: \n{list(zip(regions[0][target_network_indices], fear_seed_matrix))}")


neut_seed_matrix: 
[('R_MST', 0.41026638916109404), ('R_V6', -0.12584086233720332), ('R_V2', 0.16350283454606582), ('R_V3', 0.10164023674816663), ('R_V4', 0.1742921439970502), ('R_V8', 0.07144509358999686), ('R_V3A', 0.3155000156988965), ('R_V7', -0.28918997504980676), ('R_IPS1', 0.40191703906871734), ('R_FFC', -0.17535376882787188), ('R_V3B', -0.04810572197329674), ('R_LO1', 0.21418740038041453), ('R_LO2', 0.22949048534275032), ('R_PIT', 0.1380744811103511), ('R_MT', 0.25998739223256595), ('R_LIPv', -0.0440598133646109), ('R_VIP', -0.09723174642799957), ('R_PH', 0.059358080512050666), ('R_V6A', 0.1268679600514142), ('R_VMV1', 0.2750005066834736), ('R_VMV3', 0.26134156820864796), ('R_V4t', -0.013591582290667729), ('R_FST', 0.17816261004990563), ('R_V3CD', 0.07774474201684844), ('R_LO3', 0.24496161789357662), ('R_VMV2', -0.08808508561260234), ('R_VVC', -0.15527899585831775), ('L_MST', 0.0755794561238915), ('L_V6', -0.19785367561502304), ('L_V2', 0.09635532406381839), ('L_V3', -0.3396968


neut_seed_matrix: 
[('R_MST', 0.0576874010689234), ('R_V6', -0.08514668619064643), ('R_V2', -0.06282316159636277), ('R_V3', -0.024810608170774656), ('R_V4', 0.015371630268096353), ('R_V8', -0.27674722943757746), ('R_V3A', -0.14571089394790673), ('R_V7', -0.08190676449034218), ('R_IPS1', -0.12939220540922453), ('R_FFC', -0.1379138344156191), ('R_V3B', 0.07587657909589683), ('R_LO1', -0.29279725437672616), ('R_LO2', -0.005313311811503042), ('R_PIT', 0.1124111331429093), ('R_MT', -0.1809752749026213), ('R_LIPv', 0.04670627155628109), ('R_VIP', -0.25489381954367374), ('R_PH', -0.05564504207080946), ('R_V6A', 0.04622414979669199), ('R_VMV1', 0.21706043099904335), ('R_VMV3', 0.026458091645909714), ('R_V4t', -0.10143403368537583), ('R_FST', -0.3468702920476156), ('R_V3CD', -0.14710335821779696), ('R_LO3', -0.2545726125777376), ('R_VMV2', 0.0967580407398259), ('R_VVC', -0.031835030245846316), ('L_MST', -0.2872636976434586), ('L_V6', -0.16648553935951332), ('L_V2', -0.01715621099152137), ('L_V

In [20]:
# Prepare the data for modeling:
X = []
Y = []

for subject_index, subject_id in enumerate(averaged_data):
    for run_index, run in enumerate(averaged_data[subject_id]):

            X.append(averaged_data[subject_id][run_index]["neut_seed_matrix"])
            Y.append(0)

            X.append(averaged_data[subject_id][run_index]["fear_seed_matrix"])
            Y.append(1)

X = np.array(X)
Y = np.array(Y)

print(f"X: {X.shape}")
print(f"Y: {Y.shape}")

X: (400, 54)
Y: (400,)


In [21]:
# Network definition
class SimpleNN(nn.Module):
    def __init__(self, num_features, hidden_size, output_size, dropout_rate=0.5):
        super(SimpleNN, self).__init__()

        self.fc1 = nn.Linear(num_features, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, output_size)
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.fc3(x)
        return x

In [22]:
# Data split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, stratify=Y, train_size=0.8, test_size=0.2, random_state=random_seed)

print(f"X_train: {X_train.shape}, Y_train {Y_train.shape}")
print(f"X_test: {X_test.shape}, Y_test {Y_test.shape}")

X_train: (320, 54), Y_train (320,)
X_test: (80, 54), Y_test (80,)


In [23]:
# Training loop
model_dir = "models"

num_samples = len(X_train)
num_features = len(X_train[0])
num_classes = 2
hidden_size = num_features * 2

num_epochs = 100
num_folds = 10

#learning_rate = 0.01 # 87%
learning_rate = 0.005 # 86%
#learning_rate = 0.001 # 86%
#learning_rate = 0.0005 # 86%
#learning_rate = 0.0001 # 57%

batch_size = 32

k_sampler = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=random_seed)

# Create a SummaryWriter object
writer = SummaryWriter()

for fold_index, (train_indices, test_indices) in enumerate(k_sampler.split(X_train, Y_train)):
    print(f"Fold {fold_index + 1}:")

    np.save(f'./models/model_{fold_index + 1}_train.npy', train_indices)
    np.save(f'./models/model_{fold_index + 1}_test.npy', test_indices)

    train_length = len(train_indices)
    test_length = len(test_indices)
    x_train, y_train = X[train_indices], Y[train_indices]
    x_test, y_test = X[test_indices], Y[test_indices]

    model = SimpleNN(num_features, hidden_size, num_classes).float()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Model Training:
    for epoch_index in range(num_epochs):
        train_loss = 0.0

        model.train()
        for batch_index in range(0, train_length, batch_size):
            inputs = torch.from_numpy(x_train[batch_index: batch_index + batch_size]).float()
            labels = torch.from_numpy(y_train[batch_index: batch_index + batch_size]).long()

            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        avg_train_loss = train_loss / (train_length // batch_size)

        # Write training loss to TensorBoard
        writer.add_scalar('Loss/Train', avg_train_loss, fold_index * num_epochs + epoch_index)

        val_loss = 0.0
        correct = 0
        total = 0

        # Model Evaluation:
        model.eval()
        with torch.no_grad():
            inputs = torch.from_numpy(x_test).float()
            labels = torch.from_numpy(y_test).long()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # Compute accuracy
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        if ((epoch_index+1) % 25 == 0):
            print(f"\tEpoch {epoch_index+1}:")
            print(f"\t\tTrain Loss: {avg_train_loss:.4f}")
            print(f"\t\tTest Loss: {val_loss / test_length:.4f}")
            print(f"\t\tTest Accuracy: {(100 * correct / total):.2f}%")

        # Write validation loss and accuracy to TensorBoard
        writer.add_scalar('Loss/Test', val_loss / test_length, fold_index * num_epochs + epoch_index)
        writer.add_scalar('Accuracy/Test', 100 * correct / total, fold_index * num_epochs + epoch_index)
    
    # Save the model.
    torch.save(model, f'./models/model_{fold_index + 1}.pth')

# Close the SummaryWriter
writer.close()


Fold 1:
	Epoch 25:
		Train Loss: 0.2299
		Test Loss: 0.0512
		Test Accuracy: 40.62%
	Epoch 50:
		Train Loss: 0.1586
		Test Loss: 0.0817
		Test Accuracy: 43.75%
	Epoch 75:
		Train Loss: 0.0807
		Test Loss: 0.1064
		Test Accuracy: 43.75%
	Epoch 100:
		Train Loss: 0.0511
		Test Loss: 0.1070
		Test Accuracy: 46.88%
Fold 2:
	Epoch 25:
		Train Loss: 0.2217
		Test Loss: 0.0560
		Test Accuracy: 43.75%
	Epoch 50:
		Train Loss: 0.0866
		Test Loss: 0.0964
		Test Accuracy: 40.62%
	Epoch 75:
		Train Loss: 0.0758
		Test Loss: 0.1108
		Test Accuracy: 53.12%
	Epoch 100:
		Train Loss: 0.0691
		Test Loss: 0.0993
		Test Accuracy: 53.12%
Fold 3:
	Epoch 25:
		Train Loss: 0.2427
		Test Loss: 0.0639
		Test Accuracy: 46.88%
	Epoch 50:
		Train Loss: 0.0986
		Test Loss: 0.1019
		Test Accuracy: 46.88%
	Epoch 75:
		Train Loss: 0.0926
		Test Loss: 0.1158
		Test Accuracy: 43.75%
	Epoch 100:
		Train Loss: 0.0801
		Test Loss: 0.1336
		Test Accuracy: 40.62%
Fold 4:
	Epoch 25:
		Train Loss: 0.2535
		Test Loss: 0.0362
	

In [24]:
# Evaluate across K folds.
x_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(Y_test)

accuracy_scores = []
confusion_matrices = []
f1_scores = []
precision_scores = []
recall_scores = []
roc_auc_scores = []
cohen_kappa_scores = []

for fold_index in range(num_folds):
    model_path = f'./models/model_{fold_index + 1}.pth'
    model = torch.load(model_path)
    model.eval()

    with torch.no_grad():
        logits = model(x_test)
        probabilities = F.softmax(logits, dim=1)
        _, predicted_labels = torch.max(probabilities, dim=1)

        # Calculate metrics for this fold and store the results
        accuracy_scores.append(metrics.accuracy_score(y_test, predicted_labels))
        confusion_matrices.append(metrics.confusion_matrix(y_test, predicted_labels))
        f1_scores.append(metrics.f1_score(y_test, predicted_labels))
        precision_scores.append(metrics.precision_score(y_test, predicted_labels))
        recall_scores.append(metrics.recall_score(y_test, predicted_labels))
        roc_auc_scores.append(metrics.roc_auc_score(y_test, predicted_labels))
        cohen_kappa_scores.append(metrics.cohen_kappa_score(y_test, predicted_labels))

# Calculate the mean values for all metrics across folds
mean_accuracy = np.mean(accuracy_scores)
mean_f1_score = np.mean(f1_scores)
mean_precision = np.mean(precision_scores)
mean_recall = np.mean(recall_scores)
mean_roc_auc = np.mean(roc_auc_scores)
mean_cohen_kappa = np.mean(cohen_kappa_scores)

print(f"{seed_region} vs. {target_network}")

# Print the mean values for all metrics
print(f"\tMean Accuracy: {mean_accuracy:.4f}")
print(f"\tMean F1 Score: {mean_f1_score:.4f}")
print(f"\tMean Precision: {mean_precision:.4f}")
print(f"\tMean Recall: {mean_recall:.4f}")
print(f"\tMean ROC AUC: {mean_roc_auc:.4f}")
print(f"\tMean Cohen's Kappa: {mean_cohen_kappa:.4f}")


L_pOFC vs. Visual2
	Mean Accuracy: 0.8625
	Mean F1 Score: 0.8633
	Mean Precision: 0.8598
	Mean Recall: 0.8675
	Mean ROC AUC: 0.8625
	Mean Cohen's Kappa: 0.7250
