In [1]:
# Import the necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import sklearn.datasets
import sklearn.model_selection
import sklearn.metrics
import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, recall_score, accuracy_score, precision_score

In [None]:
# Assuming data is in NIfTI format

brain_data = nib.load("avg152T1_LR_nifti.hdr")
brain_data = brain_data.get_fdata()
brain_data

In [None]:
# Split the data into training, validation, and test sets
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(X_train, y_train, test_size=0.25, random_state=42, stratify=y_train)

In [2]:
# Load and preprocess the brain imaging data
# Here we use the sMRI data from the OASIS dataset as an example, but you can use any other dataset
# The sMRI data consists of 416 images of size 176 x 208 x 176, with 100 patients with MDD and 316 healthy controls
X, y = sklearn.datasets.fetch_oasis_vbm(n_subjects=416, dartel_version=1, return_X_y=True)
X = X.reshape(416, 176, 208, 176) # Reshape the data to 4D images
X = torch.from_numpy(X).float() # Convert the data to PyTorch tensors
y = torch.from_numpy(y).long() # Convert the labels to PyTorch tensors

AttributeError: 

In [41]:
import pandas as pd
import numpy as np

# Load your CSV data (replace with your actual file path)
csv_file_path = 'oasis_cross-sectional.csv'
data = pd.read_csv(csv_file_path)

# Create a random array of labels ('not depressed' or 'likely depressed')
random_labels = np.random.choice(['not depressed', 'likely depressed'], size=len(data))

# Assign the random labels to the 'depression_diagnosis' column
data['depression_diagnosis'] = random_labels

# Save the updated DataFrame back to the CSV file
data.to_csv('updated_oasis_cross-sectional.csv', index=False)

print("Random labels added to the 'depression_diagnosis' column.")


Random labels added to the 'depression_diagnosis' column.


In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import nibabel as nib
import sklearn.model_selection
import sklearn.metrics
import pandas as pd

# Load brain imaging data using NiBabel
# Assuming the data is in NIfTI format
brain_data = nib.load("avg152T1_LR_nifti.hdr")
X = brain_data.get_fdata()

data_shape = X.shape
# Load labels (depression diagnosis)
data = pd.read_csv('updated_oasis_cross-sectional.csv')
# Extract labels (0 for 'not depressed', 1 for 'likely depressed')
y = data['depression_diagnosis'].replace({'not depressed': 0, 'likely depressed': 1}).values

# Save labels as a NumPy array file
np.save('oasis_cross-sectional.npy', y)
y = np.load('oasis_cross-sectional.npy', allow_pickle=True)

# Ensure the number of samples in X and y are consistent
num_subjects = X.shape[0]
y = y[:num_subjects]

# Preprocess the data
# Reshape the data to 4D images
X = X.reshape(num_subjects, *data_shape[1:])

# Convert the data and labels to PyTorch tensors
X = torch.from_numpy(X).float()
y = torch.from_numpy(y).long()

# Extract features from the brain imaging data
# Here we use the voxel intensities as the features
# Normalize the voxel intensities
X = X / X.max()
X = X.reshape(num_subjects, -1)

# Select the first 1000 voxels as the features, for simplicity
X = X[:, :1000]
feature_dim = 1000



In [36]:
# Split the data into training, validation, and test sets
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(X_train, y_train, test_size=0.25, random_state=42, stratify=y_train)


In [37]:
# The DNN consists of three linear layers with ReLU activation and dropout
class DNN(nn.Module):
    def __init__(self, feature_dim, hidden_dim, output_dim):
        super(DNN, self).__init__()
        # Initialize the linear layers of the DNN
        self.fc1 = nn.Linear(feature_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        # Initialize the activation function
        self.relu = nn.ReLU()
        # Initialize the dropout layer
        self.dropout = nn.Dropout(0.5)
        # Initialize the softmax function for the output layer
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # Pass the features through the linear layers and the activation function
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.softmax(self.fc3(x))
        # Return the output predictions
        return x

In [38]:
# Define the hyperparameters of the DNN
hidden_dim = 64 
output_dim = 2 
learning_rate = 0.01 
batch_size = 32 
epochs = 10 


In [39]:
# Create a DNN object
model = DNN(feature_dim, hidden_dim, output_dim)
# Create an optimizer object
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Create a loss function object
criterion = nn.CrossEntropyLoss()


In [40]:
# Train the DNN on the training set
best_val_loss = float('inf') 
best_model = None 
for epoch in range(epochs):
    model.train()
    train_loss = 0
    train_acc = 0
    # Loop over the training batches
    for i in range(0, len(X_train), batch_size):
        X_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]
        optimizer.zero_grad()
        print(X_batch)
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        # Backward pass
        loss.backward()
        # Update the parameters
        optimizer.step()
        # Update the training loss and accuracy
        train_loss += loss.item()
        train_acc += (y_pred.argmax(dim=1) == y_batch).sum().item()
        # Print the progress
        if (i + 1) % 100 == 0:
            print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {loss.item():.4f}, Accuracy: {(y_pred.argmax(dim=1) == y_batch).sum().item() / batch_size:.4f}')
    # Compute the average training loss and accuracy
    train_loss /= len(X_train)
    train_acc /= len(X_train)
    # Set the model to evaluation mode
    model.eval()
    # Initialize the validation loss and accuracy
    val_loss = 0
    val_acc = 0
    # Loop over the validation batches
    for i in range(0, len(X_val), batch_size):
        # Get the current batch of features and labels
        X_batch = X_val[i:i+batch_size]
        y_batch = y_val[i:i+batch_size]
        # Forward pass
        y_pred = model(X_batch)
        # Compute the loss
        loss = criterion(y_pred, y_batch)
        # Update the validation loss and accuracy
        val_loss += loss.item()
        val_acc += (y_pred.argmax(dim=1) == y_batch).sum().item()
    # Compute the average validation loss and accuracy
    val_loss /= len(X_val)
    val_acc /= len(X_val)
    # Print the epoch summary
    print(f'Epoch {epoch + 1}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}')
    # Save the model with the lowest validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model = model


tensor([[0.0627, 0.0667, 0.0706,  ..., 0.0314, 0.0314, 0.0275],
        [0.0471, 0.0510, 0.0510,  ..., 0.0314, 0.0314, 0.0314],
        [0.0745, 0.0784, 0.0745,  ..., 0.0314, 0.0275, 0.0314],
        ...,
        [0.0745, 0.0745, 0.0706,  ..., 0.0314, 0.0314, 0.0275],
        [0.0627, 0.0667, 0.0667,  ..., 0.0314, 0.0275, 0.0275],
        [0.0510, 0.0510, 0.0510,  ..., 0.0353, 0.0314, 0.0314]])


IndexError: Target -9223372036854775808 is out of bounds.