In [1]:
import os
import sklearn
from sklearn import metrics
from sklearn.metrics import roc_auc_score
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.python.eager import context
from tensorflow import keras
from tensorflow.keras.layers import Input
import numpy as np
import random
import time
import math
import pandas as pd
import pylab as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

2025-05-07 10:46:43.865934: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
os.chdir("/data/home/lmx/psy_test/v1_20241016/")

In [3]:
os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
os.environ['TF_DETERMINISTIC_OPS'] = '1'
os.environ['TF_CUDNN_USE_FRONTEND'] = '1'

os.environ["PYTHONHASHSEED"] = '0'

tf.config.threading.set_inter_op_parallelism_threads(1)
tf.config.threading.set_intra_op_parallelism_threads(1)
    
tf.config.set_soft_device_placement = False
tf.config.experimental.set_memory_growth = True
gpus = tf.config.experimental.list_physical_devices('GPU')
print("gpus:", gpus)
 
if gpus:
    tf.config.experimental.set_virtual_device_configuration(gpus[1], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024)])
    tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024)])
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), len(logical_gpus), 'Logical gpus')
    
### set background seed
seed = 42

random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.experimental.numpy.random.seed(seed)

os.environ["CUDA_VISIBLE_DEVICES"] = "1"

x_train = np.load('./data/x_train.npy',allow_pickle=True)
y_train = np.load('./data/y_train.npy',allow_pickle=True)
print(x_train[0].shape)
x_train = np.expand_dims(x_train, -1)
x_train = x_train.astype('float32') 


num_classes = 2
input_shape = x_train.shape[1:]

gpus: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
2 2 Logical gpus
(7, 7)


2025-05-07 10:46:45.238793: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2025-05-07 10:46:45.253480: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:25:00.0 name: Tesla V100-PCIE-32GB computeCapability: 7.0
coreClock: 1.38GHz coreCount: 80 deviceMemorySize: 31.75GiB deviceMemoryBandwidth: 836.37GiB/s
2025-05-07 10:46:45.253748: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 1 with properties: 
pciBusID: 0000:5b:00.0 name: Tesla V100-PCIE-32GB computeCapability: 7.0
coreClock: 1.38GHz coreCount: 80 deviceMemorySize: 31.75GiB deviceMemoryBandwidth: 836.37GiB/s
2025-05-07 10:46:45.253773: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2025-05-07 10:46:45.255973: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.

In [4]:
sex_train = pd.read_csv('./data/data_clean.csv', index_col='Unnamed: 0')['Gender']

### set sex female=0 male=1 
sex_train = sex_train.replace(['F','女'],0)
sex_train = sex_train.replace(['M','男'],1)

sex_train.value_counts()

sex_train = np.array(sex_train).astype(np.float32)

age_train = pd.read_csv('./data/data_clean.csv', index_col='Unnamed: 0')['Age']

age_train = np.array(age_train).astype(np.float32)

In [5]:
df = pd.read_csv('./data/data_clean.csv', index_col='Unnamed: 0')
df = df.reset_index()

# Randomized label -- MentalAId with sex & gender 

## network architecture

In [6]:
# define bottleneck
class BottleNeck(tf.keras.layers.Layer):
    def __init__(self, growth_rate, drop_rate):
        super(BottleNeck, self).__init__()
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv1 = tf.keras.layers.Conv2D(filters=4 * growth_rate,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(filters=growth_rate,
                                            kernel_size=(3, 3),
                                            strides=1,
                                            padding="same")
        self.dropout = tf.keras.layers.Dropout(rate=drop_rate)
        
        self.listLayers = [self.bn1,
                           tf.keras.layers.Activation("relu"),
                           self.conv1,
                           self.bn2,
                           tf.keras.layers.Activation("relu"),
                           self.conv2,
                           self.dropout]

    def call(self, x):
        tf.random.set_seed(seed)
        y = x
        for layer in self.listLayers.layers:
            y = layer(y)
        y = tf.keras.layers.concatenate([x,y], axis=-1)
        return y

# define dense block
class DenseBlock(tf.keras.layers.Layer):
    def __init__(self, num_layers, growth_rate, drop_rate=0.5):
        super(DenseBlock, self).__init__()
        self.num_layers = num_layers
        self.growth_rate = growth_rate
        self.drop_rate = drop_rate
        self.listLayers = []
        for _ in range(num_layers):
            self.listLayers.append(BottleNeck(growth_rate=self.growth_rate, drop_rate=self.drop_rate))

    def call(self, x):
        tf.random.set_seed(seed)
        for layer in self.listLayers.layers:
            x = layer(x)
        return x

# define transition
class TransitionLayer(tf.keras.layers.Layer):
    def __init__(self, out_channels):
        super(TransitionLayer, self).__init__()
        self.bn = tf.keras.layers.BatchNormalization()
        self.conv = tf.keras.layers.Conv2D(filters=out_channels,
                                           kernel_size=(1, 1),
                                           strides=1,
                                           padding="same")
        self.pool = tf.keras.layers.MaxPool2D(pool_size=(2, 2),
                                              strides=2,
                                              padding="same")

    def call(self, inputs):
        tf.random.set_seed(seed)
        x = self.bn(inputs)
        x = tf.keras.activations.relu(x)
        x = self.conv(x)
        x = self.pool(x)
        return x

# define dense net
class DenseNet(tf.keras.Model):
    def __init__(self, num_init_features, growth_rate, block_layers, compression_rate, drop_rate):
        super(DenseNet, self).__init__()

        self.conv = tf.keras.layers.Conv2D(filters=num_init_features,
                                           kernel_size=(3, 3),
                                           strides=1,
                                           input_shape = (7,7,1),
                                           padding="same")
        self.bn = tf.keras.layers.BatchNormalization()

        self.num_channels = num_init_features
        self.dense_block_1 = DenseBlock(num_layers=block_layers[0], growth_rate=growth_rate, drop_rate=drop_rate)
        self.num_channels += growth_rate * block_layers[0]
        self.num_channels = compression_rate * self.num_channels
        self.transition_1 = TransitionLayer(out_channels=int(self.num_channels))
        self.dense_block_2 = DenseBlock(num_layers=block_layers[1], growth_rate=growth_rate, drop_rate=drop_rate)
        self.num_channels += growth_rate * block_layers[1]
        self.num_channels = compression_rate * self.num_channels
        self.transition_2 = TransitionLayer(out_channels=int(self.num_channels))
        self.dense_block_3 = DenseBlock(num_layers=block_layers[2], growth_rate=growth_rate, drop_rate=drop_rate)
        
        self.avgpool = tf.keras.layers.GlobalAveragePooling2D()
        
        # MLP layers for age and gender (this module is added)
        self.age_dense = tf.keras.layers.Dense(units=16, activation='relu', name='age_dense')
        self.gender_dense = tf.keras.layers.Dense(units=16, activation='relu', name='gender_dense')
        
        # fc
        self.fc = tf.keras.layers.Dense(units=2,
                                        activation=tf.keras.activations.softmax)

    def call(self, inputs):
        # inputs should now include image, age, gender
        image_input, age_input, gender_input = inputs
        
        # tf.random.set_seed(seed)
        x = self.conv(image_input)
        x = self.bn(x)
        x = tf.keras.activations.relu(x)

        x = self.dense_block_1(x)
        x = self.transition_1(x)
        x = self.dense_block_2(x)
        x = self.transition_2(x)
        x = self.dense_block_3(x)

        x = self.avgpool(x)
                
        ## Processing age and gender inputs through their respective dense layers               
        age_features = self.age_dense(age_input)
        gender_features = self.gender_dense(gender_input)
        
        # Concatenate image features with age and gender features
        x = tf.concat([x, age_features, gender_features], axis=-1)
        
        x = self.fc(x)

        return x

In [7]:
def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

## training with permutation

In [None]:
class RandomizationTest(object):
    def __init__(self, num_init_features, growth_rate, block_layers, compression_rate, drop_rate, no_epochs_1, no_epochs_2, lr_1, lr_2):
        self.num_init_features = num_init_features
        self.growth_rate = growth_rate
        self.block_layers = block_layers
        self.compression_rate = compression_rate
        self.drop_rate = drop_rate
        self.no_epochs_1 = no_epochs_1
        self.no_epochs_2 = no_epochs_2
        self.lr_1 = lr_1
        self.lr_2 = lr_2
        
    def _bootstrap_ci(self, data, n_bootstraps=1000):
        """Calculate bootstrap 95% confidence intervals"""
        bootstrapped_stats = []
        for _ in range(n_bootstraps):
            resample = np.random.choice(data, size=len(data), replace=True)
            bootstrapped_stats.append(np.mean(resample))
        
        lower = np.percentile(bootstrapped_stats, 2.5)
        upper = np.percentile(bootstrapped_stats, 97.5)
        return lower, upper
    
    def run_test(self, x_train, y_train, age_train, sex_train, num_repeats=100, test_size=0.1):
        all_acc = []
        all_prec = []
        all_rec = []
        all_spec = []
        all_mcc = []
        all_auc = []
        
        for repeat in range(num_repeats):
            print(f"\nStarting randomization test iteration {repeat+1}/{num_repeats}")
            
            # Shuffle the labels while keeping features intact
            shuffled_y = y_train.copy()
            np.random.shuffle(shuffled_y)
            
            # Split into train and test (90:10)
            x_train_split, x_test_split, y_train_split, y_test_split, age_train_split, age_test_split, sex_train_split, sex_test_split = train_test_split(
                x_train, shuffled_y, age_train, sex_train, test_size=test_size, random_state=repeat
            )
            
            # Train model with shuffled labels
            model = DenseNet(num_init_features=self.num_init_features, 
                           growth_rate=self.growth_rate, 
                           block_layers=self.block_layers, 
                           compression_rate=self.compression_rate, 
                           drop_rate=self.drop_rate)
            
            # Two-phase training as in original code
            model.compile(loss='categorical_crossentropy',
                        optimizer=tf.keras.optimizers.Adam(lr=self.lr_1),
                        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])
            
            history = model.fit([x_train_split, age_train_split, sex_train_split], 
                              y_train_split,
                              batch_size=256, epochs=self.no_epochs_1,
                              validation_data=([x_test_split, age_test_split, sex_test_split], 
                                              y_test_split),
                              verbose=0)
            
            model.compile(loss='categorical_crossentropy',
                        optimizer=tf.keras.optimizers.Adam(lr=self.lr_2),
                        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])
            
            history = model.fit([x_train_split, age_train_split, sex_train_split], 
                              y_train_split,
                              batch_size=256, epochs=self.no_epochs_2,
                              validation_data=([x_test_split, age_test_split, sex_test_split], 
                                              y_test_split),
                              verbose=0)
            
            # Evaluate on test set
            y_pred = model.predict([x_test_split, age_test_split, sex_test_split])
            y_pred = np.argmax(y_pred, axis=1)
            y_true = np.argmax(y_test_split, axis=1)
            
            # Calculate metrics
            all_acc.append(metrics.accuracy_score(y_true, y_pred))
            all_prec.append(metrics.precision_score(y_true, y_pred))
            all_rec.append(metrics.recall_score(y_true, y_pred))
            all_mcc.append(metrics.matthews_corrcoef(y_true, y_pred))
            cm = metrics.confusion_matrix(y_true, y_pred)
            all_spec.append(cm[0,0]/(cm[0,0]+cm[0,1]))
            all_auc.append(history.history['val_auc'][-1])
        
        # Calculate overall statistics
        results = {
            'Accuracy': [np.mean(all_acc), np.std(all_acc), *self._bootstrap_ci(all_acc)],
            'Precision': [np.mean(all_prec), np.std(all_prec), *self._bootstrap_ci(all_prec)],
            'Sensitivity': [np.mean(all_rec), np.std(all_rec), *self._bootstrap_ci(all_rec)],
            'Specificity': [np.mean(all_spec), np.std(all_spec), *self._bootstrap_ci(all_spec)],
            'MCC': [np.mean(all_mcc), np.std(all_mcc), *self._bootstrap_ci(all_mcc)],
            'AUC': [np.mean(all_auc), np.std(all_auc), *self._bootstrap_ci(all_auc)]
        }
        
        # Create DataFrame
        results_df = pd.DataFrame(results)
        
        # Format the results
        results_df.index = [
            'Randomization test mean', 
            'Randomization test std',
            '95% CI (lower)',
            '95% CI (upper)'
        ]
        
        # Format numbers appropriately
        results_df.iloc[:2, :4] = results_df.iloc[:2, :4].applymap(lambda x: f"{float(x):.1%}")
        results_df.iloc[2:, :4] = results_df.iloc[2:, :4].applymap(lambda x: f"{float(x):.3f}")
        results_df.iloc[:, 4:] = results_df.iloc[:, 4:].applymap(lambda x: f"{float(x):.3f}")
        
        return results_df

# Usage 
RT = RandomizationTest(
    num_init_features=32,
    growth_rate=8,
    block_layers=[3,4,3],
    compression_rate=0.5,
    drop_rate=0.3,
    no_epochs_1=10,
    no_epochs_2=200,
    lr_1=1e-3,
    lr_2=1e-4
)

randomization_results = RT.run_test(x_train, y_train, age_train, sex_train, num_repeats=100)

# Save results
randomization_results.to_csv('randomization_test_results.csv')



Starting randomization test iteration 1/100


2025-05-07 10:46:51.098018: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2025-05-07 10:46:53.953089: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2025-05-07 10:46:54.295614: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7
