---
### Train and Predict 
In this section, we describe the usage of the models.GNNModel.train and models.GNNModel.predict functions via a simple example on a randomly generated dataset. All four models, i.e. DGCNN, RGNN, SparseDGCNN and HetEmotionNet, are initialized and used in turn. 

Note that the dataset is randomly generated, so the prediction accuracy is roughly equal to 0.5. 

#### Preparations

In [1]:
from ge.models import *

# basic hyper-parameter definitions
# NOTE(review): `np` (and later `torch`) are never imported explicitly in this
# notebook; presumably they are re-exported by `from ge.models import *` - confirm.
num_nodes=30
num_hiddens=10
num_layers=2

# load electrode_position
# Keep the first `num_nodes` rows (instead of a hard-coded 30) so the positions
# always stay in sync with the node count handed to the models, then scale by 100.
electrode_position=np.load('./src/utils&others/pos.npy')[:num_nodes]*100
print(electrode_position)
# pairs of node indices passed to RGNN as its `global_connections` argument
global_connections=[[0,1],[5,6],[7,8],[11,12],[13,14]]

# model initialization
model_DGCNN=DGCNN(num_nodes,num_hiddens,num_layers,electrode_position)
model_RGNN=RGNN(num_nodes,num_hiddens,num_layers,electrode_position,global_connections)
model_SparseDGCNN=SparseDGCNN(num_nodes,num_hiddens,num_layers,electrode_position)
model_Het=HetEmotionNet(num_nodes,num_hiddens)


  from .autonotebook import tqdm as notebook_tqdm


[[-2.708246 10.042033]
 [ 2.491506 10.115846]
 [-0.085834  6.512851]
 [-3.800103  6.358406]
 [ 3.697464  6.43073 ]
 [-6.248761  6.355425]
 [ 6.235231  6.503184]
 [-2.389717  4.129234]
 [ 2.215655  4.134819]
 [-5.93043   3.860679]
 [ 5.847392  3.913364]
 [-0.088036  1.769319]
 [-4.540056  1.579296]
 [ 4.410568  1.591498]
 [-7.135999  1.234258]
 [ 6.886154  1.261932]
 [-2.414903 -0.698447]
 [ 2.352306 -0.703254]
 [-5.86441  -0.960142]
 [ 5.852648 -0.967906]
 [-0.110991 -2.935592]
 [-3.734979 -3.000179]
 [ 3.632534 -3.002585]
 [-5.901261 -3.338525]
 [ 5.600342 -3.331862]
 [-2.709502 -4.725389]
 [ 2.428215 -4.739219]
 [-0.152567 -6.130292]
 [-2.394174 -6.109372]
 [ 2.093873 -6.087955]]


In [None]:
# dataset preparations
# Seed NumPy's global generator so the synthetic dataset (and therefore the
# numbers printed further down) is the same after every "Restart & Run All".
np.random.seed(0)

train_samples=30
test_samples=6
time_step=200

# raw time-domain signals of shape (samples, num_nodes, time_step) with binary labels
raw_data_train=np.random.uniform(0,100,(train_samples,num_nodes,time_step))
label_train=np.random.randint(0,2,(train_samples,))

raw_data_test=np.random.uniform(0,100,(test_samples,num_nodes,time_step))
label_test=np.random.randint(0,2,(test_samples,))



# [low, high] edges of each band; they are forwarded to mne.filter.filter_data
# as l_freq / h_freq when the DE features are extracted
freqs = [[1,4],[4, 8], [8, 13], [13, 30], [30, 47]] # using 5 bands


import mne
def get_freq_data(data, sfreq=None, bands=None):
    """Extract frequency-domain DE features from time-domain raw data.

    Every sample is band-pass filtered with ``mne.filter.filter_data`` once per
    frequency band. The feature stored for a (sample, node, band) triple is
    ``0.5*log(2*pi*e*var)``, where ``var`` is the variance of the filtered
    signal along the time axis.

    Parameters
    ----------
    data : np.ndarray
        Raw signals of shape (num_samples, num_nodes, time_step).
    sfreq : float, optional
        Sampling frequency handed to the filter. Defaults to ``data.shape[-1]``,
        which reproduces what this notebook has always done (it passed
        ``time_step``), i.e. every sample is treated as a one-second recording.
    bands : sequence of [low, high] pairs, optional
        Frequency bands to extract. Defaults to the notebook-level ``freqs``.

    Returns
    -------
    np.ndarray
        DE features of shape (num_samples, num_nodes, len(bands)).
    """
    if bands is None:
        bands = freqs
    if sfreq is None:
        sfreq = data.shape[-1]

    # Shapes are taken from `data` itself rather than from the notebook globals
    # `num_nodes` / `time_step`, so the function works for any input size.
    n_samples, n_nodes = data.shape[0], data.shape[1]
    de = np.zeros((n_samples, n_nodes, len(bands)))
    for i in range(len(bands)):
        print('Current freq band: ', bands[i])
        low_freq = bands[i][0]
        high_freq = bands[i][1]
        for sub in range(n_samples):
            data_video = data[sub, :, :] #(num_nodes,time_step)
            # filter_data keeps the input shape, so no reshape is needed afterwards
            data_video_filt = mne.filter.filter_data(
                data_video, sfreq, l_freq=low_freq, h_freq=high_freq)
            de_one = 0.5*np.log(2*np.pi*np.exp(1) *
                                (np.var(data_video_filt, 1)))  # (num_nodes,)
            de[sub, :, i] = de_one
    return de

# convert the raw time-domain signals of each split into DE features (train first, then test)
data_train, data_test = [
    get_freq_data(raw_split) for raw_split in (raw_data_train, raw_data_test)
]


In [3]:
# sanity check: print the shape of every array prepared so far, one per line
for prepared_array in (
    raw_data_train,  # (30,30,200)
    label_train,     # (30,)
    raw_data_test,   # (6,30,200)
    label_test,      # (6,)
    data_train,      # (30,30,5)
    data_test,       # (6,30,5)
):
    print(prepared_array.shape)

(30, 30, 200)
(30,)
(6, 30, 200)
(6,)
(30, 30, 5)
(6, 30, 5)


#### DGCNN

In [4]:
# DGCNN
# collect the training hyper-parameters first, then start training
dgcnn_train_kwargs=dict(
    device=torch.device('cpu'),
    optimizer=torch.optim.Adam,
    num_classes=2,
    dropout=0.5,
    batch_size=32,
    lr=0.001,
    l1_reg=0,
    l2_reg=0,
    num_epoch=10,
)
train_acc_list=model_DGCNN.train(data_train,label_train,**dgcnn_train_kwargs)
print('DGCNN train list: ',train_acc_list)

# predict on the test split and report the fraction of matching labels
predictions=model_DGCNN.predict(data_test)
cnt=np.sum(predictions==label_test)
print('test accuracy: ',cnt/test_samples)

in train_only num_fe None
gpu device  cpu
training epochs :  0


training epochs :  1
training epochs :  2
training epochs :  3
training epochs :  4
training epochs :  5
training epochs :  6
training epochs :  7
training epochs :  8
training epochs :  9
DGCNN train list:  [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6]
test accuracy:  0.6666666666666666


#### RGNN

In [5]:
# RGNN
# collect the training hyper-parameters first, then start training;
# unlike DGCNN, this call additionally receives `valid_data` and `NodeDAT=True`
rgnn_train_kwargs=dict(
    valid_data=data_test,
    device=torch.device('cpu'),
    optimizer=torch.optim.Adam,
    num_classes=2,
    dropout=0.5,
    NodeDAT=True,
    batch_size=32,
    lr=0.001,
    l1_reg=0,
    l2_reg=0,
    num_epoch=10,
)
train_acc_list=model_RGNN.train(data_train,label_train,**rgnn_train_kwargs)
print('RGNN train list:',train_acc_list)

# predict on the test split and report the fraction of matching labels
predictions=model_RGNN.predict(data_test)
cnt=np.sum(predictions==label_test)
print('test accuracy: ',cnt/test_samples)

in train_only num_fe None
gpu device  cpu
training epochs :  0


training epochs :  1
training epochs :  2
training epochs :  3
training epochs :  4
training epochs :  5
training epochs :  6
training epochs :  7
training epochs :  8
training epochs :  9
RGNN train list: [0.4, 0.36666666666666664, 0.5666666666666667, 0.4, 0.6, 0.4, 0.43333333333333335, 0.4, 0.36666666666666664, 0.3333333333333333]
test accuracy:  0.3333333333333333


#### SparseDGCNN

In [6]:
# SparseDGCNN
# collect the training hyper-parameters first, then start training
sparse_train_kwargs=dict(
    device=torch.device('cpu'),
    optimizer=torch.optim.Adam,
    num_classes=2,
    dropout=0.5,
    batch_size=32,
    lr=0.001,
    l1_reg=0,
    l2_reg=0,
    num_epoch=10,
)
train_acc_list=model_SparseDGCNN.train(data_train,label_train,**sparse_train_kwargs)
print('SparseDGCNN train list:',train_acc_list)

# predict on the test split and report the fraction of matching labels
predictions=model_SparseDGCNN.predict(data_test)
cnt=np.sum(predictions==label_test)
print('test accuracy: ',cnt/test_samples)

in train_only num_fe None
gpu device  cpu
training epochs :  0
training epochs :  1
training epochs :  2
training epochs :  3
training epochs :  4
training epochs :  5
training epochs :  6
training epochs :  7
training epochs :  8
training epochs :  9
SparseDGCNN train list: [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.43333333333333335, 0.4666666666666667, 0.5666666666666667, 0.6333333333333333]
test accuracy:  0.6666666666666666


#### HetEmotionNet

In [7]:
# HetEmotionNet
# collect the training hyper-parameters first, then start training;
# this model is given both the DE features and the raw time-domain data
het_train_kwargs=dict(
    device=torch.device('cpu'),
    optimizer=torch.optim.Adam,
    num_classes=2,
    dropout=0.5,
    batch_size=32,
    lr=0.001,
    l1_reg=0,
    l2_reg=0,
    num_epoch=10,
)
train_acc_list=model_Het.train(data_train,raw_data_train,label_train,**het_train_kwargs)
print('HetEmotionNet train list:',train_acc_list)

# predict on the test split (features + raw data) and report the fraction of matching labels
predictions=model_Het.predict(data_test,raw_data_test)
cnt=np.sum(predictions==label_test)
print('test accuracy: ',cnt/test_samples)

in train_only num_fe 5
gpu device  cpu
num_freq 5
s1 (30, 30, 200)
s2 (30, 30, 5)
s3 torch.Size([30, 30, 30])
training epochs :  0
training epochs :  1
training epochs :  2
training epochs :  3
training epochs :  4
training epochs :  5
training epochs :  6
training epochs :  7
training epochs :  8
training epochs :  9
HetEmotionNet train list: [0.36666666666666664, 0.6, 0.6, 0.5333333333333333, 0.43333333333333335, 0.4, 0.4666666666666667, 0.43333333333333335, 0.4666666666666667, 0.5666666666666667]
test accuracy:  0.6666666666666666


#### Save and Load
Here, we show the usage of the models.GNNModel.save and models.GNNModel.load functions, using the DGCNN model as an example.

In [8]:
# write the trained DGCNN to disk
save_dir,save_name='./','my_DGCNN_model.dic.pkl'
model_DGCNN.save(save_dir,save_name)

# create a DGCNN with dummy constructor arguments and load the saved model into it
# (presumably `load` replaces the dummy configuration - confirm against GNNModel.load)
new_model_DGCNN=DGCNN(1,1,1,[[1,1]])
new_model_DGCNN.load(save_dir,save_name)

# the reloaded model is then evaluated exactly like the original one
predictions=new_model_DGCNN.predict(data_test)
cnt=np.sum(predictions==label_test)
print('test accuracy: ',cnt/test_samples)

test accuracy:  0.6666666666666666


---
### Make Evaluations 
In this section, we show the usage of the 'protocols.data_split', 'protocols.data_FACED' and 'protocols.evaluation' functions. Specifically, based on the models and dataset initialized above, we successively apply the 'ncv', 'cv' and 'fcv' cross-validation protocols to find the best hyper-parameters and evaluate the models. 

Note that the dataset is randomly generated, so the prediction accuracy is roughly equal to 0.5. We mainly focus on the usage of the data_split function and briefly show the usage of data_FACED at the end. 

In [9]:
from ge.protocols import *

# further definitions: fold counts for the outer / inner cross-validation loops
K=4
inner_K=3

# merge the train and test splits back into a single dataset (along the sample axis)
data=np.concatenate([data_train,data_test])
label=np.concatenate([label_train,label_test])
raw_data=np.concatenate([raw_data_train,raw_data_test])

# assign every sample to one of `subject_num` subjects: sample i gets the id
# int(i/section_size), so consecutive samples share a subject
subject_num=10
section_size=data.shape[0]/subject_num
subject_id_list=(np.arange(data.shape[0])/section_size).astype(int)
print(subject_id_list.shape)

(36,)


#### ncv
We first present the evaluation methods under the 'ncv' protocol.

In [10]:
# DGCNN and 'ncv'
# build the cross-subject loader, then run the evaluation over the hyper-parameter grid
loader=data_split('cross_subject',data,label,subject_id_list)
mean_acc,out_acc_list=evaluation(
    model_DGCNN,
    loader,
    'ncv',
    grid={
        "lr":[0.01,0.001],
        "hiddens":[10,20],
        'epoch':list(range(10)),
    },
    categories=2,
    K=K,
    K_inner=inner_K,
    device=torch.device('cpu'),
    optimizer='Adam',
)
print(mean_acc,out_acc_list)

0.3055555555555556 [{'fold': 0, 'best_paras': {'lr': 0.01, 'hiddens': 10}, 'train_acc_mean': 0.6785714285714286, 'test_acc_mean': 0.375, 'test_num_samples': 8}, {'fold': 1, 'best_paras': {'lr': 0.001, 'hiddens': 10}, 'train_acc_mean': 0.6551724137931034, 'test_acc_mean': 0.42857142857142855, 'test_num_samples': 7}, {'fold': 2, 'best_paras': {'lr': 0.001, 'hiddens': 10}, 'train_acc_mean': 0.3793103448275862, 'test_acc_mean': 0.14285714285714285, 'test_num_samples': 7}, {'fold': 3, 'best_paras': {'lr': 0.01, 'hiddens': 20}, 'train_acc_mean': 0.5909090909090909, 'test_acc_mean': 0.2857142857142857, 'test_num_samples': 14}]


In [11]:
# SparseDGCNN and 'ncv'
# reuses the `loader` that is already present in the notebook namespace
mean_acc,out_acc_list=evaluation(
    model_SparseDGCNN,
    loader,
    'ncv',
    grid={
        "lr":[0.01,0.001],
        "hiddens":[10,20],
        'epoch':list(range(10)),
    },
    categories=2,
    K=K,
    K_inner=inner_K,
    device=torch.device('cpu'),
    optimizer='Adam',
)
print(mean_acc,out_acc_list)

0.4444444444444444 [{'fold': 0, 'best_paras': {'lr': 0.01, 'hiddens': 10}, 'train_acc_mean': 0.7142857142857143, 'test_acc_mean': 0.375, 'test_num_samples': 8}, {'fold': 1, 'best_paras': {'lr': 0.01, 'hiddens': 10}, 'train_acc_mean': 0.6551724137931034, 'test_acc_mean': 0.42857142857142855, 'test_num_samples': 7}, {'fold': 2, 'best_paras': {'lr': 0.001, 'hiddens': 20}, 'train_acc_mean': 0.5517241379310345, 'test_acc_mean': 0.8571428571428571, 'test_num_samples': 7}, {'fold': 3, 'best_paras': {'lr': 0.01, 'hiddens': 20}, 'train_acc_mean': 0.6818181818181818, 'test_acc_mean': 0.2857142857142857, 'test_num_samples': 14}]


In [12]:
# RGNN and 'ncv'
# reuses the `loader` already present in the notebook namespace; NodeDAT=True is
# passed on top of the arguments used for the other models
mean_acc,out_acc_list=evaluation(
    model_RGNN,
    loader,
    'ncv',
    grid={
        "lr":[0.01,0.001],
        "hiddens":[10,20],
        'epoch':list(range(10)),
    },
    categories=2,
    K=K,
    K_inner=inner_K,
    device=torch.device('cpu'),
    optimizer='Adam',
    NodeDAT=True,
)
print(mean_acc,out_acc_list)

0.4444444444444444 [{'fold': 0, 'best_paras': {'lr': 0.01, 'hiddens': 10}, 'train_acc_mean': 0.7142857142857143, 'test_acc_mean': 0.375, 'test_num_samples': 8}, {'fold': 1, 'best_paras': {'lr': 0.01, 'hiddens': 10}, 'train_acc_mean': 0.7241379310344828, 'test_acc_mean': 0.42857142857142855, 'test_num_samples': 7}, {'fold': 2, 'best_paras': {'lr': 0.01, 'hiddens': 20}, 'train_acc_mean': 0.5862068965517241, 'test_acc_mean': 0.8571428571428571, 'test_num_samples': 7}, {'fold': 3, 'best_paras': {'lr': 0.01, 'hiddens': 10}, 'train_acc_mean': 0.5909090909090909, 'test_acc_mean': 0.2857142857142857, 'test_num_samples': 14}]


In [13]:
# HetEmotionNet and 'ncv'
# this model also needs the raw time-domain data, so the loader is rebuilt with `raw_data`
loader=data_split('cross_subject',data,label,subject_id_list,raw_data)
mean_acc,out_acc_list=evaluation(
    model_Het,
    loader,
    'ncv',
    grid={
        "lr":[0.01,0.001],
        "hiddens":[10,20],
        'epoch':list(range(10)),
    },
    categories=2,
    K=K,
    K_inner=inner_K,
    device=torch.device('cpu'),
    optimizer='Adam',
)
print(mean_acc,out_acc_list)

0.4722222222222222 [{'fold': 0, 'best_paras': {'lr': 0.01, 'hiddens': 10}, 'train_acc_mean': 0.6785714285714286, 'test_acc_mean': 0.375, 'test_num_samples': 8}, {'fold': 1, 'best_paras': {'lr': 0.01, 'hiddens': 10}, 'train_acc_mean': 0.6551724137931034, 'test_acc_mean': 0.42857142857142855, 'test_num_samples': 7}, {'fold': 2, 'best_paras': {'lr': 0.01, 'hiddens': 10}, 'train_acc_mean': 0.6551724137931034, 'test_acc_mean': 0.14285714285714285, 'test_num_samples': 7}, {'fold': 3, 'best_paras': {'lr': 0.01, 'hiddens': 10}, 'train_acc_mean': 0.6818181818181818, 'test_acc_mean': 0.7142857142857143, 'test_num_samples': 14}]


#### cv and fcv
For the cv and fcv protocols, simply change the 'protocols' parameter; 'K_inner' is no longer required. We only present the DGCNN model under the cv and fcv protocols; the other models can be evaluated in the same way.

In [14]:
# DGCNN under cv with intra_subject
loader=data_split('intra_subject',data,label,subject_id_list)
best_dict,out_acc_list=evaluation(
    model_DGCNN,
    loader,
    'cv',
    # a fresh grid dict is built for every call, exactly as before
    grid={
        "lr":[0.01,0.001],
        "hiddens":[10,20],
        'epoch':list(range(10)),
    },
    categories=2,
    K=K,
    device=torch.device('cpu'),
    optimizer='Adam',
)
print(best_dict,out_acc_list)

# DGCNN under fcv with cross_subject
loader=data_split('cross_subject',data,label,subject_id_list)
best_dict,out_acc_list=evaluation(
    model_DGCNN,
    loader,
    'fcv',
    grid={
        "lr":[0.01,0.001],
        "hiddens":[10,20],
        'epoch':list(range(10)),
    },
    categories=2,
    K=K,
    device=torch.device('cpu'),
    optimizer='Adam',
)
print(best_dict,out_acc_list)

{'paras': {'lr': 0.01, 'hiddens': 20}, 'acc_mean': 0.6388888888888888, 'argmax_epoch': -1} [{'paras': {'lr': 0.01, 'hiddens': 10}, 'acc_mean': 0.6111111111111112, 'argmax_epoch': -1}, {'paras': {'lr': 0.001, 'hiddens': 10}, 'acc_mean': 0.5, 'argmax_epoch': -1}, {'paras': {'lr': 0.01, 'hiddens': 20}, 'acc_mean': 0.6388888888888888, 'argmax_epoch': -1}, {'paras': {'lr': 0.001, 'hiddens': 20}, 'acc_mean': 0.5, 'argmax_epoch': -1}]
{'paras': {'lr': 0.01, 'hiddens': 10}, 'acc_mean': 0.6944444444444444, 'argmax_epoch': 1} [{'paras': {'lr': 0.01, 'hiddens': 10}, 'acc_mean': 0.6944444444444444, 'argmax_epoch': 1}, {'paras': {'lr': 0.001, 'hiddens': 10}, 'acc_mean': 0.6944444444444444, 'argmax_epoch': 1}, {'paras': {'lr': 0.01, 'hiddens': 20}, 'acc_mean': 0.6111111111111112, 'argmax_epoch': 3}, {'paras': {'lr': 0.001, 'hiddens': 20}, 'acc_mean': 0.5277777777777778, 'argmax_epoch': 1}]


#### data_FACED
To conclude this notebook, we show the usage of data_FACED. Please make sure the shape of the data is *(123, 720 or 840, 120)*. 

In [15]:
# the data_path is just an example which in fact does not exist.
data_path='./src/my_FACED_dataset.mat'
# data_FACED raises a ValueError for a missing dataset path; the error is the
# point of this demonstration, so it is caught and displayed instead of
# aborting a "Restart & Run All" of the notebook.
try:
    loader=data_FACED('cross_subject',2,data_path)
except ValueError as err:
    print('ValueError:',err)

ValueError: The path of FACED dataset does not exist.