In [2]:
import os
import numpy as np
import random
import pickle
import sys

from config import SAVE_DIR_DATA
from model_config_GSP import *
from simualte_data import simulate_data
from main_cross_validation import parrallel_run_A
from main_rep_kd import parrallel_run_B

# Create Demo Dataset

In [7]:
print("Creating demo dataset...")

# Arguments forwarded to simulate_data. The names suggest cohort size per
# class, nodes per graph and number of views -- TODO confirm against
# simulate_data's signature.
subjects = 40
nodes = 35
views = 6

# Distribution parameters handed to simulate_data for class 0 and class 1.
mu_0 = 3
sigma_0 = 0.2
mu_1 = 2.8
sigma_1 = 0.3

# NOTE(review): no random seed is set in this notebook. If simulate_data draws
# from a global RNG, the generated dataset differs on every run -- consider
# seeding before these calls once the RNG it uses is confirmed.
adjs_0 = simulate_data(subjects, nodes, views, sigma_0, mu_0)
adjs_1 = simulate_data(subjects, nodes, views, sigma_1, mu_1)

# `+` must concatenate the two per-class collections (class 0 first, then
# class 1) so that the samples line up with `labels` below.
adjs = adjs_0 + adjs_1
labels = [0] * subjects + [1] * subjects

# Guard against `+` acting elementwise (e.g. on arrays) instead of concatenating.
assert len(adjs) == len(labels), (
    f"expected {len(labels)} samples, got {len(adjs)}"
)

# exist_ok=True replaces the racy "check, then create" pattern and keeps the
# cell safe to re-run.
dataset_dir = SAVE_DIR_DATA + "gender_data"
os.makedirs(dataset_dir, exist_ok=True)

with open(dataset_dir + '/gender_data_edges', 'wb') as f:
    pickle.dump(adjs, f)

with open(dataset_dir + '/gender_data_labels', 'wb') as f:
    pickle.dump(labels, f)

print("Finished creating demo dataset")


Creating demo dataset...
Finished creating demo dataset


# Pre-train teacher model

In [10]:
# Run index handed to the training entry points; the RepKD training cell
# further down reuses this notebook-level variable.
run = 0

print("Pre-training teacher model...")
parrallel_run_A(run, "gender_data")
print("Finished pre-training teacher model")

Pre-training teacher model...
/Users/lorenzostigliano/Documents/University/Imperial/TEST/thesis-imperial/model_data/gender_data/gcn/model_assessment/gcn
Run : 0
--------------------------------------------------------------------------
Main :  0 MainModel_3Fold_gender_data_gcn_run_0_fixed_init 3
CV :  0
Num training graphs:  54 ; Num test graphs:  26
Size of Training Set: 54
Size of Validation Set: 26
---------------------------------
Time taken for epoch 0: 0.15532779693603516
Train accuracy: 0.7037037037037037
Train loss: 0.49175480008125305
GCN Weights are saved:
Parameter containing:
tensor([[ 0.1636,  0.1415,  0.1556, -0.0569, -0.0577, -0.0182,  0.0499,  0.0688,
         -0.0134, -0.1092, -0.1273,  0.1636, -0.0459, -0.0212, -0.1032, -0.0836,
          0.0367,  0.0715, -0.0665, -0.0625,  0.1350, -0.0864, -0.1240, -0.0249,
          0.1610,  0.0364,  0.0253, -0.0215,  0.1057, -0.0836,  0.1144,  0.0405,
          0.0207,  0.0232, -0.1032]], requires_grad=True)
Validation accuracy: 0.

  _warn_prf(average, modifier, msg_start, len(result))


Size of Training Set: 72
Size of Validation Set: 8
---------------------------------
Time taken for epoch 0: 0.13161802291870117
Train accuracy: 0.7638888888888888
Train loss: 0.48662418127059937
GCN Weights are saved:
Parameter containing:
tensor([[ 0.1636,  0.1413,  0.1585, -0.0567, -0.0573, -0.0170,  0.0501,  0.0698,
         -0.0117, -0.1089, -0.1269,  0.1634, -0.0455, -0.0194, -0.1036, -0.0838,
          0.0368,  0.0725, -0.0658, -0.0617,  0.1350, -0.0864, -0.1237, -0.0244,
          0.1620,  0.0360,  0.0264, -0.0205,  0.1053, -0.0832,  0.1143,  0.0422,
          0.0237,  0.0232, -0.1037]], requires_grad=True)
Validation accuracy: 0.75
Validation Loss: 0.39453840302303433
Average Memory Usage: 229.62109375 MB, Std: 0.0
Average Time: 0.13161802291870117, Std: 0.0
CV :  7
Num training graphs:  72 ; Num test graphs:  8
Size of Training Set: 72
Size of Validation Set: 8
---------------------------------
Time taken for epoch 0: 0.11988139152526855
Train accuracy: 0.7638888888888888
Tra

# Train RepKD

In [11]:
# Train the RepKD students on the demo dataset.
# NOTE: `run` is not defined in this cell; it is the notebook-level variable
# set in the teacher pre-training cell, so that cell must be executed first.
print("Training RepKD...")
parrallel_run_B(run, "gender_data")
print("Finished RepKD...")

Training RepKD...
0
/Users/lorenzostigliano/Documents/University/Imperial/TEST/thesis-imperial/model_data/gender_data/gcn/model_assessment/gcn_student_lsp_ensamble_2
Run :  0
--------------------------------------------------------------------------
Main :  0 MainModel_3Fold_gender_data_gcn_student_lsp_ensamble_2_run_0_fixed_init 3
Num training graphs:  54 ; Num test graphs:  26
CV : 0
Size of Training Set: 54
Size of Validation Set: 26
---------------------------------
Time taken for epoch 0: 0.6128103733062744
Train total loss: 2.3086282478438482
Train teacher and student loss: 0.6873498272012781
Train within student loss for weights: 0.11318699891368549
GCN Weights are saved:
Parameter containing:
tensor([[ 0.0683,  0.1429,  0.1488,  0.1455,  0.0325, -0.1502,  0.0197, -0.1016,
         -0.1616,  0.1455,  0.1259, -0.1734,  0.0364, -0.0243, -0.0242, -0.0785,
          0.0606, -0.1006,  0.0580,  0.0862,  0.1181,  0.0620, -0.1634, -0.1048,
          0.0887,  0.0311, -0.1279, -0.0934,  0

## Getting the best model for RepKD

In [3]:
from analysis import *

# Settings forwarded to get_all_best_student.
analysis_type = "model_assessment"
dataset_split = "val"
dataset = "gender_data"

# NOTE: this rebinds the notebook-level name `views`, which the
# dataset-creation cell uses as an integer.
views = [0, 2, 4, 5]

# Student-ensemble configurations to compare.
models_args = [
    gcn_student_lsp_ensamble_2_args,
    gcn_student_lsp_ensamble_3_args,
    gcn_student_lsp_ensamble_4_args,
    gcn_student_lsp_ensamble_5_args,
]

# Get the best students of every model, for all runs and for the different
# picking criteria.
(
    all_model_metrics_mean,
    all_model_metrics_var,
    all_model_best_student,
) = get_all_best_student(analysis_type, dataset_split, dataset, models_args, views)

gcn_student_lsp_ensamble_2
[[1.0, 0.0, 0], [1.0, 0.0, 1], [1.0, 0.0, 0], [1.0, 0.0, 1], [1.0, 0.0, 0]]
[[1.0, 0.0, 0], [1.0, 0.0, 1], [1.0, 0.0, 0], [1.0, 0.0, 1], [1.0, 0.0, 0]]
[[1.0, 0.0, 0], [1.0, 0.0, 0], [1.0, 0.0, 0], [1.0, 0.0, 0], [1.0, 0.0, 0]]
[[1.0, 0.0, 0], [1.0, 0.0, 1], [1.0, 0.0, 0], [1.0, 0.0, 1], [1.0, 0.0, 0]]
gcn_student_lsp_ensamble_3
[[1.0, 0.0, 1], [1.0, 0.0, 1], [0.9916666666666667, 0.0, 2], [1.0, 0.0, 1], [0.9916666666666667, 0.0, 2]]
[[1.0, 0.0, 0], [1.0, 0.0, 1], [0.9916666666666667, 0.0, 2], [1.0, 0.0, 1], [0.9916666666666667, 0.0, 2]]
[[1.0, 0.0, 0], [1.0, 0.0, 0], [0.9916666666666667, 0.0, 2], [1.0, 0.0, 0], [0.9916666666666667, 0.0, 2]]
[[1.0, 0.0, 0], [1.0, 0.0, 1], [0.9916666666666667, 0.0, 2], [1.0, 0.0, 1], [0.9916666666666667, 0.0, 2]]
gcn_student_lsp_ensamble_4
[[1.0, 0.0, 0], [0.9666666666666667, 0.0, 1], [0.9833333333333334, 0.0, 2], [0.9666666666666667, 0.0, 1], [0.9833333333333334, 0.0, 2]]
[[1.0, 0.0, 0], [0.9666666666666667, 0.0, 1], [1.0, 0.0

In [4]:
# Pick the final student of each ensemble with the 'weighted acc' selection
# method and unpack the four resulting tables.
df_rep, df_acuracy, df_var, index = final_student(
    all_model_metrics_mean,
    all_model_metrics_var,
    all_model_best_student,
    selection_method='weighted acc',
)

In [6]:
# Self-reproducibility score of the best student in each ensemble.
# Columns: number of students in the ensemble (2, 3, 4, 5).
# Rows: views (data); in the output shown below, the last row (4) equals the
# column-wise mean of rows 0-3.
df_rep

Unnamed: 0,2,3,4,5
0,1.0,1.0,0.966667,1.0
1,1.0,1.0,0.966667,1.0
2,1.0,1.0,1.0,1.0
3,1.0,1.0,0.966667,1.0
4,1.0,1.0,0.975,1.0


In [7]:
# Accuracy of the best student in each ensemble.
# Columns: number of students in the ensemble (2, 3, 4, 5).
# In the output shown below, the last row (4) equals the column-wise mean of
# rows 0-3.
df_acuracy

Unnamed: 0,2,3,4,5
0,0.575336,0.575336,0.575336,0.541484
1,0.567002,0.567002,0.562729,0.574725
2,0.533547,0.521047,0.537821,0.58286
3,0.550122,0.550122,0.545849,0.620467
4,0.556502,0.553377,0.555433,0.579884


In [8]:
# Variance of the performance of the best student in each ensemble.
# Columns: number of students in the ensemble (2, 3, 4, 5).
df_var

Unnamed: 0,2,3,4,5
0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0


In [9]:
# Index of the best student in each ensemble.
# Columns: number of students in the ensemble (2, 3, 4, 5).
index

Unnamed: 0,2,3,4,5
0,1.0,1.0,1.0,4.0
1,1.0,1.0,1.0,4.0
2,0.0,0.0,0.0,4.0
3,1.0,1.0,1.0,4.0
