# load data

In [1]:
import torch
import numpy as np
from data_loader.data_loaders import PCDataLoader
from model.model import MyDNN

# fix random seeds for reproducibility
SEED = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(SEED)

data_loader = PCDataLoader(model_dir='saved/models/DNN-NegativeSampling-NewDataT0.1/0801_222300/',
                           data_dir='data/',
                           batch_size=32,
                           group='PC',
                           seed=SEED,
                           shuffle=True,
                           validation_split=0.2,
                           test_split=0.0,
                           num_workers=2)

feature_list = data_loader.get_feature_list()

'''create dataframe'''
train_df = data_loader.df_train
valid_df = data_loader.df_valid

feature_num = data_loader.get_feature_num()

# build model architecture, then print to console
model = MyDNN(feature_num=feature_num,
              dropout=0.5)

# load trained model
resume_dir = 'saved/models/DNN-NegativeSampling-NewDataT0.1/0801_222300/model_best.pth'
checkpoint = torch.load(resume_dir,map_location ='cpu') #这里加了一句map_location ='cpu'
state_dict = checkpoint['state_dict']
model.load_state_dict(state_dict)

# prepare model for testing
device = torch.device('cpu')
model = model.to(device)
model.eval()

def df2tensor(df):
    tensor_df = torch.from_numpy(df.to_numpy())
    return tensor_df

Negative sampling...
107 positive and 107 negative records.
26 positive and 26 negative records.
23 positive and 23 negative records.


# select top patients

In [9]:
valid_df.head()

Unnamed: 0,NAME,Group,Outcome,A_original_shape_VoxelVolume,A_original_shape_MeshVolume,A_original_shape_SurfaceArea,A_original_shape_SurfaceVolumeRatio,A_original_shape_Maximum3DDiameter,A_original_shape_Maximum2DDiameterSlice,A_original_shape_Maximum2DDiameterColumn,...,P_wavelet-LLL_glrlm_LongRunEmphasis,P_wavelet-LLL_glrlm_LongRunHighGrayLevelEmphasis,P_wavelet-LLL_glrlm_LongRunLowGrayLevelEmphasis,P_wavelet-LLL_glrlm_RunEntropy,P_wavelet-LLL_glrlm_RunLengthNonUniformity,P_wavelet-LLL_glrlm_RunLengthNonUniformityNormalized,P_wavelet-LLL_glrlm_RunPercentage,P_wavelet-LLL_glrlm_RunVariance,P_wavelet-LLL_glrlm_ShortRunEmphasis,P_wavelet-LLL_glrlm_ShortRunLowGrayLevelEmphasis
102,PENG WANG XIN,PC,0,-0.502184,-0.502126,-0.611651,0.647962,-0.732197,-0.72284,-0.680767,...,-0.524796,-0.524836,-0.524786,-0.426402,-0.307732,0.296657,0.550718,-0.498348,0.437964,0.441617
12,DAI QUAN JU,PC,1,0.26476,0.263896,0.800146,-0.589642,1.496117,1.525422,0.718595,...,0.537462,0.537347,0.53749,0.807331,2.334529,-0.491107,-0.745436,0.75938,0.253348,0.257994
4,CHAI XIAO QING,PC,0,-0.491792,-0.491558,-0.586913,0.590669,-0.65552,-0.438469,-0.590014,...,-0.466472,-0.466516,-0.466462,-0.406194,-0.453249,0.272923,0.347249,-0.508814,0.483275,0.486685
14,FAN JIAN XIN,PC,0,-0.550856,-0.550588,-0.726887,1.025006,-0.836991,-0.867096,-0.906961,...,-0.598176,-0.598211,-0.598168,-0.651449,-0.783742,0.465321,0.518169,-0.597798,0.390699,0.394606
125,TIAN DE QING,PC,1,-0.634819,-0.633989,-0.978095,2.462277,-1.275515,-1.233172,-1.304808,...,-0.755795,-0.755818,-0.755789,-1.511636,-0.972166,1.564481,1.639152,-0.792541,0.863647,0.86501


In [2]:
import pandas as pd

result_df = pd.read_csv('saved/models/DNN-NegativeSampling-NewDataT0.1/0801_222300/valid_result.csv')
result_df.head()

Unnamed: 0,patient_index,prediction,true,patient
0,47,0.670299,0.0,PENG WANG XIN
1,41,0.38755,0.0,XV CHANG BI
2,22,0.438404,1.0,WU GUANG MING
3,44,0.503144,0.0,HE XIANG CUI
4,17,0.386973,0.0,GUAN ZHI QING


In [6]:
pos_df = result_df[result_df['true']==1]
pos_df = pos_df.sort_values(by=['prediction'], ascending=False)
pos_df.head()

Unnamed: 0,patient_index,prediction,true,patient
15,20,0.869387,1.0,ZHOU JIN JU
13,23,0.860005,1.0,ZHANG SAN MAO
26,9,0.858507,1.0,HUANG GAI DI
23,25,0.856949,1.0,LIU LA SHENG
28,6,0.80688,1.0,PAN ZI DA


In [7]:
neg_df = result_df[result_df['true']==0]
neg_df = neg_df.sort_values(by=['prediction'])
neg_df.head()

Unnamed: 0,patient_index,prediction,true,patient
14,27,0.163523,0.0,HU XI MING
6,51,0.28043,0.0,ZHOU QING YOU
12,5,0.281073,0.0,LIAO YI DE
4,17,0.386973,0.0,GUAN ZHI QING
1,41,0.38755,0.0,XV CHANG BI


In [8]:
valid_captum_df = pd.concat([pos_df.head(5), neg_df.head(5)])
valid_captum_df

Unnamed: 0,patient_index,prediction,true,patient
15,20,0.869387,1.0,ZHOU JIN JU
13,23,0.860005,1.0,ZHANG SAN MAO
26,9,0.858507,1.0,HUANG GAI DI
23,25,0.856949,1.0,LIU LA SHENG
28,6,0.80688,1.0,PAN ZI DA
14,27,0.163523,0.0,HU XI MING
6,51,0.28043,0.0,ZHOU QING YOU
12,5,0.281073,0.0,LIAO YI DE
4,17,0.386973,0.0,GUAN ZHI QING
1,41,0.38755,0.0,XV CHANG BI


In [11]:
feature_df = valid_df[valid_df['NAME'].isin(list(valid_captum_df['patient']))]
feature_df.drop_duplicates(subset=['NAME'], inplace=True)
feature_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feature_df.drop_duplicates(subset=['NAME'], inplace=True)


Unnamed: 0,NAME,Group,Outcome,A_original_shape_VoxelVolume,A_original_shape_MeshVolume,A_original_shape_SurfaceArea,A_original_shape_SurfaceVolumeRatio,A_original_shape_Maximum3DDiameter,A_original_shape_Maximum2DDiameterSlice,A_original_shape_Maximum2DDiameterColumn,...,P_wavelet-LLL_glrlm_LongRunEmphasis,P_wavelet-LLL_glrlm_LongRunHighGrayLevelEmphasis,P_wavelet-LLL_glrlm_LongRunLowGrayLevelEmphasis,P_wavelet-LLL_glrlm_RunEntropy,P_wavelet-LLL_glrlm_RunLengthNonUniformity,P_wavelet-LLL_glrlm_RunLengthNonUniformityNormalized,P_wavelet-LLL_glrlm_RunPercentage,P_wavelet-LLL_glrlm_RunVariance,P_wavelet-LLL_glrlm_ShortRunEmphasis,P_wavelet-LLL_glrlm_ShortRunLowGrayLevelEmphasis
72,LIAO YI DE,PC,0,-0.351118,-0.351256,0.211344,1.80926,0.236758,-0.321728,0.168174,...,-0.823044,-0.823063,-0.82304,-0.999603,0.399108,1.203848,2.08614,-0.696103,1.949288,1.94481
98,PAN ZI DA,PC,1,-0.17035,-0.170566,-0.030491,-0.420244,-0.09823,-0.020091,0.075304,...,-0.113896,-0.113965,-0.113879,-0.111861,0.290858,0.222412,-0.227451,-0.206942,0.648873,0.651392
39,HUANG GAI DI,PC,1,0.348478,0.347708,0.813244,-0.764068,1.017669,0.86675,1.435929,...,0.359791,0.359688,0.359816,0.664039,1.664224,-0.415038,-0.646333,0.388575,0.291812,0.296252
210,ZHOU QING YOU,PC,0,-0.550807,-0.550518,-0.705309,1.221941,-0.667503,-0.601907,-0.679314,...,-0.602595,-0.602629,-0.602587,-0.984234,-0.028853,1.456128,1.232567,-0.646293,1.685187,1.682129
21,GUAN ZHI QING,PC,0,-0.495371,-0.495099,-0.659872,0.202229,-0.769054,-0.824317,-0.72788,...,-0.589246,-0.589281,-0.589237,-0.637484,-0.824912,0.286621,0.359698,-0.602873,-0.354111,-0.346196
208,ZHOU JIN JU,PC,1,-0.365186,-0.365149,-0.405409,-0.197797,-0.085281,-0.239028,0.069131,...,1.58589,1.5857,1.585936,1.656107,0.586371,-1.464273,-1.506486,1.675179,-1.133086,-1.120979
188,ZHANG SAN MAO,PC,1,-0.073642,-0.073713,-0.077649,-0.906574,0.317927,-0.392229,0.466923,...,0.020108,0.020029,0.020127,0.750113,-0.316902,-1.09479,-0.757075,0.05645,-1.530837,-1.516591
87,LIU LA SHENG,PC,1,0.52209,0.521425,0.861888,-1.027628,0.476387,0.607808,0.851585,...,1.029156,1.029005,1.029192,1.305025,0.131601,-1.395757,-1.341139,0.99891,-1.589062,-1.574502
37,HU XI MING,PC,0,-0.647612,-0.646648,-1.034104,2.742062,-1.50636,-1.435315,-1.584747,...,-0.827456,-0.827475,-0.827452,-1.7383,-1.13385,1.968106,1.964166,-0.834454,1.146278,1.14612
161,XV CHANG BI,PC,0,-0.623987,-0.623178,-0.967422,1.45934,-1.234624,-1.252729,-1.463201,...,-0.794333,-0.794353,-0.794328,-1.337316,-0.985566,1.533223,1.521211,-0.750537,1.510615,1.508496


# captum

In [17]:
import torch
from captum.attr import IntegratedGradients
ig = IntegratedGradients(model)

test_feature = df2tensor(feature_df[feature_list])
ig_attr_test = ig.attribute(test_feature.float(), n_steps=10)

  return Variable._execution_engine.run_backward(


In [19]:
ig_attr_test.shape

torch.Size([10, 615])

In [22]:
ig_attr_test_df = pd.DataFrame(ig_attr_test.cpu().detach().numpy(), columns=feature_list)
ig_attr_test_df.head()

Unnamed: 0,A_original_shape_VoxelVolume,A_original_shape_MeshVolume,A_original_shape_SurfaceArea,A_original_shape_SurfaceVolumeRatio,A_original_shape_Maximum3DDiameter,A_original_shape_Maximum2DDiameterSlice,A_original_shape_Maximum2DDiameterColumn,A_original_shape_Maximum2DDiameterRow,A_original_shape_MajorAxisLength,A_original_shape_MinorAxisLength,...,P_wavelet-LLL_glrlm_LongRunEmphasis,P_wavelet-LLL_glrlm_LongRunHighGrayLevelEmphasis,P_wavelet-LLL_glrlm_LongRunLowGrayLevelEmphasis,P_wavelet-LLL_glrlm_RunEntropy,P_wavelet-LLL_glrlm_RunLengthNonUniformity,P_wavelet-LLL_glrlm_RunLengthNonUniformityNormalized,P_wavelet-LLL_glrlm_RunPercentage,P_wavelet-LLL_glrlm_RunVariance,P_wavelet-LLL_glrlm_ShortRunEmphasis,P_wavelet-LLL_glrlm_ShortRunLowGrayLevelEmphasis
0,-0.000194,-0.000288,0.000101,-0.000383,0.000764,-0.001766,0.000507,0.000238,0.001271,3.3e-05,...,-0.000592,-0.000279,-0.000296,1.1e-05,-0.000762,0.000212,0.000235,-0.000241,-0.01118,-0.007874
1,-5.2e-05,-0.000112,0.000161,-0.000375,0.000571,6.3e-05,-0.000508,-0.000225,0.001277,-0.000132,...,4.3e-05,-1.9e-05,3e-06,0.000192,0.005709,0.00039,-4e-06,3.5e-05,-0.005947,-0.004452
2,0.000172,0.000305,-0.004244,-0.000508,-0.00551,-0.001982,-0.009382,0.002741,-0.005888,-0.00267,...,-9.8e-05,0.000155,9.3e-05,-0.001432,0.031348,-0.00082,-0.000246,-7.1e-05,-0.002635,-0.002118
3,-0.000233,-0.00027,0.00031,-0.000143,-0.002014,-0.004114,-0.001664,-0.000318,-0.001489,0.000808,...,-0.000266,2.1e-05,-3.8e-05,0.000197,0.000143,0.000727,0.000107,7e-05,-0.005168,-0.009246
4,-0.000253,-0.000291,-0.000185,0.000133,-0.002507,-0.006637,-0.002562,-0.002107,-0.001759,0.000423,...,-0.000572,-0.000386,-0.000335,-2.4e-05,5e-06,6.8e-05,-1e-05,-0.000251,0.001364,0.00282


In [23]:
print(len(ig_attr_test_df))

10


In [24]:
valid_captum_df.reset_index(inplace=True, drop=True)
ig_attr_test_df = pd.concat([valid_captum_df, ig_attr_test_df], axis=1)
ig_attr_test_df.head()

Unnamed: 0,patient_index,prediction,true,patient,A_original_shape_VoxelVolume,A_original_shape_MeshVolume,A_original_shape_SurfaceArea,A_original_shape_SurfaceVolumeRatio,A_original_shape_Maximum3DDiameter,A_original_shape_Maximum2DDiameterSlice,...,P_wavelet-LLL_glrlm_LongRunEmphasis,P_wavelet-LLL_glrlm_LongRunHighGrayLevelEmphasis,P_wavelet-LLL_glrlm_LongRunLowGrayLevelEmphasis,P_wavelet-LLL_glrlm_RunEntropy,P_wavelet-LLL_glrlm_RunLengthNonUniformity,P_wavelet-LLL_glrlm_RunLengthNonUniformityNormalized,P_wavelet-LLL_glrlm_RunPercentage,P_wavelet-LLL_glrlm_RunVariance,P_wavelet-LLL_glrlm_ShortRunEmphasis,P_wavelet-LLL_glrlm_ShortRunLowGrayLevelEmphasis
0,20,0.869387,1.0,ZHOU JIN JU,-0.000194,-0.000288,0.000101,-0.000383,0.000764,-0.001766,...,-0.000592,-0.000279,-0.000296,1.1e-05,-0.000762,0.000212,0.000235,-0.000241,-0.01118,-0.007874
1,23,0.860005,1.0,ZHANG SAN MAO,-5.2e-05,-0.000112,0.000161,-0.000375,0.000571,6.3e-05,...,4.3e-05,-1.9e-05,3e-06,0.000192,0.005709,0.00039,-4e-06,3.5e-05,-0.005947,-0.004452
2,9,0.858507,1.0,HUANG GAI DI,0.000172,0.000305,-0.004244,-0.000508,-0.00551,-0.001982,...,-9.8e-05,0.000155,9.3e-05,-0.001432,0.031348,-0.00082,-0.000246,-7.1e-05,-0.002635,-0.002118
3,25,0.856949,1.0,LIU LA SHENG,-0.000233,-0.00027,0.00031,-0.000143,-0.002014,-0.004114,...,-0.000266,2.1e-05,-3.8e-05,0.000197,0.000143,0.000727,0.000107,7e-05,-0.005168,-0.009246
4,6,0.80688,1.0,PAN ZI DA,-0.000253,-0.000291,-0.000185,0.000133,-0.002507,-0.006637,...,-0.000572,-0.000386,-0.000335,-2.4e-05,5e-06,6.8e-05,-1e-05,-0.000251,0.001364,0.00282


In [25]:
ig_attr_test_df.to_csv('test_IntegratedGradients.csv', index=False)