In [1]:
import torch
import schnetpack as spk
from schnetpack.datasets import QM9
from schnetpack import AtomsData
import numpy as np

def hook(self, inp_tensor, out_tensor):
    """Forward hook that stashes a module's output in the global ``layer``.

    Intended to be passed to ``register_forward_hook``; it is then invoked
    as ``hook(module, inputs, output)`` after the module's forward pass.

    Args:
        self: the module the hook is attached to (unused).
        inp_tensor: the inputs the module was called with (unused).
        out_tensor: the value the module returned.

    Returns:
        None, so the module's output is passed on unchanged.
    """
    # ``global`` makes the captured output readable from notebook scope
    # once the forward pass has finished.
    global layer
    layer = out_tensor


# --- SchNet hyper-parameters --------------------------------------------
# These define the architecture that the checkpoint weights are loaded
# into; load_state_dict below fails on a shape mismatch.
n_atom_basis = 128
n_filters = 128
n_gaussians = 50
n_interactions = 6
cutoff = 50.


# --- Dataset --------------------------------------------------------------
qm9_filepath = 'data/datasets/QM9/qm9.db'
qm9_data = QM9(qm9_filepath,download=False,remove_uncharacterized=True)

data_filepath = 'data/datasets/QM9/qm9.db'
available_properties = 'N/A'
# `data` used to be a second QM9 object built from the same file with the
# same options; alias the existing instance instead of opening it twice.
data = qm9_data


# Load atom ref data
atomrefs = qm9_data.get_atomref(QM9.U0)

# Define SchNet representation model

schnet = spk.representation.SchNet(
    n_atom_basis=n_atom_basis,
    n_filters=n_filters,
    n_gaussians=n_gaussians,
    n_interactions=n_interactions,
    cutoff=cutoff,
    cutoff_network=spk.nn.cutoff.CosineCutoff,
)

# Define SchNet output model and property to be predicted.
# The output module consumes the per-atom representation, whose width is
# n_atom_basis (not n_filters; the two merely happen to be equal here).
output_U0 = spk.atomistic.Atomwise(n_in=n_atom_basis,atomref=atomrefs[QM9.U0])

# Define atomistic model
model = spk.AtomisticModel(representation=schnet,output_modules=output_U0)


# Load saved checkpoint file
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
checkpoint_path = 'data/trainedmodels/model1/trained-1000.pth'
load_checkpoint = torch.load(checkpoint_path,map_location=torch.device('cpu'))


#qm9_i6_30f_20g-1000-500-4_300.pth
# load model's state dictionary from saved checkpoint
model.load_state_dict(load_checkpoint)
# The model is only used for inference in this notebook.
model.eval()

print(model.state_dict().keys())

#set up device for forward pass
device='cpu'

# load atoms converter
converter = spk.data.AtomsConverter(device=device)



  from .autonotebook import tqdm as notebook_tqdm


odict_keys(['representation.embedding.weight', 'representation.distance_expansion.width', 'representation.distance_expansion.offsets', 'representation.interactions.0.filter_network.0.weight', 'representation.interactions.0.filter_network.0.bias', 'representation.interactions.0.filter_network.1.weight', 'representation.interactions.0.filter_network.1.bias', 'representation.interactions.0.cutoff_network.cutoff', 'representation.interactions.0.cfconv.in2f.weight', 'representation.interactions.0.cfconv.f2out.weight', 'representation.interactions.0.cfconv.f2out.bias', 'representation.interactions.0.cfconv.filter_network.0.weight', 'representation.interactions.0.cfconv.filter_network.0.bias', 'representation.interactions.0.cfconv.filter_network.1.weight', 'representation.interactions.0.cfconv.filter_network.1.bias', 'representation.interactions.0.cfconv.cutoff_network.cutoff', 'representation.interactions.0.dense.weight', 'representation.interactions.0.dense.bias', 'representation.interactio

For rep1-rep3

In [None]:
#run an input and extract an internal vector
# Run molecules through the model and collect, per element (H, C, N, O),
# the per-atom output of the embedding layer and of every interaction block.
#
# Fixes compared with the previous version of this cell:
#   * hooks are registered once and removed afterwards (they used to be
#     re-registered on every iteration and never removed);
#   * int2..int5 are read from interactions[2..5] (they were all hooked on
#     interactions[1]);
#   * rows are really accumulated into the *_list arrays (they used to be
#     stacked into throw-away names, leaving the lists empty);
#   * one forward pass per molecule instead of seven;
#   * rows are gathered in Python lists and stacked once at the end.
n_molecules = 10000
feature_dim = n_atom_basis
element_numbers = {'H': 1, 'C': 6, 'N': 7, 'O': 8}

# Modules whose outputs are recorded, keyed by the name used below.
hooked_modules = {'emb': model.representation.embedding}
for block_index in range(6):
    hooked_modules['int%d' % block_index] = model.representation.interactions[block_index]

# Most recent output of every hooked module (filled during model(inputs)).
captured = {}


def _capture_into(name):
    """Build a forward hook that stores the module output under ``name``."""
    def _hook(module, inp_tensor, out_tensor):
        captured[name] = out_tensor.detach().clone().cpu().numpy()
    return _hook


# (activation name, element symbol) -> list of (n_selected_atoms, dim) arrays
collected = {
    (name, symbol): []
    for name in list(hooked_modules) + ['total']
    for symbol in element_numbers
}

hook_handles = [
    module.register_forward_hook(_capture_into(name))
    for name, module in hooked_modules.items()
]
try:
    with torch.no_grad():
        # qm9_data is the dataset from the setup cell; later cells rebind the
        # name `data` to numpy arrays, so it is not used here.
        for idx in range(min(n_molecules, len(qm9_data))):
            at, props = qm9_data.get_properties(idx)
            inputs = converter(at)

            captured.clear()
            model(inputs)

            activations = dict(captured)
            activations['total'] = (
                captured['emb'] + captured['int0'] + captured['int1'] + captured['int2']
            )

            atomic_numbers = np.asarray(props['_atomic_numbers'])
            for symbol, atomic_number in element_numbers.items():
                mask = atomic_numbers == atomic_number
                if not mask.any():
                    continue
                for name, values in activations.items():
                    # values[0] drops the batch axis (one molecule per pass).
                    collected[(name, symbol)].append(values[0][mask])
finally:
    # Always detach the hooks so that re-running the cell does not stack them.
    for handle in hook_handles:
        handle.remove()


def _stack_with_placeholder(name, symbol):
    """Stack the collected rows below one all-zero placeholder row.

    The placeholder row is kept because the following cell deletes row 0 of
    every array.
    """
    placeholder = np.zeros((1, feature_dim))
    return np.vstack([placeholder] + collected[(name, symbol)])


total_embH_list, total_embC_list, total_embN_list, total_embO_list = (
    _stack_with_placeholder('total', symbol) for symbol in ('H', 'C', 'N', 'O')
)
int0H_list, int1H_list, int2H_list, int3H_list, int4H_list, int5H_list = (
    _stack_with_placeholder('int%d' % i, 'H') for i in range(6)
)
int0C_list, int1C_list, int2C_list, int3C_list, int4C_list, int5C_list = (
    _stack_with_placeholder('int%d' % i, 'C') for i in range(6)
)
int0N_list, int1N_list, int2N_list, int3N_list, int4N_list, int5N_list = (
    _stack_with_placeholder('int%d' % i, 'N') for i in range(6)
)
int0O_list, int1O_list, int2O_list, int3O_list, int4O_list, int5O_list = (
    _stack_with_placeholder('int%d' % i, 'O') for i in range(6)
)


In [25]:
def _drop_placeholder_row(rows):
    """Return ``rows`` without its leading all-zero placeholder row.

    The row is removed only when it is entirely zero, so running this cell
    a second time does not delete real data.
    """
    rows = np.atleast_2d(rows)
    if rows.shape[0] > 0 and not rows[0].any():
        return np.delete(rows, 0, axis=0)
    return rows


# Each array is cleaned from ITS OWN source (previously int1..int5 were all
# derived from the int0 array of the element), and the cleaned result is
# stored back under the *_list name so the CSV export and the PCA cells
# below operate on data without the placeholder row.
total_embH_list = _drop_placeholder_row(total_embH_list)
total_embC_list = _drop_placeholder_row(total_embC_list)
total_embN_list = _drop_placeholder_row(total_embN_list)
total_embO_list = _drop_placeholder_row(total_embO_list)

int0H_list = int0_H = _drop_placeholder_row(int0H_list)
int1H_list = int1_H = _drop_placeholder_row(int1H_list)
int2H_list = int2_H = _drop_placeholder_row(int2H_list)
int3H_list = int3_H = _drop_placeholder_row(int3H_list)
int4H_list = int4_H = _drop_placeholder_row(int4H_list)
int5H_list = int5_H = _drop_placeholder_row(int5H_list)

int0C_list = int0_C = _drop_placeholder_row(int0C_list)
int1C_list = int1_C = _drop_placeholder_row(int1C_list)
int2C_list = int2_C = _drop_placeholder_row(int2C_list)
int3C_list = int3_C = _drop_placeholder_row(int3C_list)
int4C_list = int4_C = _drop_placeholder_row(int4C_list)
int5C_list = int5_C = _drop_placeholder_row(int5C_list)

int0N_list = int0_N = _drop_placeholder_row(int0N_list)
int1N_list = int1_N = _drop_placeholder_row(int1N_list)
int2N_list = int2_N = _drop_placeholder_row(int2N_list)
int3N_list = int3_N = _drop_placeholder_row(int3N_list)
int4N_list = int4_N = _drop_placeholder_row(int4N_list)
int5N_list = int5_N = _drop_placeholder_row(int5N_list)

int0O_list = int0_O = _drop_placeholder_row(int0O_list)
int1O_list = int1_O = _drop_placeholder_row(int1O_list)
int2O_list = int2_O = _drop_placeholder_row(int2O_list)
int3O_list = int3_O = _drop_placeholder_row(int3O_list)
int4O_list = int4_O = _drop_placeholder_row(int4O_list)
int5O_list = int5_O = _drop_placeholder_row(int5O_list)


from numpy import savetxt

# Write every per-element activation table to its own comma-separated file.
# Entries are (output file name, array) and are written in the listed order.
csv_outputs = (
    ('embsH.csv', total_embH_list),
    ('embsC.csv', total_embC_list),
    ('embsN.csv', total_embN_list),
    ('embsO.csv', total_embO_list),
    ('int0H.csv', int0H_list),
    ('int1H.csv', int1H_list),
    ('int2H.csv', int2H_list),
    ('int3H.csv', int3H_list),
    ('int4H.csv', int4H_list),
    ('int5H.csv', int5H_list),
    ('int0C.csv', int0C_list),
    ('int1C.csv', int1C_list),
    ('int2C.csv', int2C_list),
    ('int3C.csv', int3C_list),
    ('int4C.csv', int4C_list),
    ('int5C.csv', int5C_list),
    ('int0N.csv', int0N_list),
    ('int1N.csv', int1N_list),
    ('int2N.csv', int2N_list),
    ('int3N.csv', int3N_list),
    ('int4N.csv', int4N_list),
    ('int5N.csv', int5N_list),
    ('int0O.csv', int0O_list),
    ('int1O.csv', int1O_list),
    ('int2O.csv', int2O_list),
    ('int3O.csv', int3O_list),
    ('int4O.csv', int4O_list),
    ('int5O.csv', int5O_list),
)
for csv_name, table in csv_outputs:
    savetxt(csv_name, table, delimiter=',')

[[-2.12359047e+00  5.49322844e-01  1.37732434e+00 -2.94602841e-01
  -1.76522803e+00 -7.11925626e-02 -4.49874461e-01  8.53084445e-01
  -2.89131194e-01  1.83485579e+00 -7.41182923e-01  3.43575448e-01
   3.78622353e-01 -1.03042805e+00  7.60020196e-01 -7.54284620e-01
  -3.33554506e-01  3.90934134e+00  2.03850460e+00 -3.68002009e+00
   7.76915312e-01 -3.98384547e+00  2.55580902e+00 -3.30014855e-01
  -2.76858640e+00 -2.70543885e+00 -2.88286352e+00 -4.28451681e+00
  -7.35593855e-01 -1.24175854e-01]
 [-2.12359047e+00  5.49323201e-01  1.37732387e+00 -2.94602692e-01
  -1.76522791e+00 -7.11928010e-02 -4.49874401e-01  8.53084207e-01
  -2.89131105e-01  1.83485615e+00 -7.41182804e-01  3.43576044e-01
   3.78622234e-01 -1.03042781e+00  7.60020614e-01 -7.54284620e-01
  -3.33554745e-01  3.90934110e+00  2.03850460e+00 -3.68001914e+00
   7.76915312e-01 -3.98384523e+00  2.55580854e+00 -3.30014795e-01
  -2.76858640e+00 -2.70543838e+00 -2.88286352e+00 -4.28451729e+00
  -7.35594392e-01 -1.24176033e-01]
 [-1.8

Dimension reduction

In [3]:
from sklearn.decomposition import  PCA
import scipy.linalg as la


n_components = 30
# NOTE: this rebinds `data`, which the setup cell used for the QM9 dataset.
data = total_embH_list
save_filepath = 'embsHpca.csv'

# Perform PCA decomposition of the data.
# fit() needs the data matrix; calling it without arguments raises TypeError.
pca = PCA(n_components)
pca.fit(data)
x_pca = pca.transform(data)

# Eigen-decomposition of the covariance matrix estimated by the fitted PCA.
cov = pca.get_covariance()
eig, ev = la.eig(cov)
total = sum(eig.real)
# ev @ ev.T; presumably an orthonormality check of the eigenvectors.
evt=np.transpose(ev)
unit=np.matmul(ev,evt)


# Save the projected data, eigenvalues, eigenvectors and covariance matrix.
# la.eig returns complex arrays, so only the real parts are written.
save_filepathpca = save_filepath.replace('.csv','transform.csv')
savetxt(save_filepathpca, x_pca, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','eig.csv')
eig=eig.real
savetxt(save_filepatheig, eig, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','ev.csv')
ev=ev.real
savetxt(save_filepatheig, ev, delimiter=',')
save_filepathcov = save_filepath.replace('.csv','cov.csv')
cov=cov.real
savetxt(save_filepathcov, cov, delimiter=',')

ModuleNotFoundError: No module named 'tools.utils_dimred'

In [None]:
from sklearn.decomposition import  PCA
import scipy.linalg as la


n_components = 30
# NOTE: this rebinds `data`, which the setup cell used for the QM9 dataset.
data = total_embC_list
save_filepath = 'embsCpca.csv'

# Perform PCA decomposition of the data.
# fit() needs the data matrix; calling it without arguments raises TypeError.
pca = PCA(n_components)
pca.fit(data)
x_pca = pca.transform(data)

# Eigen-decomposition of the covariance matrix estimated by the fitted PCA.
cov = pca.get_covariance()
eig, ev = la.eig(cov)
total = sum(eig.real)
# ev @ ev.T; presumably an orthonormality check of the eigenvectors.
evt=np.transpose(ev)
unit=np.matmul(ev,evt)


# Save the projected data, eigenvalues, eigenvectors and covariance matrix.
# la.eig returns complex arrays, so only the real parts are written.
save_filepathpca = save_filepath.replace('.csv','transform.csv')
savetxt(save_filepathpca, x_pca, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','eig.csv')
eig=eig.real
savetxt(save_filepatheig, eig, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','ev.csv')
ev=ev.real
savetxt(save_filepatheig, ev, delimiter=',')
save_filepathcov = save_filepath.replace('.csv','cov.csv')
cov=cov.real
savetxt(save_filepathcov, cov, delimiter=',')

In [None]:
from sklearn.decomposition import  PCA
import scipy.linalg as la


n_components = 30
# NOTE: this rebinds `data`, which the setup cell used for the QM9 dataset.
data = total_embN_list
save_filepath = 'embsNpca.csv'

# Perform PCA decomposition of the data.
# fit() needs the data matrix; calling it without arguments raises TypeError.
pca = PCA(n_components)
pca.fit(data)
x_pca = pca.transform(data)

# Eigen-decomposition of the covariance matrix estimated by the fitted PCA.
cov = pca.get_covariance()
eig, ev = la.eig(cov)
total = sum(eig.real)
# ev @ ev.T; presumably an orthonormality check of the eigenvectors.
evt=np.transpose(ev)
unit=np.matmul(ev,evt)


# Save the projected data, eigenvalues, eigenvectors and covariance matrix.
# la.eig returns complex arrays, so only the real parts are written.
save_filepathpca = save_filepath.replace('.csv','transform.csv')
savetxt(save_filepathpca, x_pca, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','eig.csv')
eig=eig.real
savetxt(save_filepatheig, eig, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','ev.csv')
ev=ev.real
savetxt(save_filepatheig, ev, delimiter=',')
save_filepathcov = save_filepath.replace('.csv','cov.csv')
cov=cov.real
savetxt(save_filepathcov, cov, delimiter=',')

In [None]:
from sklearn.decomposition import  PCA
import scipy.linalg as la


n_components = 30
# NOTE: this rebinds `data`, which the setup cell used for the QM9 dataset.
data = total_embO_list
save_filepath = 'embsOpca.csv'

# Perform PCA decomposition of the data.
# fit() needs the data matrix; calling it without arguments raises TypeError.
pca = PCA(n_components)
pca.fit(data)
x_pca = pca.transform(data)

# Eigen-decomposition of the covariance matrix estimated by the fitted PCA.
cov = pca.get_covariance()
eig, ev = la.eig(cov)
total = sum(eig.real)
# ev @ ev.T; presumably an orthonormality check of the eigenvectors.
evt=np.transpose(ev)
unit=np.matmul(ev,evt)


# Save the projected data, eigenvalues, eigenvectors and covariance matrix.
# la.eig returns complex arrays, so only the real parts are written.
save_filepathpca = save_filepath.replace('.csv','transform.csv')
savetxt(save_filepathpca, x_pca, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','eig.csv')
eig=eig.real
savetxt(save_filepatheig, eig, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','ev.csv')
ev=ev.real
savetxt(save_filepatheig, ev, delimiter=',')
save_filepathcov = save_filepath.replace('.csv','cov.csv')
cov=cov.real
savetxt(save_filepathcov, cov, delimiter=',')

In [None]:
from sklearn.decomposition import  PCA
import scipy.linalg as la


n_components = 30
# NOTE: this rebinds `data`, which the setup cell used for the QM9 dataset.
data = int0H_list
save_filepath = 'int0Hpca.csv'

# Perform PCA decomposition of the data.
# fit() needs the data matrix; calling it without arguments raises TypeError.
pca = PCA(n_components)
pca.fit(data)
x_pca = pca.transform(data)

# Eigen-decomposition of the covariance matrix estimated by the fitted PCA.
cov = pca.get_covariance()
eig, ev = la.eig(cov)
total = sum(eig.real)
# ev @ ev.T; presumably an orthonormality check of the eigenvectors.
evt=np.transpose(ev)
unit=np.matmul(ev,evt)


# Save the projected data, eigenvalues, eigenvectors and covariance matrix.
# la.eig returns complex arrays, so only the real parts are written.
save_filepathpca = save_filepath.replace('.csv','transform.csv')
savetxt(save_filepathpca, x_pca, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','eig.csv')
eig=eig.real
savetxt(save_filepatheig, eig, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','ev.csv')
ev=ev.real
savetxt(save_filepatheig, ev, delimiter=',')
save_filepathcov = save_filepath.replace('.csv','cov.csv')
cov=cov.real
savetxt(save_filepathcov, cov, delimiter=',')

In [None]:
from sklearn.decomposition import  PCA
import scipy.linalg as la


n_components = 30
# NOTE: this rebinds `data`, which the setup cell used for the QM9 dataset.
data = int1H_list
save_filepath = 'int1Hpca.csv'

# Perform PCA decomposition of the data.
# fit() needs the data matrix; calling it without arguments raises TypeError.
pca = PCA(n_components)
pca.fit(data)
x_pca = pca.transform(data)

# Eigen-decomposition of the covariance matrix estimated by the fitted PCA.
cov = pca.get_covariance()
eig, ev = la.eig(cov)
total = sum(eig.real)
# ev @ ev.T; presumably an orthonormality check of the eigenvectors.
evt=np.transpose(ev)
unit=np.matmul(ev,evt)


# Save the projected data, eigenvalues, eigenvectors and covariance matrix.
# la.eig returns complex arrays, so only the real parts are written.
save_filepathpca = save_filepath.replace('.csv','transform.csv')
savetxt(save_filepathpca, x_pca, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','eig.csv')
eig=eig.real
savetxt(save_filepatheig, eig, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','ev.csv')
ev=ev.real
savetxt(save_filepatheig, ev, delimiter=',')
save_filepathcov = save_filepath.replace('.csv','cov.csv')
cov=cov.real
savetxt(save_filepathcov, cov, delimiter=',')

In [None]:
from sklearn.decomposition import  PCA
import scipy.linalg as la



n_components = 30
# NOTE: this rebinds `data`, which the setup cell used for the QM9 dataset.
data = int2H_list
save_filepath = 'int2Hpca.csv'

# Perform PCA decomposition of the data.
# fit() needs the data matrix; calling it without arguments raises TypeError.
pca = PCA(n_components)
pca.fit(data)
x_pca = pca.transform(data)

# Eigen-decomposition of the covariance matrix estimated by the fitted PCA.
cov = pca.get_covariance()
eig, ev = la.eig(cov)
total = sum(eig.real)
# ev @ ev.T; presumably an orthonormality check of the eigenvectors.
evt=np.transpose(ev)
unit=np.matmul(ev,evt)


# Save the projected data, eigenvalues, eigenvectors and covariance matrix.
# la.eig returns complex arrays, so only the real parts are written.
save_filepathpca = save_filepath.replace('.csv','transform.csv')
savetxt(save_filepathpca, x_pca, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','eig.csv')
eig=eig.real
savetxt(save_filepatheig, eig, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','ev.csv')
ev=ev.real
savetxt(save_filepatheig, ev, delimiter=',')
save_filepathcov = save_filepath.replace('.csv','cov.csv')
cov=cov.real
savetxt(save_filepathcov, cov, delimiter=',')

In [None]:
from sklearn.decomposition import  PCA
import scipy.linalg as la



n_components = 30
# NOTE: this rebinds `data`, which the setup cell used for the QM9 dataset.
data = int3H_list
save_filepath = 'int3Hpca.csv'

# Perform PCA decomposition of the data.
# fit() needs the data matrix; calling it without arguments raises TypeError.
pca = PCA(n_components)
pca.fit(data)
x_pca = pca.transform(data)

# Eigen-decomposition of the covariance matrix estimated by the fitted PCA.
cov = pca.get_covariance()
eig, ev = la.eig(cov)
total = sum(eig.real)
# ev @ ev.T; presumably an orthonormality check of the eigenvectors.
evt=np.transpose(ev)
unit=np.matmul(ev,evt)


# Save the projected data, eigenvalues, eigenvectors and covariance matrix.
# la.eig returns complex arrays, so only the real parts are written.
save_filepathpca = save_filepath.replace('.csv','transform.csv')
savetxt(save_filepathpca, x_pca, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','eig.csv')
eig=eig.real
savetxt(save_filepatheig, eig, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','ev.csv')
ev=ev.real
savetxt(save_filepatheig, ev, delimiter=',')
save_filepathcov = save_filepath.replace('.csv','cov.csv')
cov=cov.real
savetxt(save_filepathcov, cov, delimiter=',')

In [None]:
from sklearn.decomposition import  PCA
import scipy.linalg as la



n_components = 30
# NOTE: this rebinds `data`, which the setup cell used for the QM9 dataset.
data = int4H_list
save_filepath = 'int4Hpca.csv'

# Perform PCA decomposition of the data.
# fit() needs the data matrix; calling it without arguments raises TypeError.
pca = PCA(n_components)
pca.fit(data)
x_pca = pca.transform(data)

# Eigen-decomposition of the covariance matrix estimated by the fitted PCA.
cov = pca.get_covariance()
eig, ev = la.eig(cov)
total = sum(eig.real)
# ev @ ev.T; presumably an orthonormality check of the eigenvectors.
evt=np.transpose(ev)
unit=np.matmul(ev,evt)


# Save the projected data, eigenvalues, eigenvectors and covariance matrix.
# la.eig returns complex arrays, so only the real parts are written.
save_filepathpca = save_filepath.replace('.csv','transform.csv')
savetxt(save_filepathpca, x_pca, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','eig.csv')
eig=eig.real
savetxt(save_filepatheig, eig, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','ev.csv')
ev=ev.real
savetxt(save_filepatheig, ev, delimiter=',')
save_filepathcov = save_filepath.replace('.csv','cov.csv')
cov=cov.real
savetxt(save_filepathcov, cov, delimiter=',')

In [None]:
from sklearn.decomposition import  PCA
import scipy.linalg as la



n_components = 30
# NOTE: this rebinds `data`, which the setup cell used for the QM9 dataset.
data = int5H_list
save_filepath = 'int5Hpca.csv'

# Perform PCA decomposition of the data.
# fit() needs the data matrix; calling it without arguments raises TypeError.
pca = PCA(n_components)
pca.fit(data)
x_pca = pca.transform(data)

# Eigen-decomposition of the covariance matrix estimated by the fitted PCA.
cov = pca.get_covariance()
eig, ev = la.eig(cov)
total = sum(eig.real)
# ev @ ev.T; presumably an orthonormality check of the eigenvectors.
evt=np.transpose(ev)
unit=np.matmul(ev,evt)


# Save the projected data, eigenvalues, eigenvectors and covariance matrix.
# la.eig returns complex arrays, so only the real parts are written.
save_filepathpca = save_filepath.replace('.csv','transform.csv')
savetxt(save_filepathpca, x_pca, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','eig.csv')
eig=eig.real
savetxt(save_filepatheig, eig, delimiter=',')
save_filepatheig = save_filepath.replace('.csv','ev.csv')
ev=ev.real
savetxt(save_filepatheig, ev, delimiter=',')
save_filepathcov = save_filepath.replace('.csv','cov.csv')
cov=cov.real
savetxt(save_filepathcov, cov, delimiter=',')

Post-process with pandas and attach the labels to all of the saved outputs