In [8]:
import pandas as pd
import numpy as np
import gemmi
import reciprocalspaceship as rs
from tqdm import tqdm
import glob, os

In [3]:
# Text file mapping ligand CIFs to datasets; read line by line below.
file_path = './ligand_cif_to_dataset_mapping.txt'

# Collect a 4-character sample id from every entry that does NOT end in '.cif'
# (presumably datasets without a ligand CIF, i.e. apo datasets -- confirm
# against the mapping file's format).
apo_samples = []
with open(file_path, 'r') as file:
    for line in file:
        entry = line.strip()
        # Skip blank lines (e.g. a trailing newline at end of file): they do
        # not end in '.cif' either and would otherwise add an empty id, which
        # later turns per-sample glob patterns into a match-everything '*.mtz'.
        if not entry or entry.endswith('.cif'):
            continue
        # The id is the four characters preceding the entry's final character.
        apo_samples.append(entry[-5:-1])

In [4]:
# Sanity check: display how many sample ids were collected from the mapping file.
len(apo_samples)

41

In [6]:
# Root working directory for this project (absolute path on the cluster scratch space).
my_dir = "/n/holyscratch01/hekstra_lab/mhli/drug/"

# Everything the notebook reads lives under the 'pipeline/' sub-directory of my_dir.
# Both paths keep their trailing slash because later cells append to them by
# plain string concatenation.
basepath = f"{my_dir}pipeline/"

### VAE reconstruction: mean apo peak value as metric

In [7]:
# Directory searched for the VAE-reconstructed MTZ files (trailing slash kept).
vae_reconstructed_with_phases_path = f"{basepath}vae/reconstructed_w_phases/"

In [10]:
# Highest normalized map value for each apo sample's VAE reconstruction.
peak_values = []
for pdbid in tqdm(apo_samples):

    # Change the following line to the mtzs files your model created
    mtz_pattern = os.path.join(vae_reconstructed_with_phases_path, f"{pdbid}*.mtz")

    # glob returns matches in arbitrary order, so sort to make the selected
    # file reproducible when more than one file starts with this sample id.
    mtz_matches = sorted(glob.glob(mtz_pattern))
    if not mtz_matches:
        # Fail with the offending sample and pattern instead of a bare IndexError.
        raise FileNotFoundError(
            f"No reconstructed MTZ file found for sample {pdbid!r} (pattern: {mtz_pattern})"
        )
    mtz_file = gemmi.read_mtz_file(mtz_matches[0])

    # Build a real-space map from the 'diff' amplitude column and the
    # 'refine_PH2FOFCWT' phase column.
    real_grid = mtz_file.transform_f_phi_to_map('diff', 'refine_PH2FOFCWT', sample_rate=3.0)
    # Rescale the grid in place before taking the peak, so that peaks are
    # comparable across samples.
    real_grid.normalize()

    peak_values.append(np.max(real_grid))

100%|██████████| 41/41 [00:02<00:00, 19.68it/s]


In [11]:
# Average of the per-sample peak values currently held in `peak_values`.
mean_peak = np.mean(peak_values)
print("Mean Highest Peak Value in Apo models: ", mean_peak)

Mean Highest Peak Value in Apo models:  5.8870745


In [35]:
def _normalized_peak(activation, ld, pdbid):
    """Return the highest value of one sample's normalized reconstruction map.

    Reads the phased reconstruction MTZ written for the given activation and
    latent dimension, builds a map from the 'F-obs-diff' amplitude column and
    the 'refine_PH2FOFCWT' phase column, normalizes the grid in place and
    returns its maximum.
    """
    # Change the following line to the mtzs files your model created
    recon_mtz = gemmi.read_mtz_file(f'../../pipeline/vae/{activation}/recons-{ld}-phases/PTP1B-y{pdbid}_mrflagsref_idxs_scaled.mtz')

    map_grid = recon_mtz.transform_f_phi_to_map('F-obs-diff', 'refine_PH2FOFCWT', sample_rate=3.0)
    map_grid.normalize()

    return np.max(map_grid)


# Sweep latent dimensions 2..9 and both activations, printing the mean peak
# over all apo samples for each combination.
for ld in range(2, 10):

    for activation in ('relu', 'tanh'):

        # NOTE: this rebinds the notebook-level `peak_values` on every
        # iteration; after the cell finishes it holds the last combination.
        peak_values = [_normalized_peak(activation, ld, pdbid) for pdbid in apo_samples]

        print(activation, ld, np.mean(peak_values))

relu 2 6.3950167
tanh 2 7.219455
relu 3 6.3852262
tanh 3 7.0670295
relu 4 6.426134
tanh 4 6.8995657
relu 5 5.983938
tanh 5 6.552182
relu 6 6.488423
tanh 6 6.756052
relu 7 6.356006
tanh 7 6.4859233
relu 8 6.228035
tanh 8 6.5133843
relu 9 6.2416387
tanh 9 6.250216


### Zmap mean peak value as metric

In [13]:
# Directory holding the per-sample Z-map files (trailing slash kept for concatenation).
z_maps_path = f"{basepath}data/z_maps/"

In [15]:
# Highest normalized value of each apo sample's Z-map.
peak_values = []

for pdbid in tqdm(apo_samples):

    # Point the following line at the Z-map (.ccp4) files to evaluate
    zmap_file = f'{z_maps_path}PTP1B-y{pdbid}-z_map.native.ccp4'
    z_map = gemmi.read_ccp4_map(zmap_file)

    # Normalize the map's grid in place, then record its maximum.
    z_grid = z_map.grid
    z_grid.normalize()
    peak = np.max(z_grid)

    peak_values.append(peak)

100%|██████████| 41/41 [00:01<00:00, 37.37it/s]


In [16]:
### Mean peak value as metric
# Average of the per-sample peak values currently held in `peak_values`.
mean_peak = np.mean(peak_values)
print("Mean Highest Peak Value in Apo models: ", mean_peak)

Mean Highest Peak Value in Apo models:  5.016709
