In [1]:
import os
import sys
import pandas as pd
import gzip
import gemmi
import config
from tqdm import tqdm
from datetime import datetime
import wget
from calculate import printer,detect_plevin
from addH import addH

ring_atoms_dict = {
    'TRP': ['CD2', 'CE2', 'CE3', 'CZ2', 'CZ3', 'CH2'],
    'TYR': ['CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'CG'],
    'PHE': ['CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'CG'],
    'HIS': ['CE1', 'ND1', 'NE2', 'CG', 'CD2']}
trp_A_dict = {
    'TRP': ['CD1', 'CD2', 'NE1', 'CG', 'CE2']}
printer.print_xhpi()

input_directory = "structures/test_addH"

gz_files = []
for dirpath, _, filenames in os.walk(input_directory):
    for filename in filenames:
        if filename.endswith(".cif.gz"):
            # 将文件的完整路径添加到列表中
            gz_files.append(os.path.join(dirpath, filename))
gz_files

CC     CC    HH   HH             PPPPPPP    II  
 CC   CC     HH   HH             PP   PP    II 
  NN NN      HH   HH     York    PP   PP    II 
   NN        HHHHHHH    ------   PPPPPPP    II 
  OO OO      HH   HH             PP         II 
 OO   OO     HH   HH             PP         II 
SS     SS    HH   HH             PP         II 


['structures/test_addH/test_addH/ni/3nir.cif.gz',
 'structures/test_addH/test_addH/uc/1ucs.cif.gz',
 'structures/test_addH/test_addH/x2/3x2m.cif.gz',
 'structures/test_addH/test_addH/s2/6s2m.cif.gz',
 'structures/test_addH/test_addH/e6/6e6o.cif.gz',
 'structures/test_addH/test_addH/r6/1r6j.cif.gz',
 'structures/test_addH/test_addH/vb/2vb1.cif.gz',
 'structures/test_addH/test_addH/us/1us0.cif.gz',
 'structures/test_addH/test_addH/d8/5d8v.cif.gz',
 'structures/test_addH/test_addH/nw/5nw3.cif.gz']

In [2]:
result = []

with tqdm(total=len(gz_files), desc="Processing files") as pbar:
    for filepath in gz_files:
        try:
            pdb_name = os.path.basename(filepath).replace('.cif.gz', '')

            # Read the gzipped CIF file
            with gzip.open(filepath, 'rb') as file:
                uncompressed_content = file.read().decode('utf-8')
                cif = gemmi.cif.read_string(uncompressed_content).sole_block()
                structure = gemmi.make_structure_from_block(cif)
                resolution = structure.resolution
                model = structure[0]

                for chain in model:
                    for residue in chain:
                        
                        if residue.name in ring_atoms_dict:
                            found_interactions = detect_plevin.detect_plevin(pdb_name, resolution, model, chain, structure, residue, ring_atoms_dict)
                            result.extend(found_interactions)     

                        if residue.name in trp_A_dict:
                            found_interactions = detect_plevin.detect_plevin(pdb_name, resolution, model, chain, structure, residue, trp_A_dict)
                            result.extend(found_interactions)

        except Exception as e:
            print(f"Error processing file {filepath}: {e}")

        finally:
            pbar.update(1)

xhpi_true_count = sum(1 for interaction in result if interaction.get('xhpi') == 1)
print(f"Found {xhpi_true_count} interactions meeting the XH-π criteria")

# Save the calculation results to a CSV file
if result: # Only save if results were found
    df = pd.DataFrame(result)
    output_path = os.path.join(input_directory, 'xhpi_output.csv')
    df.to_csv(output_path, index=False)
    print(f'The result has been saved to {output_path}')
else:
    print("No interactions were found to save.")

Processing files: 100%|██████████| 10/10 [00:00<00:00, 29.18it/s]


Found 115 interactions meeting the XH-π criteria
The result has been saved to structures/test_addH/xhpi_output.csv


In [12]:
found_interactions

[{'pdb_name': '5nw3',
  'resolution': 0.59,
  'mean_b_factor': np.float64(2.84333332379659),
  'pi_chain_name': 'A',
  'pi_residue_number': 48,
  'pi_residue_name': 'PHE',
  'pi_center_array': array([11.01033333,  4.99466667,  2.31266667]),
  'pi_normal_vector': array([ 0.09043071,  0.8727883 , -0.4796487 ]),
  'pi_b_factor': np.float64(3.146666685740153),
  'X_chain_name': 'A',
  'X_residue_num': 6,
  'X_residue_name': 'LYS',
  'X_element_name': 'N',
  'X_atom_name': 'N',
  'X_pos': array([11.967,  9.186, -1.349]),
  'X_b_factor': 2.5399999618530273,
  'H_atom_name': 'H',
  'H_pos': array([11.4925604 ,  8.41930279, -1.49892825]),
  'X_to_pi_center_distance': np.float64(5.647148749590364),
  'XH_picenterAngle': np.float64(119.03312174362475),
  'XPCN_angle': np.float64(13.064803447713814)},
 {'pdb_name': '5nw3',
  'resolution': 0.59,
  'mean_b_factor': np.float64(3.5383333762486773),
  'pi_chain_name': 'A',
  'pi_residue_number': 48,
  'pi_residue_name': 'PHE',
  'pi_center_array': arr

In [13]:
found_interactions[0]["X_to_pi_center_distance"]

np.float64(5.647148749590364)