In [213]:
from Bio.PDB import PDBParser
import numpy as np
import nglview
import os
import pandas as pd
from scipy.spatial.distance import cdist

In [214]:
rf_pdb='./30AUG24_FJOM/beta_wts/_105.pdb'
CD20_D='./6vja_relaxed_aligned_chainD.pdb'

In [215]:
view = nglview.show_structure_file(rf_pdb)  
view.add_component(CD20_D,color='blue')
view

NGLWidget()

In [218]:
def dir_to_df(directory='./',weights=None):
    # List all .pdb files in the directory
    pdb_files = [f for f in os.listdir(directory) if f.endswith('.pdb')]

    # Gather file paths and modified names
    file_info = []
    for file in pdb_files:
        full_path = os.path.join(directory, file)  # Full path to the file
        # Remove underscores and strip '.pdb' extension
        file_name = os.path.basename(file).replace('_', '').replace('.pdb', '')
        file_info.append({'RFDiff Path': full_path, 'File Name': file_name})

    df = pd.DataFrame(file_info)
    
    if weights:
        df['RF wts']=weights
        
    return df

In [219]:

# Create a DataFrame
nrm_wts_df = dir_to_df('../RFdiffusion/30AUG24_FJOM/nrm_wts/',weights='normal')
beta_wts_df = dir_to_df('../RFdiffusion/30AUG24_FJOM/beta_wts/',weights='beta')


In [232]:
def clashes_pdb_compare(pdb1=str,pdb2=CD20_D,threshold=2.0):
    
    def get_coordinates(pdb_file):
        parser = PDBParser(QUIET=True)
        structure = parser.get_structure('structure', pdb_file)

        atoms = [atom for atom in structure.get_atoms()]
        coords = np.array([atom.coord for atom in atoms])

        return coords

    # Load coordinates from both PDB files
    pdb1_coords = get_coordinates(pdb1)
    pdb2_coords = get_coordinates(pdb2)

    # Calculate pairwise distances between all atoms in pdb1 and pdb2
    distances = cdist(pdb1_coords, pdb2_coords)

    # Find overlaps/clashes
    overlaps = np.where(distances < threshold)

    # List atom pairs that are clashing
    clashes = [(i, j, distances[i, j]) for i, j in zip(*overlaps)]

    # Print the results
    print(f"Number of clashes: {len(clashes)}")
    #for clash in clashes:
    #    print(f"Atom in pdb1 (index {clash[0]}) and atom in pdb2 (index {clash[1]}) are within {clash[2]:.2f} Å")
    return len(clashes)

In [220]:
all_RFdiff=pd.concat([nrm_wts_df,beta_wts_df],ignore_index=True)

In [221]:
all_RFdiff['Num Clashes'] = all_RFdiff['RFDiff Path'].apply(clashes_pdb_compare)

Number of clashes: 355
Number of clashes: 424
Number of clashes: 527
Number of clashes: 303
Number of clashes: 334
Number of clashes: 320
Number of clashes: 58
Number of clashes: 454
Number of clashes: 469
Number of clashes: 472
Number of clashes: 346
Number of clashes: 81
Number of clashes: 1
Number of clashes: 375
Number of clashes: 392
Number of clashes: 319
Number of clashes: 364
Number of clashes: 48
Number of clashes: 543
Number of clashes: 519
Number of clashes: 335
Number of clashes: 379
Number of clashes: 583
Number of clashes: 633
Number of clashes: 386
Number of clashes: 598
Number of clashes: 39
Number of clashes: 302
Number of clashes: 361
Number of clashes: 2
Number of clashes: 224
Number of clashes: 411
Number of clashes: 195
Number of clashes: 236
Number of clashes: 504
Number of clashes: 219
Number of clashes: 685
Number of clashes: 505
Number of clashes: 8
Number of clashes: 59
Number of clashes: 3
Number of clashes: 2
Number of clashes: 558
Number of clashes: 190
Num

Number of clashes: 2
Number of clashes: 266
Number of clashes: 212
Number of clashes: 379
Number of clashes: 664
Number of clashes: 827
Number of clashes: 344
Number of clashes: 367
Number of clashes: 702
Number of clashes: 383
Number of clashes: 357
Number of clashes: 415
Number of clashes: 493
Number of clashes: 459
Number of clashes: 2
Number of clashes: 1
Number of clashes: 371
Number of clashes: 513
Number of clashes: 70
Number of clashes: 268
Number of clashes: 373
Number of clashes: 2
Number of clashes: 319
Number of clashes: 375
Number of clashes: 369
Number of clashes: 92
Number of clashes: 1
Number of clashes: 446
Number of clashes: 401
Number of clashes: 2
Number of clashes: 1
Number of clashes: 556
Number of clashes: 2
Number of clashes: 1
Number of clashes: 513
Number of clashes: 607
Number of clashes: 458
Number of clashes: 1
Number of clashes: 397
Number of clashes: 286
Number of clashes: 956
Number of clashes: 347
Number of clashes: 1027
Number of clashes: 511
Number of

Number of clashes: 938
Number of clashes: 388
Number of clashes: 576
Number of clashes: 515
Number of clashes: 414
Number of clashes: 296
Number of clashes: 113
Number of clashes: 318
Number of clashes: 289
Number of clashes: 421
Number of clashes: 242
Number of clashes: 160
Number of clashes: 460
Number of clashes: 452
Number of clashes: 603
Number of clashes: 308
Number of clashes: 737
Number of clashes: 518
Number of clashes: 444
Number of clashes: 390
Number of clashes: 288
Number of clashes: 398
Number of clashes: 212
Number of clashes: 952
Number of clashes: 602
Number of clashes: 262
Number of clashes: 905
Number of clashes: 1
Number of clashes: 390
Number of clashes: 328
Number of clashes: 190
Number of clashes: 374
Number of clashes: 338
Number of clashes: 387
Number of clashes: 380
Number of clashes: 193
Number of clashes: 422
Number of clashes: 423
Number of clashes: 39
Number of clashes: 325
Number of clashes: 171
Number of clashes: 229
Number of clashes: 396
Number of clas

Number of clashes: 1
Number of clashes: 467
Number of clashes: 753
Number of clashes: 651
Number of clashes: 229
Number of clashes: 621
Number of clashes: 539
Number of clashes: 662
Number of clashes: 1
Number of clashes: 237
Number of clashes: 714
Number of clashes: 336
Number of clashes: 137
Number of clashes: 533
Number of clashes: 1
Number of clashes: 600
Number of clashes: 549
Number of clashes: 1
Number of clashes: 514
Number of clashes: 1
Number of clashes: 158
Number of clashes: 385
Number of clashes: 459
Number of clashes: 1
Number of clashes: 466
Number of clashes: 142
Number of clashes: 427
Number of clashes: 366
Number of clashes: 259
Number of clashes: 630
Number of clashes: 682
Number of clashes: 608
Number of clashes: 658
Number of clashes: 120
Number of clashes: 3
Number of clashes: 442
Number of clashes: 270
Number of clashes: 366
Number of clashes: 567
Number of clashes: 636
Number of clashes: 386
Number of clashes: 486
Number of clashes: 190
Number of clashes: 557
Nu

Number of clashes: 627
Number of clashes: 578
Number of clashes: 633
Number of clashes: 545
Number of clashes: 443
Number of clashes: 3
Number of clashes: 611
Number of clashes: 919
Number of clashes: 1
Number of clashes: 559
Number of clashes: 817
Number of clashes: 517
Number of clashes: 641
Number of clashes: 706
Number of clashes: 570
Number of clashes: 686
Number of clashes: 137
Number of clashes: 519
Number of clashes: 720
Number of clashes: 482
Number of clashes: 473
Number of clashes: 569
Number of clashes: 398
Number of clashes: 356
Number of clashes: 421
Number of clashes: 701
Number of clashes: 588
Number of clashes: 649
Number of clashes: 315
Number of clashes: 463
Number of clashes: 621
Number of clashes: 319
Number of clashes: 1
Number of clashes: 23
Number of clashes: 717
Number of clashes: 327
Number of clashes: 1
Number of clashes: 322
Number of clashes: 344
Number of clashes: 325
Number of clashes: 220
Number of clashes: 209
Number of clashes: 551
Number of clashes: 4

Number of clashes: 517
Number of clashes: 462
Number of clashes: 497
Number of clashes: 131
Number of clashes: 558
Number of clashes: 533
Number of clashes: 1
Number of clashes: 16
Number of clashes: 594
Number of clashes: 1
Number of clashes: 86
Number of clashes: 571
Number of clashes: 618
Number of clashes: 1
Number of clashes: 467
Number of clashes: 503
Number of clashes: 496
Number of clashes: 416
Number of clashes: 1
Number of clashes: 587
Number of clashes: 454
Number of clashes: 118
Number of clashes: 630
Number of clashes: 560
Number of clashes: 358
Number of clashes: 618
Number of clashes: 727
Number of clashes: 380
Number of clashes: 727
Number of clashes: 100
Number of clashes: 262
Number of clashes: 734
Number of clashes: 1
Number of clashes: 89
Number of clashes: 694
Number of clashes: 781
Number of clashes: 842
Number of clashes: 1
Number of clashes: 400
Number of clashes: 324
Number of clashes: 457
Number of clashes: 741
Number of clashes: 1
Number of clashes: 482
Numbe

In [None]:
def count_chain_contacts(pdb_file, chain1_id='A', chain2_id='B', threshold=5.0):
    def get_chain_coordinates(structure, chain_id):
        chain = structure[0][chain_id]  # Assuming only one model in the PDB file
        atoms = [atom for atom in chain.get_atoms()]
        coords = np.array([atom.coord for atom in atoms])
        return coords

    # Parse the PDB file and get the structure
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('structure', pdb_file)
    
    # Get coordinates of atoms in each chain
    chain1_coords = get_chain_coordinates(structure, chain1_id)
    chain2_coords = get_chain_coordinates(structure, chain2_id)
    
    # Calculate pairwise distances between all atoms in chain1 and chain2
    distances = cdist(chain1_coords, chain2_coords)
    
    # Find contacts (pairs of atoms within the distance threshold)
    contacts = np.where(distances < threshold)
    
    # List atom pairs that are in contact
    contacts_list = [(i, j, distances[i, j]) for i, j in zip(*contacts)]
    
    # Print the results
    print(f"Number of contacts: {len(contacts_list)}")
    #for contact in contacts_list:
    #    print(f"Atom in chain1 (index {contact[0]}) and atom in chain2 (index {contact[1]}) are within {contact[2]:.2f} Å")
    
    return len(contacts_list)

In [222]:
RFdiff_cleaned = all_RFdiff[all_RFdiff['Num Clashes']<5]


In [223]:
RFdiff_cleaned['Num RFDiff contacts'] = RFdiff_cleaned['RFDiff Path'].apply(count_chain_contacts)


Number of contacts: 14
Number of contacts: 17
Number of contacts: 17
Number of contacts: 6
Number of contacts: 15
Number of contacts: 32
Number of contacts: 39
Number of contacts: 0
Number of contacts: 18
Number of contacts: 6
Number of contacts: 20
Number of contacts: 21
Number of contacts: 47
Number of contacts: 37
Number of contacts: 15
Number of contacts: 12
Number of contacts: 22
Number of contacts: 16
Number of contacts: 14
Number of contacts: 16
Number of contacts: 24
Number of contacts: 34
Number of contacts: 6
Number of contacts: 22
Number of contacts: 13
Number of contacts: 34
Number of contacts: 16
Number of contacts: 21
Number of contacts: 17
Number of contacts: 13
Number of contacts: 14
Number of contacts: 2
Number of contacts: 18
Number of contacts: 2
Number of contacts: 0
Number of contacts: 17
Number of contacts: 35
Number of contacts: 10
Number of contacts: 12
Number of contacts: 16
Number of contacts: 16
Number of contacts: 19
Number of contacts: 1
Number of contacts:

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RFdiff_cleaned['Num RFDiff contacts'] = RFdiff_cleaned['RFDiff Path'].apply(count_chain_contacts)


In [224]:
RFdiff_cleaned = RFdiff_cleaned[
    RFdiff_cleaned['Num RFDiff contacts']>(np.mean(RFdiff_cleaned['Num RFDiff contacts'])+ \
     np.std(RFdiff_cleaned['Num RFDiff contacts']))]


In [225]:
RFdiff_cleaned=RFdiff_cleaned.sort_values('Num RFDiff contacts',ascending=False)

In [226]:
RFdiff_cleaned = RFdiff_cleaned.reset_index(drop=True)

In [227]:
len(RFdiff_cleaned)

50

In [228]:
view = nglview.show_structure_file(RFdiff_cleaned['RFDiff Path'][0])  
view.add_component(CD20_D,color='blue')
view

NGLWidget()

In [229]:
RFdiff_cleaned['Name']=RFdiff_cleaned['RF wts']+'_'+RFdiff_cleaned['File Name']

In [230]:
RFdiff_cleaned

Unnamed: 0,RFDiff Path,File Name,RF wts,Num Clashes,Num RFDiff contacts,Name
0,../RFdiffusion/30AUG24_FJOM/beta_wts/_872.pdb,872,beta,1,112,beta_872
1,../RFdiffusion/30AUG24_FJOM/beta_wts/_549.pdb,549,beta,2,98,beta_549
2,../RFdiffusion/30AUG24_FJOM/beta_wts/_606.pdb,606,beta,1,85,beta_606
3,../RFdiffusion/30AUG24_FJOM/beta_wts/_642.pdb,642,beta,1,84,beta_642
4,../RFdiffusion/30AUG24_FJOM/beta_wts/_468.pdb,468,beta,1,83,beta_468
5,../RFdiffusion/30AUG24_FJOM/beta_wts/_704.pdb,704,beta,1,82,beta_704
6,../RFdiffusion/30AUG24_FJOM/beta_wts/_402.pdb,402,beta,1,79,beta_402
7,../RFdiffusion/30AUG24_FJOM/beta_wts/_106.pdb,106,beta,1,78,beta_106
8,../RFdiffusion/30AUG24_FJOM/beta_wts/_923.pdb,923,beta,1,77,beta_923
9,../RFdiffusion/30AUG24_FJOM/beta_wts/_394.pdb,394,beta,1,77,beta_394


In [231]:
import shutil

# Define the new directory where you want to copy the files
new_directory = '../PMPNN/01SEP24/'


# Ensure the new directory exists, create it if it doesn't
os.makedirs(new_directory, exist_ok=True)

# Loop over each row in the RFdiff_cleaned DataFrame
for index, row in RFdiff_cleaned.iterrows():
    full_path = row['RFDiff Path']  # Original full path of the pdb file
    new_file_name = row['Name'] + '.pdb'  # New file name based on the 'name' column
    
    # Define the destination path
    destination = os.path.join(new_directory, new_file_name)
    
    # Copy the file to the new directory with the new name
    shutil.copy(full_path, destination)

print("All files have been copied and renamed successfully.")

All files have been copied and renamed successfully.
