## Colouring of PDB structures using pLDDT values (Version AlphaFold2 and below)

This code is written by Eugene Gan, Ng Jing Ting and Tay Hui Yi for SP3172 - Decoding Capsule Synthesis, under the guidance of Prof. Sham Lok To.

In [1]:
# Import all required packages 

from Bio.PDB import *
import pickle
import glob
import os

In [None]:
# Change working directory to the folder with the PDB structure to be changed.
# Replace the "*" placeholder with the path to that folder before running.
# The placeholder is kept as a quoted string because a bare, unquoted * is a SyntaxError.

os.chdir("*")

In [None]:
# Confirm which directory the notebook is currently working in

current_dir = os.getcwd()
print(current_dir)

In [None]:
# Create the Bio.PDB parser object used to read the pdb file (*.pdb)

parser = PDBParser()

In [None]:
# Method 1: we can read *.pdb as its structure.
# "mol" is the id given to the parsed structure; replace "*.pdb" with the PDB filename.

structure = parser.get_structure("mol", "*.pdb")

In [None]:
# Method 2: we can view the header of *.pdb as a dictionary.
# Replace "*.pdb" with the PDB filename. The with-block closes the file
# even if parsing the header raises.

with open("*.pdb", "r") as handle:
    header_dict = parse_pdb_header(handle)

In [None]:
# Print the current B-factor of every atom in the structure

for atom in structure.get_atoms():
    b_factor = atom.get_bfactor()
    print(b_factor)

In [None]:
# Check to see if the structure obj is properly initialised:
# print every atom, then print the total number of atoms once at the end.

counter = 0  # stays 0 if the structure contains no atoms
for counter, atom in enumerate(structure.get_atoms(), start=1):
    print(atom)
print(counter)  # Corresponds to the number of atoms in *.pdb

In [None]:
# Read the pkl file (*.pkl) generated from AlphaFold to obtain the pLDDT values (i.e. new B factors).
# Replace '*.pkl' with the pickle filename.
# NOTE: unpickling can execute arbitrary code stored in the file, so only load
# pkl files that come from a source you trust (e.g. your own AlphaFold run).

with open('*.pkl', 'rb') as pickle_file:
    new_data = pickle.load(pickle_file)

In [None]:
# Write the contents of the pkl file as text into a normal file that can be read easily,
# so the extraction of the pLDDT values can be checked by eye.
# The with-block closes the file even if the write fails.

with open("pkl_output", "w") as f:
    f.write(str(new_data))

In [None]:
# Check if pkl file has been written correctly.
# The with-block makes sure the file handle is closed once it has been read.

with open("pkl_output", "r") as f:
    print(f.read())

In [None]:
# Write pLDDT values into a new file, one value per line

# Look the "plddt" entry up directly instead of scanning every key.
# If the key is absent there are no values: the file is left empty and count is 0.
plddt_scores = new_data.get("plddt", ())
count = len(plddt_scores)  # track the number of pLDDT values

# The with-block closes the file even if a write fails.
with open("pLDDT_values", "w") as f:
    f.writelines(str(score) + '\n' for score in plddt_scores)

In [None]:
# Check if pLDDT values are correctly formatted.
# The filename must match the file written above exactly ("pLDDT_values"):
# on a case-sensitive filesystem a different capitalisation is a different file.

with open("pLDDT_values", "r") as f:
    print(f.read())
print(count)  # Number of pLDDT values written (expected: one per residue in *.pdb)

In [None]:
# Write b-factor values into a list

# The with-block closes the file once all lines have been read.
with open("pLDDT_values", "r") as inFile:
    lines = inFile.readlines()

# float() ignores the trailing newline by itself, so the whole line is converted.
# Slicing characters off the end would also drop the last digit of each value.
# Blank lines are skipped.
newB = [float(line) for line in lines if line.strip()]

In [None]:
# Sanity check: show how many confidence values were read, then the float values themselves

print(len(newB), newB, sep="\n")

In [None]:
# Get the residues in the structure and place them in a list
# ("R" asks Selection.unfold_entities for the residue level)
res_lst = Selection.unfold_entities(structure, "R")
res_lst  # bare expression so the notebook displays the list

In [None]:
# Each pLDDT score corresponds to a particular residue
# Change all b-factor values of atoms under a particular residue to its respective pLDDT score

# Fail early with a clear message if there are fewer scores than residues.
if len(newB) < len(res_lst):
    raise ValueError(
        "Only {} pLDDT values for {} residues".format(len(newB), len(res_lst))
    )

counter = 0  # number of atoms whose b-factor has been updated
# Pair every residue with its score by position, rather than searching the
# residue list again for each atom.
for res, score in zip(res_lst, newB):
    for atom in res:
        atom.set_bfactor(score)
        counter += 1
print(counter) # Corresponds to the number of atoms in *.pdb

In [None]:
# Save the structure with the updated b factor values to a new PDB file
# (replace "filename.pdb" with the desired output filename)

io = PDBIO()
io.set_structure(structure)
io.save("filename.pdb")