## AlphaFold2 pLDDT values

This notebook reads the per-residue pLDDT confidence values from the AlphaFold2 (AF2) model of DfrB1 and saves them as a simple tab-separated table, so that the figures can be drawn uniformly in R.

In [1]:
# Load libraries
import re
import os
from collections import OrderedDict
import math
from Bio.PDB import *
from Bio import SeqIO
from Bio.Seq import *
from Bio.SeqRecord import *
import csv
import numpy as np
import pandas as pd

In [2]:
# Parse the AF2 model (PDB format) into a Biopython Structure object
structure_id = "DfrB1"
pdb_path = "/media/axelle/afe8c733-963d-4db8-a2ee-551a0b73c9d7/Angel/PhD_projects/R67_DMS_December2020/Figures/chimerax/AF2_Results/Dfrb1_6ca29_unrelaxed_rank_1_model_1.pdb"

parser = PDBParser()
structure = parser.get_structure(structure_id, pdb_path)

In [3]:
# Echo the parsed Structure object as the cell output (quick check that it loaded)
structure

<Structure id=DfrB1>

In [12]:
# Lookup table: three-letter residue name (as written in PDB files) -> one-letter code
# for the 20 standard amino acids
aa_three2one = {
    "ALA": "A", "ARG": "R", "ASN": "N", "ASP": "D",
    "CYS": "C", "GLU": "E", "GLN": "Q", "GLY": "G",
    "HIS": "H", "ILE": "I", "LEU": "L", "LYS": "K",
    "MET": "M", "PHE": "F", "PRO": "P", "SER": "S",
    "THR": "T", "TRP": "W", "TYR": "Y", "VAL": "V",
}

In [14]:
# Build two parallel lists, one entry per residue:
#   residue_list -> label made of the one-letter code and a sequential position (e.g. 'M1')
#   pLDDT_list   -> the B-factor of the residue's first atom
residue_list = []
pLDDT_list = []

# Sequential 1-based counter used for the labels; this is NOT the residue
# number stored in the PDB file, and it is not reset between models.
curr_pos = 1

# Loop through the residues in the structure
for model in structure:
    for chain in model:
        for residue in chain:
            # A residue name that is missing from aa_three2one raises KeyError
            new_residue = aa_three2one[residue.get_resname()] + str(curr_pos)
            curr_pos = curr_pos + 1

            # The pLDDT is saved as the B-factor in the file and it is the same for
            # all atoms of the same residue, so reading the first atom is enough.
            first_atom = next(iter(residue), None)
            if first_atom is None:
                # Residue without atoms: keep its row, with a missing value
                new_pLDDT = math.nan
            else:
                new_pLDDT = first_atom.get_bfactor()

            # Append both values together so the two lists always stay the
            # same length (required to build a DataFrame from them later).
            residue_list.append(new_residue)
            pLDDT_list.append(new_pLDDT)

        # Only the first chain of each model is read
        break

In [15]:
# Show the residue labels and their pLDDT values, one list per line
print(residue_list, pLDDT_list, sep="\n")

['M1', 'E2', 'R3', 'S4', 'S5', 'N6', 'E7', 'V8', 'S9', 'N10', 'P11', 'V12', 'A13', 'G14', 'N15', 'F16', 'V17', 'F18', 'P19', 'S20', 'D21', 'A22', 'T23', 'F24', 'G25', 'M26', 'G27', 'D28', 'R29', 'V30', 'R31', 'K32', 'K33', 'S34', 'G35', 'A36', 'A37', 'W38', 'Q39', 'G40', 'Q41', 'I42', 'V43', 'G44', 'W45', 'Y46', 'C47', 'T48', 'N49', 'L50', 'T51', 'P52', 'E53', 'G54', 'Y55', 'A56', 'V57', 'E58', 'S59', 'E60', 'A61', 'H62', 'P63', 'G64', 'S65', 'V66', 'Q67', 'I68', 'Y69', 'P70', 'V71', 'A72', 'A73', 'L74', 'E75', 'R76', 'I77', 'N78']
[23.38, 24.45, 22.48, 23.5, 21.5, 24.7, 26.87, 26.43, 29.29, 31.6, 34.04, 35.73, 41.4, 42.96, 52.32, 60.52, 66.95, 73.38, 77.34, 78.88, 87.6, 92.23, 96.71, 96.65, 93.37, 94.18, 96.29, 97.76, 98.81, 98.96, 98.95, 98.95, 98.79, 98.61, 98.59, 98.92, 98.96, 98.96, 98.95, 98.91, 98.85, 98.39, 98.24, 96.56, 96.25, 97.54, 96.33, 98.53, 98.19, 98.73, 98.56, 97.27, 97.12, 98.46, 98.76, 98.85, 98.97, 98.97, 98.97, 98.81, 98.91, 98.93, 98.9, 98.9, 98.96, 98.98, 98.96, 

In [17]:
# Put the two parallel lists side by side as the named columns of a table
data_dict = dict(Residue=residue_list, pLDDT=pLDDT_list)
new_df = pd.DataFrame(data=data_dict)

# Last expression of the cell: display the table
new_df

Unnamed: 0,Residue,pLDDT
0,M1,23.38
1,E2,24.45
2,R3,22.48
3,S4,23.50
4,S5,21.50
...,...,...
73,L74,98.92
74,E75,98.90
75,R76,98.38
76,I77,97.27


In [22]:
# Write the table as tab-separated text, leaving out the row index
output_path = '/media/axelle/afe8c733-963d-4db8-a2ee-551a0b73c9d7/Angel/PhD_projects/R67_DMS_December2020/Figures/chimerax/AF2_Results/model1_pLDDT.txt'
new_df.to_csv(output_path, sep='\t', index=False)