# Generate ref2015 binding energy for the test set

The ref2015 score does not need any training. We use the default ref2015 energy terms to score the complex, the ligand and the receptor separately. Finally, we calculate the binding energy as:

$E_{binding} = E_{complex} - (E_{receptor}+E_{ligand})$

In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
import pyrosetta

In [11]:
# Read the test set, expose the "fileloc" column under the name "path",
# and keep only the columns listed below.
kept_columns = ["allele", "peptide", "ba", "binder", "path", "allele_type"]
test_set = pd.read_csv("../Datasets/test_set.csv")
test_set["path"] = test_set["fileloc"]
test_set = test_set[kept_columns]

In [7]:
from modeller import *
from modeller.automodel import *
from modeller.scripts import complete_pdb
import subprocess


def reformat_path(path):
    """Rewrite an absolute path from the original workstation as a relative one.

    Paths that start with ``/home/anja/Documents/`` have that prefix replaced
    by ``../../``; every other path is returned unchanged.

    Args:
        path: File path string.

    Returns:
        The relocated path, or ``path`` itself when it does not start with
        the workstation prefix.
    """
    prefix = "/home/anja/Documents/"
    new = "../../"
    # Test the start of the string rather than mere containment: the slice
    # below is only meaningful when the prefix sits at position 0.
    if not path.startswith(prefix):
        return path
    return new + path[len(prefix):]


def extract_receptor_ligand(filename):
    """Split a complex PDB file into separate ligand and receptor PDB files.

    The structure is loaded with Modeller's ``complete_pdb``. The third chain
    (index 2) is written out as the ligand and the first two chains (indices
    0 and 1) together as the receptor, so the input must contain at least
    three chains in that order.

    Args:
        filename: Path to the PDB file of the complex.

    Returns:
        Tuple ``(lig_name, rec_name)`` with the paths of the two files that
        were written: the input path without its extension, followed by
        ``_ligand.pdb`` and ``_receptor.pdb`` respectively.
    """
    import os

    env = environ()
    env.libs.topology.read(file='$(LIB)/top_heav.lib')
    env.libs.parameters.read(file='$(LIB)/par.lib')
    mdl = complete_pdb(env, filename)

    atmsel_lig = selection(mdl.chains[2])
    atmsel_rec = selection(mdl.chains[0], mdl.chains[1])

    # splitext removes only the final extension. Slicing at
    # str.find(".pdb") would cut at the first ".pdb" anywhere in the path
    # and, when there is no match (-1), silently drop the last character.
    stem = os.path.splitext(filename)[0]
    lig_name = stem + "_ligand.pdb"
    rec_name = stem + "_receptor.pdb"
    atmsel_lig.write(lig_name)
    atmsel_rec.write(rec_name)

    return (lig_name, rec_name)

In [8]:
def get_ref2015_binding_energy(fileloc):
    """Score a complex with PyRosetta and return its binding energy.

    The complex is split by ``extract_receptor_ligand`` into temporary
    ligand and receptor PDB files. The ligand, the receptor and the full
    complex are each scored with the score function returned by
    ``pyrosetta.get_fa_scorefxn()``, and the temporary files are deleted
    afterwards, even when scoring fails.

    NOTE: ``pyrosetta.init()`` is expected to have been called beforehand.

    Args:
        fileloc: Path to the PDB file of the complex.

    Returns:
        ``E_complex - (E_receptor + E_ligand)`` as computed by the score
        function.
    """
    import contextlib
    import os

    # Extract ligand and receptor into their own PDB files.
    lfname, rfname = extract_receptor_ligand(fileloc)

    try:
        scorefxn = pyrosetta.get_fa_scorefxn()
        l_ene = scorefxn(pyrosetta.pose_from_pdb(lfname))
        r_ene = scorefxn(pyrosetta.pose_from_pdb(rfname))
        complex_ene = scorefxn(pyrosetta.pose_from_pdb(fileloc))
    finally:
        # Remove the temporary files without going through a shell, so paths
        # containing spaces or shell metacharacters are handled correctly.
        # Deletion stays best-effort: a failure to remove a file is ignored.
        for tmp_name in (lfname, rfname):
            with contextlib.suppress(OSError):
                os.remove(tmp_name)

    return complex_ene - (r_ene + l_ene)

In [None]:
import pyrosetta

# Initialise PyRosetta, rewrite the stored paths with reformat_path, then
# compute one binding energy per structure file.
pyrosetta.init()
relocated_paths = test_set["path"].apply(reformat_path)
test_set["path"] = relocated_paths
test_set["ref2015-score"] = relocated_paths.apply(get_ref2015_binding_energy)

In [None]:
# Write the scored test set (including the new "ref2015-score" column) to CSV.
test_set.to_csv(path_or_buf="ref2015-score_testset_results.csv")