In [1]:
import os
import pandas as pd
import numpy as np
from Bio import PDB
import pickle

In [2]:
# To map the protein sequence of the PDB structure to the UniProt protein sequence, we perform the alignments using the SSW library.
# Zhao, M., Lee, W. P., Garrison, E. P., & Marth, G. T. (2013). SSW library: an SIMD Smith-Waterman C/C++ library for use in genomic applications. PloS one, 8(12), e82138.

# Therefore, the alignment results are needed for preprocessing. 
# You must either install the SSW library yourself or utilize the code provided on the MONN github to get alignment results.
# https://github.com/lishuya17/MONN/tree/master/create_dataset/smith-waterman-src

# This GitHub repository does not provide code for protein sequence alignment.
# Example files of the alignment results can be found in the ./fasta dir.

### 1. Load data

In [2]:
def _load_step5_table(dataset_name):
    """Read the step-5 TSV of one dataset and print its number of rows."""
    table = pd.read_csv(f"./preprocessed_data/step5_{dataset_name}_data.tsv", sep = "\t")
    print(f"[{dataset_name}]: {len(table)}")
    return table

# One DataFrame per dataset, loaded (and reported) in a fixed order.
PDBbind_df = _load_step5_table("PDBbind")
CASF2016_df = _load_step5_table("CASF2016")
CASF2013_df = _load_step5_table("CASF2013")
CSAR2014_df = _load_step5_table("CSAR2014")
CSAR2012_df = _load_step5_table("CSAR2012")
CSARset1_df = _load_step5_table("CSARset1")
CSARset2_df = _load_step5_table("CSARset2")
Astex_df = _load_step5_table("Astex")
COACH420_df = _load_step5_table("COACH420")
HOLO4K_df = _load_step5_table("HOLO4K")

[PDBbind]: 13663
[CASF2016]: 269
[CASF2013]: 172
[CSAR2014]: 46
[CSAR2012]: 55
[CSARset1]: 152
[CSARset2]: 135
[Astex]: 74
[COACH420]: 323
[HOLO4K]: 3911


In [3]:
def _load_step5_interactions(dataset_name):
    """Unpickle the step-5 interaction results of one dataset and print how many entries it holds."""
    # NOTE: pickle.load can execute arbitrary code; only open files produced by the previous step.
    with open(f"./preprocessed_data/step5_{dataset_name}_interaction.pkl", "rb") as handle:
        interactions = pickle.load(handle)
        print(f"[{dataset_name}]: {len(interactions)}")
    return interactions

# One interaction container per dataset, loaded (and reported) in a fixed order.
PDBbind_interact_results = _load_step5_interactions("PDBbind")
CASF2016_interact_results = _load_step5_interactions("CASF2016")
CASF2013_interact_results = _load_step5_interactions("CASF2013")
CSAR2014_interact_results = _load_step5_interactions("CSAR2014")
CSAR2012_interact_results = _load_step5_interactions("CSAR2012")
CSARset1_interact_results = _load_step5_interactions("CSARset1")
CSARset2_interact_results = _load_step5_interactions("CSARset2")
Astex_interact_results = _load_step5_interactions("Astex")
COACH420_interact_results = _load_step5_interactions("COACH420")
HOLO4K_interact_results = _load_step5_interactions("HOLO4K")

[PDBbind]: 13663
[CASF2016]: 269
[CASF2013]: 172
[CSAR2014]: 46
[CSAR2012]: 55
[CSARset1]: 152
[CSARset2]: 135
[Astex]: 74
[COACH420]: 323
[HOLO4K]: 3911


### 2. Mapping PLIP results

In [4]:
def get_results_dict(path):
    """Parse an alignment report into one record per target.

    The file is read in groups of four lines, keyed on ``line_number % 4``:

    * slot 0 - a header containing ``target_name`` (its last space-separated
      token is the record name), otherwise a chunk of the aligned target
      sequence (second tab-separated field);
    * slot 1 - a header containing ``query_name`` (ignored), otherwise a chunk
      of the match line (second tab-separated field, spaces preserved);
    * slot 2 - a header containing ``optimal_alignment_score`` from which the
      ``target_begin:`` and ``query_begin:`` integers are taken, otherwise a
      chunk of the aligned query sequence (second tab-separated field);
    * slot 3 - ignored.

    Parameters
    ----------
    path : str
        Path of the alignment report.

    Returns
    -------
    dict
        ``{target_name: (seq_target, seq_query, align, target_start, query_start)}``.
        A record that is followed by another header is kept only if its target
        sequence is non-empty; the last record of the file is always kept.
        An empty file yields an empty dict.
    """
    result_dict = dict()
    target_name = None
    seq_target, seq_query, align = "", "", ""
    # Begin positions are not reset between records: a record without a score
    # line keeps the values of the previous one (None if there was none).
    target_start, query_start = None, None

    # `with` guarantees the handle is released even if a malformed line raises.
    with open(path, "r") as f:
        for i, line in enumerate(f):
            slot = i % 4

            if slot == 0:
                if 'target_name' in line:
                    # A new record starts: flush the one collected so far.
                    if len(seq_target) != 0:
                        result_dict[target_name] = (seq_target, seq_query, align, target_start, query_start)
                    target_name = line.strip().split(' ')[-1]
                    seq_target, seq_query, align = '', '', ''
                else:
                    seq_target += line.split('\t')[1]

            elif slot == 1:
                if 'query_name' not in line:
                    # Only the newline is stripped: blanks in the match line are significant.
                    align += line.strip('\n').split('\t')[1]

            elif slot == 2:
                if 'optimal_alignment_score' in line:
                    for item in line.strip().split('\t'):
                        fields = item.split(' ')
                        if fields[0] == 'target_begin:':
                            target_start = int(fields[1])
                        elif fields[0] == 'query_begin:':
                            query_start = int(fields[1])
                else:
                    seq_query += line.split('\t')[1]

    # Flush the trailing record; nothing to flush when no header was ever seen.
    if target_name is not None:
        result_dict[target_name] = (seq_target, seq_query, align, target_start, query_start)

    return result_dict

def seq_with_gap_to_idx(seq):
    """Number the non-gap characters of an aligned sequence.

    Every '-' yields -1; every other character yields its 0-based rank among
    the non-gap characters. The result has one entry per character of ``seq``.
    """
    positions = []
    next_residue = 0
    for symbol in seq:
        is_gap = symbol == '-'
        positions.append(-1 if is_gap else next_residue)
        if not is_gap:
            next_residue += 1
    return positions

def get_target_idx(target_idx_list, query_idx_list, align, target_start, query_start):
    """Build the per-target-residue list of query positions.

    The list starts with ``target_start - 1`` entries of -1. After that, each
    alignment column whose target entry is not -1 contributes one value:
    ``query_idx + query_start - 1`` when the match line shows '|' and the query
    entry is not -1, otherwise -1. Columns whose target entry is -1 add nothing.
    """
    mapping = [-1] * (target_start - 1)

    for col, target_pos in enumerate(target_idx_list):
        if target_pos == -1:
            continue
        is_match = align[col] == '|' and query_idx_list[col] != -1
        mapping.append(query_idx_list[col] + query_start - 1 if is_match else -1)

    return mapping

def get_pdb_to_uniprot_map(result_dict, min_identity=0.9):
    """Turn parsed alignments into per-chain PDB -> UniProt index lists.

    Parameters
    ----------
    result_dict : dict
        ``{"<pdbid>_<chain>": (seq_target, seq_query, align, target_start, query_start)}``
        as returned by ``get_results_dict``. Each key must contain exactly one
        underscore.
    min_identity : float, optional
        Minimum value of ``align.count('|') / number of non-gap target characters``
        for a chain to be kept (default 0.9).

    Returns
    -------
    dict
        ``{pdbid: {chain: pdb_to_uniprot_idx}}``, where ``pdb_to_uniprot_idx`` is
        the list produced by ``get_target_idx``. Chains below the threshold, or
        whose target sequence has no non-gap character, are left out.
    """
    pdb_to_uniprot_map_dict = dict()

    for name, (seq_target, seq_query, align, target_start, query_start) in result_dict.items():
        pdbid, chain = name.split('_')

        n_target_residues = len(seq_target.replace('-', ''))
        if n_target_residues == 0:
            # Nothing was aligned for this chain; the ratio would divide by zero.
            continue

        ratio = align.count('|') / n_target_residues
        if ratio < min_identity:
            continue

        target_idx_list = seq_with_gap_to_idx(seq_target)
        query_idx_list = seq_with_gap_to_idx(seq_query)
        pdb_to_uniprot_idx = get_target_idx(target_idx_list, query_idx_list, align, target_start, query_start)

        pdb_to_uniprot_map_dict.setdefault(pdbid, {})[chain] = pdb_to_uniprot_idx

    return pdb_to_uniprot_map_dict

In [5]:
def _parse_alignment_file(dataset_name):
    """Parse the alignment report of one dataset and print how many records it contains."""
    records = get_results_dict(f"./fasta/{dataset_name}_pdb_align.txt")
    print(f"[{dataset_name}]: {len(records)}")
    return records

# One parsed alignment dictionary per dataset, in a fixed order.
PDBbind_mapping_results = _parse_alignment_file("PDBbind")
CASF2016_mapping_results = _parse_alignment_file("CASF2016")
CASF2013_mapping_results = _parse_alignment_file("CASF2013")
CSAR2014_mapping_results = _parse_alignment_file("CSAR2014")
CSAR2012_mapping_results = _parse_alignment_file("CSAR2012")
CSARset1_mapping_results = _parse_alignment_file("CSARset1")
CSARset2_mapping_results = _parse_alignment_file("CSARset2")
Astex_mapping_results = _parse_alignment_file("Astex")
COACH420_mapping_results = _parse_alignment_file("COACH420")
HOLO4K_mapping_results = _parse_alignment_file("HOLO4K")

[PDBbind]: 29311
[CASF2016]: 588
[CASF2013]: 341
[CSAR2014]: 47
[CSAR2012]: 73
[CSARset1]: 301
[CSARset2]: 257
[Astex]: 142
[COACH420]: 522
[HOLO4K]: 5696


In [6]:
def _build_pdb_to_uniprot_map(dataset_name, mapping_results):
    """Convert one dataset's parsed alignments into its PDB -> UniProt map and print the number of PDB ids kept."""
    residue_map = get_pdb_to_uniprot_map(mapping_results)
    print(f"[{dataset_name}]: {len(residue_map)}")
    return residue_map

# One PDB -> UniProt map per dataset, in a fixed order.
PDBbind_pdb_to_uniprot_map_dict = _build_pdb_to_uniprot_map("PDBbind", PDBbind_mapping_results)
CASF2016_pdb_to_uniprot_map_dict = _build_pdb_to_uniprot_map("CASF2016", CASF2016_mapping_results)
CASF2013_pdb_to_uniprot_map_dict = _build_pdb_to_uniprot_map("CASF2013", CASF2013_mapping_results)
CSAR2014_pdb_to_uniprot_map_dict = _build_pdb_to_uniprot_map("CSAR2014", CSAR2014_mapping_results)
CSAR2012_pdb_to_uniprot_map_dict = _build_pdb_to_uniprot_map("CSAR2012", CSAR2012_mapping_results)
CSARset1_pdb_to_uniprot_map_dict = _build_pdb_to_uniprot_map("CSARset1", CSARset1_mapping_results)
CSARset2_pdb_to_uniprot_map_dict = _build_pdb_to_uniprot_map("CSARset2", CSARset2_mapping_results)
Astex_pdb_to_uniprot_map_dict = _build_pdb_to_uniprot_map("Astex", Astex_mapping_results)
COACH420_pdb_to_uniprot_map_dict = _build_pdb_to_uniprot_map("COACH420", COACH420_mapping_results)
HOLO4K_pdb_to_uniprot_map_dict = _build_pdb_to_uniprot_map("HOLO4K", HOLO4K_mapping_results)

[PDBbind]: 14433
[CASF2016]: 268
[CASF2013]: 170
[CSAR2014]: 46
[CSAR2012]: 55
[CSARset1]: 152
[CSARset2]: 132
[Astex]: 74
[COACH420]: 266
[HOLO4K]: 3167


In [8]:
def read_fasta(fasta_file):
    """Read a FASTA-like file into ``{"<pdbid>_<chain>": (sequence, name)}``.

    A header line starts with '>' and is split on '_': the first field (minus
    the '>') is the PDB id, the second the chain, and the last is stored as
    ``name``. Every following non-blank line is taken as the sequence of the
    most recent header; if several such lines follow one header, the last one
    wins (sequences are expected on a single line).

    Parameters
    ----------
    fasta_file : str
        Path of the file to read.

    Returns
    -------
    dict
        Mapping ``"<pdbid>_<chain>"`` -> ``(sequence, name)``.

    Raises
    ------
    ValueError
        If a sequence line appears before any header line.
    """
    uniprot_seq_dict = {}
    pdbid = chain = name = None

    # `with` closes the file even when a malformed header raises.
    with open(fasta_file) as f:
        for line in f:
            if line[0] == '>':
                fields = line.strip().split('_')
                pdbid = fields[0][1:]
                chain = fields[1]
                name = fields[-1]
                continue

            seq = line.strip()
            if not seq:
                # A blank (e.g. trailing) line must not wipe the stored sequence.
                continue
            if pdbid is None:
                raise ValueError(f"sequence line found before any '>' header in {fasta_file}")
            uniprot_seq_dict[f"{pdbid}_{chain}"] = (seq, name)

    return uniprot_seq_dict

def get_interact_in_uniprot_seq(pdb_to_uniprot, seq_dict, residue_interact):
    """Translate interacting PDB residues into UniProt sequence positions.

    Parameters
    ----------
    pdb_to_uniprot : dict
        ``{chain: [uniprot_index or -1, ...]}`` indexed by position in the
        chain's residue list.
    seq_dict : dict
        ``{chain: (sequence, "n1,n2,...")}`` where the second element is the
        comma-separated residue numbering of that chain.
    residue_interact : iterable
        Items indexable as ``item[0]`` = chain character immediately followed
        by the residue number (e.g. ``"A123"``), ``item[1]`` = string appended
        to the residue record, ``item[2]`` = bond type.

    Returns
    -------
    tuple
        ``(interact_in_uniprot_seq_list, residue_record, residue_bond_type, chain)``.
        ``chain`` is the chain of the LAST item iterated, whether or not that
        item was mapped, and ``None`` when ``residue_interact`` is empty.

    Notes
    -----
    An item is skipped when its chain has no mapping, when its residue number
    is absent from the chain numbering (a warning is printed), when its
    position lies beyond the mapping list, or when it maps to -1.
    """
    interact_in_uniprot_seq_list, residue_bond_type, residue_record = list(), list(), ""
    chain = None
    numbering_by_chain = dict()  # parsed residue numbering, computed once per chain

    for item in residue_interact:

        chain, idx = item[0][0], int(item[0][1:])
        if chain not in pdb_to_uniprot:
            continue

        if chain not in numbering_by_chain:
            sequence, idx_string = seq_dict[chain]
            numbering_by_chain[chain] = list(map(int, idx_string.split(",")))
        idx_list = numbering_by_chain[chain]

        occurrences = idx_list.count(idx)
        if occurrences != 1:
            print(idx, occurrences, 'idx_list.count(idx) != 1')
            if occurrences == 0:
                # The residue number is unknown for this chain: nothing to map.
                continue

        # With duplicated numbers the first occurrence is used.
        seq_pos = idx_list.index(idx)
        if seq_pos >= len(pdb_to_uniprot[chain]):
            continue

        interact_idx = pdb_to_uniprot[chain][seq_pos]
        if interact_idx == -1:
            continue

        interact_in_uniprot_seq_list.append(interact_idx)
        residue_bond_type.append((interact_idx, item[2]))
        residue_record += item[1]

    return interact_in_uniprot_seq_list, residue_record, residue_bond_type, chain
        
def mapping_interaction(interaction_dict, pdb_to_uniprot_map_dict, fasta_file):
    """Attach UniProt-based residue positions to every interaction record.

    Parameters
    ----------
    interaction_dict : dict
        ``{"<pdbid>_<ligand>": record}`` where ``record`` provides the keys
        'sequence', 'residue_interact', 'atom_idx', 'atom_name',
        'atom_element', 'atom_interact' and 'atom_bond_type'.
    pdb_to_uniprot_map_dict : dict
        ``{pdbid: {chain: index_list}}`` as built by ``get_pdb_to_uniprot_map``.
    fasta_file : str
        File read with ``read_fasta`` to obtain ``{"<pdbid>_<chain>": (sequence, id)}``.

    Returns
    -------
    dict
        ``{"<pdbid>_<ligand>": {...}}`` holding the atom-level fields copied
        from the input plus 'uniprot_id', 'uniprot_seq',
        'interact_in_uniprot_seq' and 'residue_bond_type'.

    Notes
    -----
    A complex is reported with an ``[Error: n]`` line and skipped when its PDB
    id has no UniProt mapping, when it has no interacting residue, or when its
    chain is absent from the FASTA file. It is skipped silently when the
    residues read at the mapped UniProt positions differ from the recorded
    ones, or when a mapped position lies beyond the UniProt sequence.
    The UniProt sequence is looked up with the chain returned by
    ``get_interact_in_uniprot_seq``.
    """
    uniprot_seq_dict = read_fasta(fasta_file)
    final_results = dict()
    error_count = 0

    for item, record in interaction_dict.items():

        pdbid, ligand = item.split("_")

        if pdbid not in pdb_to_uniprot_map_dict:
            error_count += 1
            print(f"[Error: {error_count}] PDB id: {pdbid}, Ligand code: {ligand} is not in uniprot mapping dict")
            continue

        seq_dict = record["sequence"]
        residue_interact = record["residue_interact"]

        assert residue_interact is not None

        if len(residue_interact) == 0:
            # Without any residue there is no chain to select the UniProt sequence with.
            error_count += 1
            print(f"[Error: {error_count}] PDB id: {pdbid}, Ligand code: {ligand} has no interacting residue")
            continue

        pdb_to_uniprot = pdb_to_uniprot_map_dict[pdbid]

        interact_in_uniprot_seq_list, residue_record, residue_bond_type, chain = get_interact_in_uniprot_seq(pdb_to_uniprot, seq_dict, residue_interact)

        fasta_key = f"{pdbid}_{chain}"
        if fasta_key not in uniprot_seq_dict:
            error_count += 1
            print(f"[Error: {error_count}] PDB id: {pdbid}, Ligand code: {ligand}: chain {chain} is not in the uniprot fasta file")
            continue

        uniprot_seq, uniprot_id = uniprot_seq_dict[fasta_key]

        # Sanity check: the residues found at the mapped UniProt positions must
        # spell exactly the residues recorded for the interaction.
        try:
            mapped_residues = ''.join(uniprot_seq[pos] for pos in interact_in_uniprot_seq_list)
        except IndexError:
            # A position beyond the UniProt sequence cannot match.
            continue
        if residue_record != mapped_residues:
            continue

        final_results[item] = {
            'atom_idx': record['atom_idx'],
            'atom_name': record['atom_name'],
            'atom_element': record['atom_element'],
            'atom_interact': record['atom_interact'],
            'atom_bond_type': record['atom_bond_type'],
            'uniprot_id': uniprot_id,
            'uniprot_seq': uniprot_seq,
            'interact_in_uniprot_seq': interact_in_uniprot_seq_list,
            'residue_bond_type': residue_bond_type,
        }

    return final_results

#### 2.1 PDBbind dataset

In [9]:
# Map the PDBbind interactions onto UniProt positions and print how many complexes were kept.
PDBbind_results = mapping_interaction(
    interaction_dict=PDBbind_interact_results,
    pdb_to_uniprot_map_dict=PDBbind_pdb_to_uniprot_map_dict,
    fasta_file="./fasta/PDBbind_target_uniprot_pdb.fasta",
)
print(f"[PDBbind]: {len(PDBbind_results)}")

[Error: 1] PDB id: 5wqc, Ligand code: 7MA is not in uniprot mapping dict
[Error: 2] PDB id: 4xy2, Ligand code: 490 is not in uniprot mapping dict
[Error: 3] PDB id: 4ymj, Ligand code: 4EJ is not in uniprot mapping dict
[Error: 4] PDB id: 6drz, Ligand code: H8J is not in uniprot mapping dict
[Error: 5] PDB id: 3sv7, Ligand code: SV6 is not in uniprot mapping dict
[Error: 6] PDB id: 5jga, Ligand code: 6KC is not in uniprot mapping dict
[Error: 7] PDB id: 6nrh, Ligand code: KYP is not in uniprot mapping dict
[Error: 8] PDB id: 6dm8, Ligand code: 6AK is not in uniprot mapping dict
[Error: 9] PDB id: 3suf, Ligand code: SUE is not in uniprot mapping dict
[Error: 10] PDB id: 5c2e, Ligand code: 4Y1 is not in uniprot mapping dict
[Error: 11] PDB id: 4v25, Ligand code: SZ6 is not in uniprot mapping dict
[Error: 12] PDB id: 6a94, Ligand code: ZOT is not in uniprot mapping dict
[Error: 13] PDB id: 6kdz, Ligand code: D79 is not in uniprot mapping dict
[Error: 14] PDB id: 5e7r, Ligand code: 5KW is n

[Error: 133] PDB id: 5iub, Ligand code: 6DV is not in uniprot mapping dict
[Error: 134] PDB id: 4wn1, Ligand code: 3SJ is not in uniprot mapping dict
[Error: 135] PDB id: 2wey, Ligand code: EV1 is not in uniprot mapping dict
[Error: 136] PDB id: 4gs6, Ligand code: 1FM is not in uniprot mapping dict
[Error: 137] PDB id: 6me3, Ligand code: JEY is not in uniprot mapping dict
[Error: 138] PDB id: 3ggu, Ligand code: 017 is not in uniprot mapping dict
[Error: 139] PDB id: 3odu, Ligand code: ITD is not in uniprot mapping dict
[Error: 140] PDB id: 5tzy, Ligand code: 7OS is not in uniprot mapping dict
[Error: 141] PDB id: 4qkx, Ligand code: 35V is not in uniprot mapping dict
[Error: 142] PDB id: 4ie6, Ligand code: UN9 is not in uniprot mapping dict
[Error: 143] PDB id: 3v5q, Ligand code: 0F4 is not in uniprot mapping dict
[Error: 144] PDB id: 6me4, Ligand code: ML2 is not in uniprot mapping dict
[Error: 145] PDB id: 5ws3, Ligand code: 7MA is not in uniprot mapping dict
[Error: 146] PDB id: 5kw2

#### 2.2 CASF2016 dataset

In [10]:
# Map the CASF2016 interactions onto UniProt positions and print how many complexes were kept.
CASF2016_results = mapping_interaction(
    interaction_dict=CASF2016_interact_results,
    pdb_to_uniprot_map_dict=CASF2016_pdb_to_uniprot_map_dict,
    fasta_file="./fasta/CASF2016_target_uniprot_pdb.fasta",
)
print(f"[CASF2016]: {len(CASF2016_results)}")

[Error: 1] PDB id: 5c28, Ligand code: 4XV is not in uniprot mapping dict
[CASF2016]: 268


#### 2.3 CASF2013 dataset

In [11]:
# Map the CASF2013 interactions onto UniProt positions and print how many complexes were kept.
CASF2013_results = mapping_interaction(
    interaction_dict=CASF2013_interact_results,
    pdb_to_uniprot_map_dict=CASF2013_pdb_to_uniprot_map_dict,
    fasta_file="./fasta/CASF2013_target_uniprot_pdb.fasta",
)
print(f"[CASF2013]: {len(CASF2013_results)}")

[Error: 1] PDB id: 3su2, Ligand code: TSV is not in uniprot mapping dict
[Error: 2] PDB id: 3su3, Ligand code: SU3 is not in uniprot mapping dict
[Error: 3] PDB id: 3su5, Ligand code: SU3 is not in uniprot mapping dict
[CASF2013]: 169


#### 2.4 CSAR2014 dataset

In [12]:
# Map the CSAR2014 interactions onto UniProt positions and print how many complexes were kept.
CSAR2014_results = mapping_interaction(
    interaction_dict=CSAR2014_interact_results,
    pdb_to_uniprot_map_dict=CSAR2014_pdb_to_uniprot_map_dict,
    fasta_file="./fasta/CSAR2014_target_uniprot_pdb.fasta",
)
print(f"[CSAR2014]: {len(CSAR2014_results)}")

[CSAR2014]: 46


#### 2.5 CSAR2012 dataset

In [13]:
# Map the CSAR2012 interactions onto UniProt positions and print how many complexes were kept.
CSAR2012_results = mapping_interaction(
    interaction_dict=CSAR2012_interact_results,
    pdb_to_uniprot_map_dict=CSAR2012_pdb_to_uniprot_map_dict,
    fasta_file="./fasta/CSAR2012_target_uniprot_pdb.fasta",
)
print(f"[CSAR2012]: {len(CSAR2012_results)}")

[CSAR2012]: 55


#### 2.6 CSARset1 dataset

In [14]:
# Map the CSARset1 interactions onto UniProt positions and print how many complexes were kept.
CSARset1_results = mapping_interaction(
    interaction_dict=CSARset1_interact_results,
    pdb_to_uniprot_map_dict=CSARset1_pdb_to_uniprot_map_dict,
    fasta_file="./fasta/CSARset1_target_uniprot_pdb.fasta",
)
print(f"[CSARset1]: {len(CSARset1_results)}")

[CSARset1]: 152


#### 2.7 CSARset2 dataset

In [15]:
# Map the CSARset2 interactions onto UniProt positions and print how many complexes were kept.
CSARset2_results = mapping_interaction(
    interaction_dict=CSARset2_interact_results,
    pdb_to_uniprot_map_dict=CSARset2_pdb_to_uniprot_map_dict,
    fasta_file="./fasta/CSARset2_target_uniprot_pdb.fasta",
)
print(f"[CSARset2]: {len(CSARset2_results)}")

[Error: 1] PDB id: 1lnm, Ligand code: DTX is not in uniprot mapping dict
[Error: 2] PDB id: 1zhy, Ligand code: CLR is not in uniprot mapping dict
[Error: 3] PDB id: 1zhx, Ligand code: HC3 is not in uniprot mapping dict
[CSARset2]: 132


#### 2.8 Astex dataset

In [16]:
# Map the Astex interactions onto UniProt positions and print how many complexes were kept.
Astex_results = mapping_interaction(
    interaction_dict=Astex_interact_results,
    pdb_to_uniprot_map_dict=Astex_pdb_to_uniprot_map_dict,
    fasta_file="./fasta/Astex_target_uniprot_pdb.fasta",
)
print(f"[Astex]: {len(Astex_results)}")

[Astex]: 74


#### 2.9 COACH420 dataset

In [17]:
# Map the COACH420 interactions onto UniProt positions and print how many complexes were kept.
COACH420_results = mapping_interaction(
    interaction_dict=COACH420_interact_results,
    pdb_to_uniprot_map_dict=COACH420_pdb_to_uniprot_map_dict,
    fasta_file="./fasta/COACH420_target_uniprot_pdb.fasta",
)
print(f"[COACH420]: {len(COACH420_results)}")

[Error: 1] PDB id: 3hvl, Ligand code: SRL is not in uniprot mapping dict
[COACH420]: 322


#### 2.10 HOLO4K dataset

In [18]:
# Map the HOLO4K interactions onto UniProt positions and print how many complexes were kept.
HOLO4K_results = mapping_interaction(
    interaction_dict=HOLO4K_interact_results,
    pdb_to_uniprot_map_dict=HOLO4K_pdb_to_uniprot_map_dict,
    fasta_file="./fasta/HOLO4K_target_uniprot_pdb.fasta",
)
print(f"[HOLO4K]: {len(HOLO4K_results)}")

[Error: 1] PDB id: 1brw, Ligand code: URA is not in uniprot mapping dict
[Error: 2] PDB id: 1jf0, Ligand code: CZH is not in uniprot mapping dict
[Error: 3] PDB id: 1lke, Ligand code: DOG is not in uniprot mapping dict
[Error: 4] PDB id: 1lnm, Ligand code: DTX is not in uniprot mapping dict
[Error: 5] PDB id: 1men, Ligand code: GAR is not in uniprot mapping dict
[Error: 6] PDB id: 1n0s, Ligand code: FLU is not in uniprot mapping dict
[Error: 7] PDB id: 1n3i, Ligand code: DIH is not in uniprot mapping dict
[Error: 8] PDB id: 1nsa, Ligand code: BEN is not in uniprot mapping dict
[Error: 9] PDB id: 1swg, Ligand code: BTN is not in uniprot mapping dict
[Error: 10] PDB id: 4aig, Ligand code: FLX is not in uniprot mapping dict
[HOLO4K]: 3901


### 3. Save data

In [19]:
def _save_step6_results(dataset_name, results):
    """Pickle the mapped results of one dataset to its step-6 file."""
    with open(f"./preprocessed_data/step6_{dataset_name}_data.pkl", "wb") as handle:
        pickle.dump(results, handle)

# Write one step-6 pickle per dataset, in a fixed order.
_save_step6_results("PDBbind", PDBbind_results)
_save_step6_results("CASF2016", CASF2016_results)
_save_step6_results("CASF2013", CASF2013_results)
_save_step6_results("CSAR2014", CSAR2014_results)
_save_step6_results("CSAR2012", CSAR2012_results)
_save_step6_results("CSARset1", CSARset1_results)
_save_step6_results("CSARset2", CSARset2_results)
_save_step6_results("Astex", Astex_results)
_save_step6_results("COACH420", COACH420_results)
_save_step6_results("HOLO4K", HOLO4K_results)