In [1]:
import os, sys
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle as pkl

## Load Target & Drug & Interactions

In [2]:
target_df = pd.read_csv('D:/study_data/DrugTarget/TTD/Processed_target_info.csv')
print(target_df.shape)
target_df.head(2)

(4221, 9)


Unnamed: 0,target_id,uniprot_id,target_name,gene_name,target_type,PDB_id,biochemical_class,sequence,drug_info
0,T00032,OSTP_HUMAN,Osteopontin (SPP1),SPP1,Literature-reported target,3DSF; 3CXD,,MRIAVICFCLLGITCAIPVKQADSGSSEEKQLYNKYPDAVATWLNP...,[]
1,T00033,TGFA_HUMAN,Transforming growth factor alpha (TGFA),TGFA,Clinical trial target,5KN5; 4TGF; 3TGF; 3.00E+50; 2TGF,Growth factor,MVPSAGQLALFALGIVLAACQALENSTSPLSADPPVAAAVVSHFND...,[['D08EIK' 'LY3016859' 'Phase 1/2']]


In [3]:
drug_df = pd.read_csv('D:/study_data/DrugTarget/TTD/Processed_drug_info.csv')
print(drug_df.shape)
drug_df.head(2)

(41818, 9)


Unnamed: 0,drug_id,therap_class,drug_type,InchI,InchIKey,smiles,status,drug_class,compound_class
0,D00AAN,,Small molecular drug,1S/C42H47ClN4O5S/c43-28-12-16-31(17-13-28)53(5...,MSUMHGMGRZWLMN-WXPZYUJUSA-N,C1CCN2CCC3C(=CC(CCC=CC1)(C4C3(C2)CC5N4CCCC(=O)...,Investigative,Investigative Drug(s),
1,D00AAU,,Small molecular drug,1S/C18H22O2/c1-3-17(13-7-5-9-15(19)11-13)18(4-...,KUJAWCSIKNKXLL-UHFFFAOYSA-N,CCC(C1=CC(=CC=C1)O)C(CC)C2=CC(=CC=C2)O,Investigative,Investigative Drug(s),


In [4]:
clinical_data = pd.read_excel('D:/study_data/DrugTarget/TTD/P1-07-Drug-TargetMapping.xlsx')
clinical_data = clinical_data.rename(columns={'TargetID': 'target_id', 'DrugID': 'drug_id'})
print(clinical_data.shape)
clinical_data.head(2)

(44663, 4)


Unnamed: 0,target_id,drug_id,Highest_status,MOA
0,T71390,D07OAC,Investigative,Inhibitor
1,T70309,D07OAC,Investigative,Inhibitor


In [5]:
# merge information
print(clinical_data.shape)
interaction_df = clinical_data.merge(target_df, how='left', on='target_id')
print(interaction_df.shape)
interaction_df = interaction_df.merge(drug_df, how='left', on='drug_id')
print(interaction_df.shape)

(44663, 4)
(44663, 12)
(44663, 20)


In [6]:
interaction_df.head(3)

Unnamed: 0,target_id,drug_id,Highest_status,MOA,uniprot_id,target_name,gene_name,target_type,PDB_id,biochemical_class,sequence,drug_info,therap_class,drug_type,InchI,InchIKey,smiles,status,drug_class,compound_class
0,T71390,D07OAC,Investigative,Inhibitor,S5A2_HUMAN,Steroid 5-alpha-reductase 2 (SRD5A2),SRD5A2,Successful target,,CH-CH donor oxidoreductase,MQVQCQQSPVLAGSATLVALGALALYVAKPSGYGKHTESLKPAATR...,[['D08IWD' 'Finasteride' 'Approved']\n ['D07BH...,,Small molecular drug,1S/C10H13BrN6O3/c11-7-4-8(13)14-2-15-9(4)17(16...,LTTBFVDMHCIDPM-BHBWVORQSA-N,C1=NC(=C2C(=N1)N(N=C2Br)C3C(C(C(O3)CN)O)O)N,Investigative,Investigative Drug(s),
1,T70309,D07OAC,Investigative,Inhibitor,S5A1_HUMAN,Steroid 5-alpha-reductase 1 (SRD5A1),SRD5A1,Clinical trial target,,CH-CH donor oxidoreductase,MATATGVAEERLLAALAYLQCAVGCAVFARNRQTNSVYGRHALPSH...,[['D01CJY' 'FR-146687' 'Phase 2']\n ['D0C1SI' ...,,Small molecular drug,1S/C10H13BrN6O3/c11-7-4-8(13)14-2-15-9(4)17(16...,LTTBFVDMHCIDPM-BHBWVORQSA-N,C1=NC(=C2C(=N1)N(N=C2Br)C3C(C(C(O3)CN)O)O)N,Investigative,Investigative Drug(s),
2,T97071,D0Y6UB,Investigative,Inhibitor,FOLH1_HUMAN,Glutamate carboxypeptidase II (GCPII),FOLH1,Successful target,6HKZ; 6HKJ; 6H7Z; 6H7Y; 6FE5,Peptidase,MWNLLHETDSAVATARRPRWLCAGALVLAGGFFLLGFLFGWFIKSS...,[['D09PGL' 'Capromab' 'Approved']\n ['DBG46D' ...,,Small molecular drug,"1S/C6H11O7P/c7-5(8)2-1-4(6(9)10)3-14(11,12)13/...",ISEYJGQFXSTPMQ-UHFFFAOYSA-N,C(CC(=O)O)C(CP(=O)(O)O)C(=O)O,Investigative,Investigative Drug(s),


In [7]:
keep_uniprot_ids = []
uniprot_ids = interaction_df.uniprot_id.tolist()
for u in uniprot_ids:
    if type(u) == str:
        if '; ' in u:
            keep_uniprot_ids += u.split('; ')
        elif '-' in u:
            keep_uniprot_ids += u.split('-')
        else:
            keep_uniprot_ids.append(u)

keep_uniprot_ids = sorted(list(set(keep_uniprot_ids)))
keep_uniprot_ids = [u for u in keep_uniprot_ids if 'HUMAN' in u]

In [8]:
len(keep_uniprot_ids)

2227

In [9]:
# with open('./data/Binding_data/TTD_bindings_target_ids.txt', 'w') as file:
#     for u in keep_uniprot_ids:
#         file.write(u + '\n')

In [9]:
# id mapping
mapping_df = pd.read_csv('./data/tmp/TTD_bindings_target_mapping.tsv', sep='\t')
mapping_df.head(2)

Unnamed: 0,From,Entry,Reviewed,Entry Name,Protein names,Gene Names,Organism,Length
0,CXE1_HUMAN,A6NN92,reviewed,CXE1_HUMAN,Putative gap junction epsilon-1 protein (Conne...,GJE1,Homo sapiens (Human),205
1,GBRR3_HUMAN,A8MPY1,reviewed,GBRR3_HUMAN,Gamma-aminobutyric acid receptor subunit rho-3...,GABRR3,Homo sapiens (Human),467


In [10]:
mapping_dict = {k: v for k, v in zip(mapping_df.From.tolist(), mapping_df.Entry.tolist())}

def mapping_func(item):
    item = str(item)
    if '; ' in item:
        res = [mapping_dict[i] for i in item.split('; ') if i in mapping_dict]
        res = '|'.join(res) if len(res) > 0 else None
    elif '-' in item:
        res = [mapping_dict[i] for i in item.split('-') if i in mapping_dict]
        res = '|'.join(res) if len(res) > 0 else None
    else:
        res = mapping_dict[item] if item in mapping_dict else None
    return res

interaction_df['uniprot_id'] = interaction_df['uniprot_id'].apply(mapping_func)

In [11]:
sm_interaction_df = interaction_df[interaction_df.drug_type == 'Small molecular drug']
print(sm_interaction_df.shape)

(34656, 20)


In [12]:
sm_interaction_df.head(3)

Unnamed: 0,target_id,drug_id,Highest_status,MOA,uniprot_id,target_name,gene_name,target_type,PDB_id,biochemical_class,sequence,drug_info,therap_class,drug_type,InchI,InchIKey,smiles,status,drug_class,compound_class
0,T71390,D07OAC,Investigative,Inhibitor,P31213,Steroid 5-alpha-reductase 2 (SRD5A2),SRD5A2,Successful target,,CH-CH donor oxidoreductase,MQVQCQQSPVLAGSATLVALGALALYVAKPSGYGKHTESLKPAATR...,[['D08IWD' 'Finasteride' 'Approved']\n ['D07BH...,,Small molecular drug,1S/C10H13BrN6O3/c11-7-4-8(13)14-2-15-9(4)17(16...,LTTBFVDMHCIDPM-BHBWVORQSA-N,C1=NC(=C2C(=N1)N(N=C2Br)C3C(C(C(O3)CN)O)O)N,Investigative,Investigative Drug(s),
1,T70309,D07OAC,Investigative,Inhibitor,P18405,Steroid 5-alpha-reductase 1 (SRD5A1),SRD5A1,Clinical trial target,,CH-CH donor oxidoreductase,MATATGVAEERLLAALAYLQCAVGCAVFARNRQTNSVYGRHALPSH...,[['D01CJY' 'FR-146687' 'Phase 2']\n ['D0C1SI' ...,,Small molecular drug,1S/C10H13BrN6O3/c11-7-4-8(13)14-2-15-9(4)17(16...,LTTBFVDMHCIDPM-BHBWVORQSA-N,C1=NC(=C2C(=N1)N(N=C2Br)C3C(C(C(O3)CN)O)O)N,Investigative,Investigative Drug(s),
2,T97071,D0Y6UB,Investigative,Inhibitor,Q04609,Glutamate carboxypeptidase II (GCPII),FOLH1,Successful target,6HKZ; 6HKJ; 6H7Z; 6H7Y; 6FE5,Peptidase,MWNLLHETDSAVATARRPRWLCAGALVLAGGFFLLGFLFGWFIKSS...,[['D09PGL' 'Capromab' 'Approved']\n ['DBG46D' ...,,Small molecular drug,"1S/C6H11O7P/c7-5(8)2-1-4(6(9)10)3-14(11,12)13/...",ISEYJGQFXSTPMQ-UHFFFAOYSA-N,C(CC(=O)O)C(CP(=O)(O)O)C(=O)O,Investigative,Investigative Drug(s),


In [13]:
from utils import canonic_smiles

def canonic_smiles_func(smi):
    try:
        ca_smi = canonic_smiles(smi)
        return ca_smi
    except:
        return None
    
sm_interaction_df['smiles'] = sm_interaction_df['smiles'].apply(canonic_smiles_func)

[16:11:10] Explicit valence for atom # 7 Cl, 5, is greater than permitted
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [14]:
sm_interaction_df2 = sm_interaction_df.dropna(subset=['uniprot_id', 'smiles', 'status']).drop_duplicates(
    subset=['uniprot_id', 'smiles', 'status'], keep='first')
sm_interaction_df2.shape

(26901, 20)

In [15]:
sm_interaction_df2.to_csv('./data/Binding_data/TTD_small_molecules_pairs_data_with_details.csv', index=False)

In [14]:
sm_interaction_df = sm_interaction_df[['uniprot_id', 'smiles', 'status']].dropna()
sm_interaction_df = sm_interaction_df.drop_duplicates()
sm_interaction_df.shape

(26901, 3)

In [15]:
len(set(sm_interaction_df.smiles.tolist()))

17703

In [16]:
sm_interaction_df['status'].value_counts()

Investigative                      19104
Patented                            2423
Approved                            1553
Phase 2                              839
Terminated                           728
Discontinued in Phase 2              494
Phase 1                              431
Phase 3                              414
Discontinued in Phase 1              217
Preclinical                          213
Discontinued in Phase 3              136
Phase 1/2                            114
Phase 2/3                             53
Withdrawn from market                 49
Phase 4                               44
Clinical trial                        41
Discontinued in Preregistration       14
Phase 2a                               5
approved                               5
Discontinued in Phase 1/2              5
Preregistration                        5
Approved (orphan drug)                 4
Registered                             3
Discontinued in Phase 4                2
Discontinued in 

In [17]:
sm_interaction_df.to_csv('./data/Binding_data/TTD_small_molecules_pairs_data.csv', index=False)

In [18]:
partition_classes = ['Application submitted', 'Approval submitted', 'Approved', 'Approved (orphan drug)', 
                     'BLA submitted', 'NDA filed', 'Discontinued in Phase 4', 'Phase 4', 
                     'Withdrawn from market','approved']
# non_partition_classes = ['Discontinue in Phase 1 Trial', 'Discontinued in Phase 1', 'Discontinued in Phase 1/2',
#                          'Discontinued in Phase 2', 'Discontinued in Phase 2/3', 'Discontinued in Phase 2a',
#                          'Discontinued in Phase 2b', 'Discontinued in Preregistration']
non_partition_classes = ['Discontinued in Phase 1/2', 'Discontinued in Phase 2', 'Discontinued in Phase 2/3', 
                         'Discontinued in Phase 2a', 'Discontinued in Phase 2b']
unknown_classes = ['Investigative', 'Patented']
failed_classes = ['Discontinue in Phase 1 Trial', 'Discontinued in Phase 1', 'Discontinued in Phase 1/2',
                'Discontinued in Phase 2', 'Discontinued in Phase 2/3', 'Discontinued in Phase 2a',
                'Discontinued in Phase 2b', 'Discontinued in Preregistration', 'Terminated', 'Discontinued in Phase 3']

## Load PS information

In [19]:
# with open('./data/PS_Pro/Homo_PS_self.txt', 'r') as file:
#     ps_self_pros = file.read().strip().split('\n')
# with open('./data/PS_Pro/Homo_PS_other.txt', 'r') as file:
#     ps_other_pros = file.read().strip().split('\n')
# with open('./data/PS_Pro/Homo_condensate_form_lt.txt', 'r') as file:
#     conden_form_lt_pros = file.read().strip().split('\n')
# with open('./data/PS_Pro/Homo_condensate_form_all.txt', 'r') as file:
#     conden_form_all_pros = file.read().strip().split('\n')
# with open('./data/PS_Pro/Homo_PS_PhaSePred_top5.txt', 'r') as file:
#     phasepred_pros = file.read().strip().split('\n')

with open('./data/PS_Pro/Homo_whole_PS_top5.txt', 'r') as file:
    whole_ps_top5 = file.read().strip().split('\n')
with open('./data/PS_Pro/Homo_whole_PS_top10.txt', 'r') as file:
    whole_ps_top10 = file.read().strip().split('\n')

len(whole_ps_top5), len(whole_ps_top10)

(1679, 2860)

In [20]:
# intersection of ps & target
all_targets = []
for u in sm_interaction_df.uniprot_id.tolist():
    if u:
        if '|' in u:
            all_targets += u.split('|')
        else:
            all_targets.append(u)

all_targets = list(set(all_targets))
len(all_targets)

1669

In [19]:
# whole_ps_pros = list(set(ps_self_pros + ps_other_pros + phasepred_pros))
# print(len(whole_ps_pros), len(set(whole_ps_pros).intersection(all_targets)))
# ps_lt_conden_pros = list(set(whole_ps_pros + conden_form_lt_pros))
# print(len(ps_lt_conden_pros), len(set(ps_lt_conden_pros).intersection(all_targets)))
# ps_all_conden_pros = list(set(whole_ps_pros + conden_form_all_pros))
# print(len(ps_all_conden_pros), len(set(ps_all_conden_pros).intersection(all_targets)))

1679 163
1975 179
5277 427


In [18]:
# # status analysis
# whole_ps_status, non_ps_status = [], []
# ps_lt_conden_status, non_ps_lt_conden_status = [], []
# ps_all_conden_status, non_ps_all_conden_status = [], []

# for status, u in zip(interaction_df['Highest_status'].tolist(), interaction_df['uniprot_id'].tolist()):
#     if u:
#         if '|' in u:
#             tmp = u.split('|')
#             if len(set(tmp).intersection(whole_ps_pros)) > 0:
#                 whole_ps_status.append(status)
#             else:
#                 non_ps_status.append(status)
#             if len(set(tmp).intersection(ps_lt_conden_pros)) > 0:
#                 ps_lt_conden_status.append(status)
#             else:
#                 non_ps_lt_conden_status.append(status)
#             if len(set(tmp).intersection(ps_all_conden_pros)) > 0:
#                 ps_all_conden_status.append(status)
#             else:
#                 non_ps_all_conden_status.append(status)
#         else:
#             if u in whole_ps_pros:
#                 whole_ps_status.append(status)
#             else:
#                 non_ps_status.append(status)
#             if u in ps_lt_conden_pros:
#                 ps_lt_conden_status.append(status)
#             else:
#                 non_ps_lt_conden_status.append(status)
#             if u in ps_all_conden_pros:
#                 ps_all_conden_status.append(status)
#             else:
#                 non_ps_all_conden_status.append(status)

In [21]:
# top 5
ps_drugs, nps_drugs = [], []
for uni, smi in sm_interaction_df.iloc[:, :2].values:
    if uni in whole_ps_top5:
        ps_drugs.append(smi)
    elif len(set(uni.split('|')).intersection(whole_ps_top5)) > 0:
        ps_drugs.append(smi)
    else:
        nps_drugs.append(smi)

ps_drugs = list(set(ps_drugs))
nps_drugs = list(set(nps_drugs))
print(len(ps_drugs), len(nps_drugs))


# top 10
ps_drugs2, nps_drugs2 = [], []
for uni, smi in sm_interaction_df.iloc[:, :2].values:
    if uni in whole_ps_top10:
        ps_drugs2.append(smi)
    elif len(set(uni.split('|')).intersection(whole_ps_top10)) > 0:
        ps_drugs2.append(smi)
    else:
        nps_drugs2.append(smi)

ps_drugs2 = list(set(ps_drugs2))
nps_drugs2 = list(set(nps_drugs2))
print(len(ps_drugs2), len(nps_drugs2))

2228 16016
3081 15516


In [22]:
# overlap of ps & nps
print(len(set(ps_drugs).intersection(nps_drugs)))
print(len(set(ps_drugs2).intersection(nps_drugs2)))

541
894


In [23]:
with open('./data/Binding_data/TTD_ps_nps_smiles_top5.pkl', 'wb') as file:
    pkl.dump((ps_drugs, nps_drugs), file)

with open('./data/Binding_data/TTD_ps_nps_smiles_top10.pkl', 'wb') as file:
    pkl.dump((ps_drugs2, nps_drugs2), file)

In [23]:
# ps_statuses, nps_statuses = [], []
# for uni, _, status in sm_interaction_df.values:
#     if uni in whole_ps_top5:
#         ps_statuses.append(status)
#     elif len(set(uni.split('|')).intersection(whole_ps_top5)) > 0:
#         ps_statuses.append(status)
#     else:
#         nps_statuses.append(status)

# with open('./data/Binding_data/TTD_ps_nps_clinical_status.pkl', 'wb') as file:
#     pkl.dump((ps_statuses, nps_statuses), file)

In [29]:
from collections import defaultdict
# for whole_ps_pros
partition_drugs, non_partition_drugs = set(), set()
unknown_drugs, failed_drugs = set(), set()
partition_mappings, non_partition_mappings = defaultdict(set), defaultdict(set)
unknown_mappings, failed_mappings = defaultdict(set), defaultdict(set)
## for whole_non_ps_pros
nps_partition_drugs, nps_non_partition_drugs = set(), set()
nps_unknown_drugs, nps_failed_drugs = set(), set()
nps_partition_mappings, nps_non_partition_mappings = defaultdict(set), defaultdict(set)
nps_unknown_mappings, nps_failed_mappings = defaultdict(set), defaultdict(set)

# top 5
# whole_ps_pros = whole_ps_top5
# for top 10
whole_ps_pros = whole_ps_top10

for status, uni, smi in zip(
    sm_interaction_df.status.tolist(), sm_interaction_df.uniprot_id.tolist(), 
    sm_interaction_df.smiles.tolist()):
    if type(smi) == str and uni != '':
        if status in partition_classes:
            if '|' not in uni and uni in whole_ps_pros:
                partition_drugs.update([smi])
                partition_mappings[uni].update([smi])
            elif '|' in uni and len(set(uni.split('|')).intersection(whole_ps_pros)) > 0:
                partition_drugs.update([smi])
                for u in set(uni.split('|')).intersection(whole_ps_pros):
                    partition_mappings[u].update([smi])
            else:
                nps_partition_drugs.update([smi])
                for u in uni.split('|'):
                    nps_partition_mappings[u].update([smi])


        if status in non_partition_classes:
            if '|' not in uni and uni in whole_ps_pros:
                non_partition_drugs.update([smi])
                non_partition_mappings[uni].update([smi])
            elif '|' in uni and len(set(uni.split('|')).intersection(whole_ps_pros)) > 0:
                non_partition_drugs.update([smi])
                for u in set(uni.split('|')).intersection(whole_ps_pros):
                    non_partition_mappings[u].update([smi])
            else:
                nps_non_partition_drugs.update([smi])
                for u in uni.split('|'):
                    nps_non_partition_mappings[u].update([smi])
        
        if status in failed_classes:
            if '|' not in uni and uni in whole_ps_pros:
                failed_drugs.update([smi])
                failed_mappings[uni].update([smi])
            elif '|' in uni and len(set(uni.split('|')).intersection(whole_ps_pros)) > 0:
                failed_drugs.update([smi])
                for u in set(uni.split('|')).intersection(whole_ps_pros):
                    failed_mappings[u].update([smi])
            else:
                nps_failed_drugs.update([smi])
                for u in uni.split('|'):
                    nps_failed_mappings[u].update([smi])

        if status in unknown_classes:
            if '|' not in uni and uni in whole_ps_pros:
                unknown_drugs.update([smi])
                unknown_mappings[uni].update([smi])
            elif '|' in uni and len(set(uni.split('|')).intersection(whole_ps_pros)) > 0:
                unknown_drugs.update([smi])
                for u in set(uni.split('|')).intersection(whole_ps_pros):
                    unknown_mappings[u].update([smi])
            else:
                nps_unknown_drugs.update([smi])
                for u in uni.split('|'):
                    nps_unknown_mappings[u].update([smi])

In [23]:
# # remove duplicates
# # for whole_ps_pros
# partition_drugs, non_partition_drugs = list(set(partition_drugs)), list(set(non_partition_drugs))
# partition_mappings = {k: list(set(v)) for k, v in partition_mappings.items()}
# non_partition_mappings = {k: list(set(v)) for k, v in non_partition_mappings.items()}
# # for all_condensate_form_pros
# partition_drugs2, non_partition_drugs2 = list(set(partition_drugs2)), list(set(non_partition_drugs2))
# partition_mappings2 = {k: list(set(v)) for k, v in partition_mappings2.items()}
# non_partition_mappings2 = {k: list(set(v)) for k, v in non_partition_mappings2.items()}

In [30]:
# top 5
len(partition_drugs), len(non_partition_drugs), len(unknown_drugs), len(failed_drugs)  # 176, 26, 1675, 86

# top 10
# 209, 41, 2368, 128

(209, 41, 2368, 128)

In [31]:
# top 5
len(nps_partition_drugs), len(nps_non_partition_drugs), len(nps_unknown_drugs), len(nps_failed_drugs)  # 1215, 373, 12506, 1112

# top 10
# 1190, 366, 12095, 1098

(1190, 366, 12095, 1098)

In [32]:
# top 5
len(partition_mappings), len(nps_partition_mappings)  # 38, 431

# top 10
# 56, 414

(56, 414)

In [33]:
# for whole_ps
whole_ps_data = {
    'ps': {
        'partition': (partition_drugs, partition_mappings),
        'non-partition': (non_partition_drugs, non_partition_mappings),
        'failed': (failed_drugs, failed_mappings),
        'unknown': (unknown_drugs, unknown_mappings)
    },
    'nps': {
        'partition': (nps_partition_drugs, nps_partition_mappings),
        'non-partition': (nps_non_partition_drugs, nps_non_partition_mappings),
        'failed': (nps_failed_drugs, nps_failed_mappings),
        'unknown': (nps_unknown_drugs, nps_unknown_mappings)
    }
}

# whole_conden_data = {
#     'conden': {
#         'partition': (partition_drugs_cond, partition_mappings_cond),
#         'non-partition': (non_partition_drugs_cond, non_partition_mappings_cond),
#         'failed': (failed_drugs_cond, failed_mappings_cond),
#         'unknown': (unknown_drugs_cond, unknown_mappings_cond)
#     },
#     'non-conden': {
#         'partition': (ncf_partition_drugs_cond, ncf_partition_mappings_cond),
#         'non-partition': (ncf_partition_drugs_cond, ncf_non_partition_mappings_cond),
#         'failed': (ncf_failed_drugs_cond, ncf_failed_mappings_cond),
#         'unknown': (ncf_unknown_drugs_cond, ncf_unknown_mappings_cond)
#     }
# }
with open('./data/Binding_data/TTD_whole_ps_nps_data_top5.pkl', 'wb') as file:
    pkl.dump(whole_ps_data, file)