In [1]:
import sys
import numpy as np
import pandas as pd
import torch
import os
import math

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw

from calldb import CallDB
from polarity import Polarity
import DDI_runner
import BA_runner



In [2]:
# req_id = 'rq0001'

# suitability = CallDB("pro_carrier_suitability_assessment")
# interaction = CallDB("pro_drug_drug_interaction")

# # request table fetch
# request = suitability.from_db(f"""
# SELECT * 
# FROM pro_carrier_suitability_assessment.request 
# WHERE req_id = "{req_id}"
# ;
# """)

# # interaction_dict table fetch
# inter_dict = interaction.from_db("""
# SELECT *
# FROM pro_drug_drug_interaction.interaction_dictionary
# ;
# """)

In [3]:
def main(req_id='rq0001'):
    """Run the carrier-suitability pipeline for one request.

    Args:
        req_id: Identifier of the row in the ``request`` table to process.
            Defaults to ``'rq0001'``, the value that used to be hard-coded,
            so existing zero-argument calls behave as before.
    """
    run = Run(req_id)
    run.run()

In [4]:
class Run:
    """Carrier-suitability pipeline for a single request.

    The constructor loads the request row and the interaction dictionary from
    the database.  ``run()`` then:

    1. encodes the drug (Morgan fingerprint) and the target sequence,
    2. fetches carrier candidates whose ``block_type`` matches the predicted
       polarity,
    3. filters them by the predicted drug-drug-interaction type,
    4. predicts binding affinity for the remaining candidates,
    5. writes result rows, adverse-effect references and molecule images.

    NOTE(review): every SQL statement here is built by string interpolation
    because the ``CallDB`` API visible from this notebook only accepts a query
    string.  If ``CallDB`` supports bound parameters, switch to them.
    """

    # Root directory under which one sub-directory of images per request is written.
    IMG_ROOT = '/BiO/projects/polarity/carrier_suitability_assessment/img_result'
    # Width of the encoded target-sequence vector.
    TARGET_LEN = 2048

    def __init__(self, req_id):
        """Load the request row and the interaction dictionary.

        Args:
            req_id: Identifier of the row in the ``request`` table.

        Raises:
            KeyError: If no request row exists for ``req_id``.
        """
        self.req_id = req_id

        self.suitability = CallDB("pro_carrier_suitability_assessment")
        self.interaction = CallDB("pro_drug_drug_interaction")

        # Fetch the request row.
        self.request = self.suitability.from_db(f"""
        SELECT * 
        FROM pro_carrier_suitability_assessment.request 
        WHERE req_id = "{self.req_id}"
        ;
        """)

        # Fetch the interaction dictionary table.
        self.inter_dict = self.interaction.from_db("""
        SELECT *
        FROM pro_drug_drug_interaction.interaction_dictionary
        ;
        """)

        if len(self.request) == 0:
            # Same exception type as the ``.loc[0, ...]`` lookup below would
            # raise, but with a message that names the actual problem.
            raise KeyError(f"no request found for req_id {self.req_id!r}")

        self.drug_name = self.request.loc[0, 'drug_name']
        self.smiles = self.request.loc[0, 'smiles']
        self.protein_name = self.request.loc[0, 'protein_name']
        self.sequence = self.request.loc[0, 'sequence']
        self.weight = self.request.loc[0, 'weight']

        # ``weight`` is a '|'-separated string; the parts are kept as strings.
        self.weight_split = self.weight.split('|')
        self.ba_wt = self.weight_split[0]
        self.ae_wt = self.weight_split[1]
        self.ddi_wt = self.weight_split[2]

    def run(self):
        """Execute the whole pipeline and persist its results."""
        drug_fp = self.drug_encoding(self.smiles)
        target_embd = self.target_encoding(self.sequence)

        polarity = self.pol(self.smiles)
        self.pol_candidate(polarity)    # stores candidates matching the predicted polarity on self
        carrier_fp = self.pol_carrier_fp()

        ddi_pred = self.ddi(drug_fp, carrier_fp)
        self.ddi_candidate(ddi_pred)
        carrier_fp = self.ddi_carrier_fp()

        # The prediction is also stored on self.ba_pred, which make_df() reads.
        self.ba(carrier_fp, target_embd)

        result = self.make_df()

        self.to_result(result)
        self.to_result_ref(result)
        self.to_img_dir()

    def drug_encoding(self, smiles: str) -> torch.Tensor:
        """Encode a SMILES string as a 1024-bit Morgan fingerprint (radius 3).

        Raises:
            ValueError: If RDKit cannot parse ``smiles``.
        """
        drug_m = Chem.MolFromSmiles(smiles)
        if drug_m is None:
            raise ValueError(f"invalid SMILES: {smiles!r}")
        bits = AllChem.GetMorganFingerprintAsBitVect(drug_m, 3, nBits=1024)
        return torch.tensor(np.array(bits), dtype=torch.float32)

    def target_encoding(self, sequence: str) -> torch.Tensor:
        """Encode a protein sequence as a ``(1, 2048)`` float tensor.

        Each residue letter is mapped to its index in
        ``'ACDEFGHIKLMNOPQRSTVWXY'``; the encoded sequence is truncated or
        right-padded with zeros to ``TARGET_LEN`` entries.

        Raises:
            KeyError: If the sequence contains a letter outside that alphabet.
        """
        amino_dict = {s: i for i, s in enumerate('ACDEFGHIKLMNOPQRSTVWXY')}
        # The whole sequence is encoded before truncating, so an invalid
        # letter anywhere in the input is still reported.
        encoded = [amino_dict[c] for c in sequence.upper()]

        encoded = encoded[:self.TARGET_LEN]
        encoded.extend([0] * (self.TARGET_LEN - len(encoded)))

        return torch.tensor(encoded, dtype=torch.float32).view(1, self.TARGET_LEN)

    def pol(self, smiles):
        """Return the ``Polarity`` prediction for ``smiles``."""
        return Polarity(smiles)

    def pol_candidate(self, polarity):
        """Fetch carrier candidates whose ``block_type`` equals ``polarity``.

        The result is stored in ``self.pol_matched``.
        """
        pol_matched = self.suitability.from_db(f"""
        SELECT Drugbank_ID, name, finger_print, block_type, block_score, SMILES
        FROM pro_carrier_suitability_assessment.block_type_library 
        WHERE block_type = {polarity};
        """)

        self.pol_matched = pol_matched[['Drugbank_ID',
                                        'name',
                                        'finger_print',
                                        'block_type',
                                        'block_score',
                                        'SMILES']]

    @staticmethod
    def _fingerprints_to_tensor(fingerprints) -> torch.Tensor:
        """Convert an iterable of bit strings (e.g. ``'0101'``) to a float tensor.

        ``fp.replace('', ' ')`` puts a space around every character so that
        ``np.fromstring`` can parse each digit as a separate integer.
        """
        rows = [np.fromstring(fp.replace('', ' '), dtype=int, sep=' ')
                for fp in fingerprints]
        # Stack into one ndarray first: building a tensor from a list of
        # ndarrays is slow and emits a warning.
        return torch.tensor(np.array(rows), dtype=torch.float32)

    def pol_carrier_fp(self) -> torch.Tensor:
        """Return the fingerprints of the polarity-matched candidates."""
        return self._fingerprints_to_tensor(self.pol_matched['finger_print'])

    def ddi(self, drug_fp: torch.Tensor, carrier_fp: torch.Tensor):
        """Run the DDI model and return its predictions as a list."""
        ddi_pred = DDI_runner.run(drug_fp, carrier_fp)
        return list(np.array(ddi_pred.detach().cpu()))

    def _ddi_threshold(self) -> int:
        """Map the request's DDI weight to a minimum ``interaction_type``.

        The weight comes from ``weight.split('|')`` and is therefore a string;
        it must be converted before being compared with numbers.  Weights that
        cannot be parsed fall back to 0 (no filtering).
        """
        try:
            weight = float(self.ddi_wt)
        except (TypeError, ValueError):
            return 0
        if weight == 1:
            return 2
        if weight == 0.5:
            return 1
        return 0

    def ddi_candidate(self, ddi_pred) -> object:
        """Keep the candidates whose predicted interaction type is acceptable.

        Sets ``self.ddi_type`` (label / interaction_type per candidate),
        ``self.ddi_matched_type`` (boolean mask) and ``self.ddi_matched``
        (the surviving rows of ``self.pol_matched``).
        """
        # Look up label and interaction score by position for each prediction.
        self.ddi_type = pd.DataFrame(
            np.array(self.inter_dict[['label', 'interaction_type']].iloc[ddi_pred]),
            columns=['label', 'interaction_type'])

        inter_type = self._ddi_threshold()

        # Select only the candidates that reach the required score.
        self.ddi_matched_type = self.ddi_type['interaction_type'] >= inter_type
        self.ddi_matched = self.pol_matched[['Drugbank_ID', 'finger_print', 'SMILES']].iloc[
            list(self.ddi_type.index[self.ddi_matched_type])]

    def ddi_carrier_fp(self) -> torch.Tensor:
        """Return the fingerprints of the DDI-matched candidates."""
        return self._fingerprints_to_tensor(self.ddi_matched['finger_print'])

    def ba(self, carrier_fp: torch.Tensor, target_embd: torch.Tensor) -> object:
        """Predict binding affinity for every carrier against the target.

        The result is rounded to 4 decimals, non-positive values are replaced
        by ``0.0001``, and the array is stored in ``self.ba_pred`` and returned.
        """
        n_carriers = len(carrier_fp)
        # One copy of the target row per carrier.
        target_batch = target_embd.repeat(n_carriers, 1)

        ba_pred = BA_runner.run(carrier_fp, target_batch, n_carriers)
        ba_pred = np.round(np.array(ba_pred.detach().cpu()), 4)
        # ``<=`` rather than ``<``: a value rounded to exactly 0 would make
        # math.log() in to_result() raise "math domain error".
        self.ba_pred = np.where(ba_pred <= 0, 0.0001, ba_pred)

        return self.ba_pred

    def make_df(self) -> object:
        """Assemble the per-candidate result table that is written to the DB."""
        drugbank_id = pd.DataFrame(self.ddi_matched['Drugbank_ID'], columns=['Drugbank_ID'])
        result = drugbank_id
        pol_result = self.pol_matched

        ddi_result1 = self.ddi_type[self.ddi_matched_type]

        ddi_result2 = self.inter_dict.iloc[list(self.ddi_type[self.ddi_matched_type]['label'])][['label', 'interaction']]
        ddi_result2 = pd.DataFrame(np.array(ddi_result2), columns=['label', 'interaction'])

        ba_result = pd.DataFrame(self.ba_pred, columns=['ic50', 'ec50'])

        request_id = pd.DataFrame({'req_id': [self.req_id for i in range(len(result))]})

        # Order matters: the first join aligns on the original candidate
        # index; merge() then resets the index to 0..k-1, which is what the
        # remaining (freshly indexed) frames are joined on.
        result = result.join(ddi_result1).merge(pol_result).join(ba_result).join(ddi_result2[['interaction']]).join(request_id)
        result = result[['req_id', 'Drugbank_ID', 'name', 'block_type', 'block_score', 'interaction', 'interaction_type', 'ic50', 'ec50']]

        return result

    def to_result(self, result: object):
        """Insert one row per candidate into the ``result`` table."""
        for idx, (req_id, Drugbank_ID, name, block_type, block_score, interaction, interaction_type, ic50, ec50) in result.iterrows():
            name = name.replace("'", "`")

            # Substitute both placeholders in a single pass.  Chaining
            # .replace('A', ...).replace('B', ...) would also rewrite every
            # 'B' inside the drug name that was just inserted.
            interaction = self.drug_name.join(
                part.replace('B', name) for part in interaction.split('A'))
            # Same quote handling as for ``name``: the text ends up inside a
            # single-quoted SQL literal.
            interaction = interaction.replace("'", "`")

            n_ref_ae = self.suitability.from_db(f"""
            SELECT COUNT(*)
            FROM pro_carrier_suitability_assessment.abstract_adverse_effect
            WHERE abstract like '%{name}%'
            """
                                          ).iloc[0,0]

            total_score = (20 * block_score) + (10 * interaction_type) + (-math.log(ic50)) + (-math.log(ec50))

            self.suitability.query_db(f"""
            INSERT INTO pro_carrier_suitability_assessment.result(req_id, 
                                                                  idx, 
                                                                  Drugbank_ID, 
                                                                  name, 
                                                                  block_type, 
                                                                  block_score, 
                                                                  interaction, 
                                                                  interaction_type, 
                                                                  ic50, ec50, 
                                                                  n_ref_da, 
                                                                  n_ref_ae,
                                                                  total_score)
            VALUES ('{req_id}', 
                     {idx}, 
                     '{Drugbank_ID}', 
                     '{name}', 
                     {block_type}, 
                     {round(block_score, 3)}, 
                     '{interaction}', 
                     {interaction_type}, 
                     {round(ic50, 3)}, 
                     {round(ec50, 3)},
                     0, 
                     {n_ref_ae},
                     {total_score});    
            """)

    def to_result_ref(self, result: object):
        """Copy matching adverse-effect abstracts into the reference table."""
        for idx, (req_id, Drugbank_ID, name) in result[['req_id', 'Drugbank_ID', 'name']].iterrows():
            name = name.replace("'", "`")
            self.suitability.query_db(f"""
            INSERT INTO pro_carrier_suitability_assessment.result_adverse_effects_ref (req_id, index_id, reference_title, year, summary)
            (
            SELECT '{req_id}', {idx}, title, year, summary
            FROM pro_carrier_suitability_assessment.abstract_adverse_effect
            WHERE abstract like '%{name}%'
            );
            """)

    def to_img_dir(self):
        """Save a PNG of every DDI-matched molecule under ``IMG_ROOT/<req_id>/``."""
        out_dir = f'{self.IMG_ROOT}/{self.req_id}'
        # makedirs instead of a shell ``mkdir``: no shell interpolation of
        # req_id, and re-running for the same request does not fail.
        os.makedirs(out_dir, exist_ok=True)

        for _, (drugbank_id, smi) in self.ddi_matched[['Drugbank_ID', 'SMILES']].iterrows():
            mol = Chem.MolFromSmiles(smi)
            if mol is None:
                # Unparseable SMILES: skip the image rather than abort after
                # the database rows have already been written.
                continue
            Draw.MolToFile(mol, f'{out_dir}/{drugbank_id}.png')



In [5]:
# Entry point: runs the pipeline when this cell/script is executed directly
# (in a notebook kernel __name__ is "__main__", so running the cell triggers it).
if __name__ == "__main__":
    main()

0.5


  combi_data = carrier_fp + torch.tensor(drug_fp, dtype=torch.float32)


In [6]:
# def main(req_id):
#     global request
#     global inter_dict
    
#     drug_name = request.loc[0, 'drug_name']
#     smiles = request.loc[0, 'smiles']
#     protein_name = request.loc[0, 'protein_name']
#     sequence = request.loc[0, 'sequence']
# #     weight = request.loc[0, 'weight']
#     weight = '0.5|1|0.5'
    
#     weight_split = weight.split('|')
#     ba_wt = weight_split[0]
#     ae_wt = weight_split[1]
    
#     if weight_split[2] == 1:
#         ddi_wt = 2
        
#     elif weight_split[2] == 0.5:
#         ddi_wt = 1
        
#     else:
#         ddi_wt = 0
    
#     # 타겟 약물이 친수성인지 소수성인지 확인
    
#     drug_fp = drug_encoding(smiles)   # 입력 약물
#     target_embd = target_encoding(sequence)    # 입력타겟, 받는 변수 target_embd 고려
    
#     polarity = Polarity(smiles)    # polarity 예측
#     carrier_fp, pol_matched = carrier_encoding(polarity) # 예측된 polarity에 의해 적합후보군 추출
    
#     ddi_pred = ddi(drug_fp, carrier_fp)   # ddi 결과 예측
#     carrier_fp, ddi_type, ddi_matched_type, ddi_matched = pick_candidate(ddi_pred, pol_matched, ddi_wt) # ddi 예측 결과 바탕으로 적합후보군 추출
    
#     ba_pred = ba(carrier_fp, target_embd)   # ba 결과 예측
    
#     result = make_df(ba_pred, ddi_type, ddi_matched_type, ddi_matched, pol_matched)
    
#     to_result(result)
#     to_result_ref(result)
#     to_img_dir(req_id, ddi_matched)
    
    
    
    
    

In [7]:
# ##################
# import numpy as np
# import torch
# from rdkit import Chem
# from rdkit.Chem import AllChem

# # 타겟 약물 인코딩
# def drug_encoding(smiles: str) -> torch.Tensor:
#     drug_m = Chem.MolFromSmiles(smiles)
#     drug_fp = torch.tensor(np.array(AllChem.GetMorganFingerprintAsBitVect(drug_m, 3, nBits=1024)), dtype=torch.float32)
#     return drug_fp

In [8]:
# # 전달체 후보군 finger print 불러오기, 데이터베이스 조작에 의해서만 입력값 조정하고 싶음
# def carrier_encoding(polarity: int) -> torch.Tensor:
#     pol_matched = suitability.from_db(f"""
#     SELECT Drugbank_ID, name, finger_print, block_type, block_score, SMILES
#     FROM pro_carrier_suitability_assessment.block_type_library 
#     WHERE block_type = {polarity};
#     """)
    
#     pol_matched = pol_matched[['Drugbank_ID',
#                                'name',
#                                'finger_print',
#                                'block_type',
#                                'block_score',
#                                'SMILES']]
    
#     carrier_fp = []
#     for i, (_, _, fp, _, _, _) in pol_matched.iterrows():
#         carrier_fp.append(np.fromstring(fp.replace('', ' '), dtype=int, sep=' '))
        
#     carrier_fp = torch.tensor(carrier_fp, dtype=torch.float32)
#     return carrier_fp, pol_matched

In [9]:
# import DDI_runner

# def ddi(drug_fp: torch.Tensor, carrier_fp: torch.Tensor) -> object:
#     ddi_pred = DDI_runner.run(drug_fp, carrier_fp)
#     ddi_pred = list(np.array(ddi_pred.detach()))
#     return ddi_pred


In [10]:
# # carrier 후보 fingerprint extraction
# def candicate_fp(ddi_matched: object) -> torch.Tensor:  
#     carrier_fp = []
#     for i, (id, fp, _) in ddi_matched.iterrows():
#         carrier_fp.append(np.fromstring(fp.replace('', ' '), dtype=int, sep=' '))

#     carrier_fp = torch.tensor(carrier_fp, dtype=torch.float32)
    
#     return carrier_fp

In [11]:
# # DDI 결과를 바탕으로 BA 계산할 전달체 후보 추출
# import pandas as pd

# def pick_candidate(ddi_pred: object, pol_matched: object, ddi_wt: int) -> object:
#     global inter_dict
    
#     # interaction 점수 가져오기
#     ddi_type = pd.DataFrame(np.array(inter_dict[['label', 'interaction_type']].iloc[ddi_pred]), columns=['label', 'interaction_type']) 

#     # 만족하는 점수만 선택
#     ddi_matched_type = ddi_type['interaction_type'] >= ddi_wt   # 이거 안될 수도 있음, 
#     ddi_matched = pol_matched[['Drugbank_ID', 'finger_print', 'SMILES']].iloc[list(ddi_type.index[ddi_matched_type])]
    
#     carrier_fp = candidate_fp(ddi_matched)   # candidate_fp 함수
    
#     return carrier_fp, ddi_type, ddi_matched_type, ddi_matched

In [12]:
# # 타겟 단백질 인코딩
# def target_encoding(sequence:str) -> torch.Tensor:
#     amino_dict = {s: i for i, s in enumerate(list('ACDEFGHIKLMNOPQRSTVWXY'))}
#     sequence_embd = [amino_dict[c] for c in sequence.upper()]  # 단백질 시퀀스 숫자로 변환

#     max_len = 10000
#     for i in range(max_len - len(sequence_embd)):
#         sequence_embd.append(0)

#     target_embd = torch.tensor(sequence_embd, dtype=torch.float32)[:2048].view(1, 2048)

    
#     return target_embd
    

In [13]:
# # Binding_affinity 예측
# import BA_runner
# def ba(carrier_fp: torch.Tensor, target_embd: torch.Tensor) -> object:
    
#     sequence_temp = torch.tensor([], dtype=torch.float32).view(0, 2048)
#     for i in range(len(carrier_fp)):    # 같은 개수만큼 복사
#         sequence_temp = torch.cat([sequence_temp, sequence_embd], dim=0)
#     target_embd = sequence_temp
    
#     ba_pred = BA_runner.run(carrier_fp, target_embd, len(carrier_fp))
#     ba_pred = np.round(np.array(ba_pred.detach()), 4)
#     ba_pred = np.where(ba_pred < 0, 0.0001, ba_pred)
#     return ba_pred

In [14]:
# # DB_result에 삽입할 DF 만들기

# def make_df(ba_pred: object, ddi_type: object, ddi_matched_type: object, ddi_matched: object, pol_matched: object) -> object:
#     global inter_dict
    
#     drugbank_id = pd.DataFrame(ddi_matched['Drugbank_ID'], columns=['Drugbank_ID'])
#     result = drugbank_id
#     pol_result = pol_matched

#     ddi_result1 = ddi_type[ddi_matched_type]

#     ddi_result2 = inter_dict.iloc[list(ddi_type[ddi_matched_type]['label'])][['label', 'interaction']]
#     ddi_result2 = pd.DataFrame(np.array(ddi_result2), columns=['label', 'interaction'])

#     ba_result = pd.DataFrame(ba_pred, columns=['ic50', 'ec50'])

#     request_id = pd.DataFrame({'req_id' : [req_id for i in range(len(result))]})
    
#     result = result.join(ddi_result1).merge(pol_result).join(ba_result).join(ddi_result2[['interaction']]).join(request_id)
#     result = result[['req_id', 'Drugbank_ID', 'name', 'block_type', 'block_score', 'interaction', 'interaction_type', 'ic50', 'ec50']]
    
#     return result

In [15]:
# import math

# def to_result(result: object):
#     for idx, (req_id, Drugbank_ID, name, block_type, block_score, interaction, interaction_type, ic50, ec50) in result.iterrows():
#         interaction = interaction.replace('A', drug_name).replace('B', name)

#         n_ref_ae = suitability.from_db(f"""
#         SELECT COUNT(*)
#         FROM pro_carrier_suitability_assessment.abstract_adverse_effect
#         WHERE abstract like '%{name}%'
#         """
#                                       ).iloc[0,0]

#         total_score = (20 * block_score) + (10 * interaction_type) + (-math.log(ic50)) + (-math.log(ec50))

#         suitability.query_db(f"""
#         INSERT INTO pro_carrier_suitability_assessment.result(req_id, 
#                                                               idx, 
#                                                               Drugbank_ID, 
#                                                               name, 
#                                                               block_type, 
#                                                               block_score, 
#                                                               interaction, 
#                                                               interaction_type, 
#                                                               ic50, ec50, 
#                                                               n_ref_da, 
#                                                               n_ref_ae,
#                                                               total_score)
#         VALUES ('{req_id}', 
#                  {idx}, 
#                  '{Drugbank_ID}', 
#                  '{name}', 
#                  {block_type}, 
#                  {round(block_score, 3)}, 
#                  '{interaction}', 
#                  {interaction_type}, 
#                  {round(ic50, 3)}, 
#                  {round(ec50, 3)},
#                  0, 
#                  {n_ref_ae},
#                  {total_score});    
#         """)


In [16]:
# def to_result_ref(result: object):
#     for idx, (req_id, Drugbank_ID, name) in result[['req_id', 'Drugbank_ID', 'name']].iterrows():
#         suitability.query_db(f"""
#         INSERT INTO pro_carrier_suitability_assessment.result_adverse_effects_ref (req_id, index_id, reference_title, year, summary)
#         (
#         SELECT '{req_id}', {idx}, title, year, summary
#         FROM pro_carrier_suitability_assessment.abstract_adverse_effect
#         WHERE abstract like '%{name}%'
#         );
#         """)

In [17]:
# # 분자 이미지 저장
# from rdkit.Chem import Draw
# import os

# def to_img_dir(req_id: str, ddi_matched: object):
#     smiles = pd.DataFrame(ddi_matched[['Drugbank_ID', 'SMILES']])
#     os.system(f'mkdir /BiO/projects/polarity/carrier_suitability_assessment/img_result/{req_id}')
#     for i, (id, smi) in smiles.iterrows():
#         m = Chem.MolFromSmiles(smi)
#         Draw.MolToFile(m, f'/BiO/projects/polarity/carrier_suitability_assessment/img_result/{req_id}/{id}.png')