In [1]:
import sys
from calldb import CallDB

from polarity import Polarity

# Request processed by this notebook. It is interpolated into the query below
# and is also read as a module-level global by make_df().
req_id = 'rq0001'

# CallDB is project code that is not shown here; the objects it returns are
# used through .from_db(sql) and .query_db(sql) in the rest of the notebook.
# NOTE(review): the constructor argument looks like a schema name - confirm.
suitability = CallDB("pro_carrier_suitability_assessment")
interaction = CallDB("pro_drug_drug_interaction")

# request table fetch
# The result is read by main() with pandas-style `.loc[0, <column>]` access,
# so the row for `req_id` is expected at index label 0.
request = suitability.from_db(f"""
SELECT * 
FROM pro_carrier_suitability_assessment.request 
WHERE req_id = "{req_id}"
;
""")

# interaction_dict table fetch
# Read as a global by pick_candidate() and make_df(), which index it by
# position (`.iloc`) and use its 'label', 'interaction_type' and 'interaction'
# columns.
inter_dict = interaction.from_db("""
SELECT *
FROM pro_drug_drug_interaction.interaction_dictionary
;
""")


def main(req_id):
    """Run the carrier-suitability pipeline for one request and store the output.

    Steps: encode the drug and the target protein, narrow the carrier library
    by predicted polarity, narrow it again by predicted drug-drug interaction,
    predict binding affinity for the remaining carriers, then write the result
    rows, the adverse-effect references and the molecule images.

    Note: the ``request`` frame is fetched when the first cell runs, for the
    module-level ``req_id``.  The ``req_id`` argument does not trigger a new
    fetch; in this function it is only forwarded to ``to_img_dir``.

    Args:
        req_id: Request identifier used for the image output directory.

    Raises:
        ValueError: If the ``request`` frame contains no rows.
    """
    # Fail with a readable message instead of a bare KeyError from .loc[0, ...].
    if len(request) == 0:
        raise ValueError("request table returned no rows; nothing to assess")

    # The request row also has 'drug_name', 'protein_name' and 'weight'
    # columns. The original code split 'weight' into three values
    # (ba_wt, ae_wt, ddi_wt) but never used them, so they are not read here.
    smiles = request.loc[0, 'smiles']
    sequence = request.loc[0, 'sequence']

    drug_fp = drug_encoding(smiles)            # input drug
    target_embd = target_encoding(sequence)    # input target protein

    # Check whether the target drug is hydrophilic or hydrophobic, then keep
    # only the carriers whose block type matches the predicted polarity.
    polarity = Polarity(smiles)
    carrier_fp, pol_matched = carrier_encoding(polarity)

    # Predict the drug-drug interaction and keep the suitable candidates.
    ddi_pred = ddi(drug_fp, carrier_fp)
    carrier_fp, ddi_type, ddi_matched_type, ddi_matched = pick_candidate(ddi_pred, pol_matched)

    # Predict binding affinity for the remaining candidates.
    ba_pred = ba(carrier_fp, target_embd)

    result = make_df(ba_pred, ddi_type, ddi_matched_type, ddi_matched, pol_matched)

    to_result(result)
    to_result_ref(result)
    to_img_dir(req_id, ddi_matched)
    
    
    
    
    



In [12]:
##################
import numpy as np
import torch
from rdkit import Chem
from rdkit.Chem import AllChem

# Target drug encoding
def drug_encoding(smiles: str) -> torch.Tensor:
    """Encode a drug as a Morgan-fingerprint tensor.

    Args:
        smiles: SMILES string of the drug.

    Returns:
        A float32 tensor built from the radius-3, 1024-bit Morgan fingerprint
        of the molecule.

    Raises:
        ValueError: If RDKit cannot parse ``smiles``.
    """
    drug_m = Chem.MolFromSmiles(smiles)
    # MolFromSmiles reports a parse failure by returning None; without this
    # check the failure only surfaces inside the fingerprint call.
    if drug_m is None:
        raise ValueError(f"invalid SMILES string: {smiles!r}")

    bit_vect = AllChem.GetMorganFingerprintAsBitVect(drug_m, 3, nBits=1024)
    drug_fp = torch.tensor(np.array(bit_vect), dtype=torch.float32)
    return drug_fp

In [13]:
# Load the carrier-candidate fingerprints; the candidate set is meant to be
# adjusted only by editing the database.
def carrier_encoding(polarity: int) -> tuple:
    """Fetch the carriers of one block type and encode their fingerprints.

    Args:
        polarity: Value compared against ``block_type`` in
            ``block_type_library``. It is interpolated into the SQL text
            unquoted.

    Returns:
        A ``(carrier_fp, pol_matched)`` tuple. ``carrier_fp`` is a float32
        tensor with one row per fetched carrier and one column per
        fingerprint character; ``pol_matched`` is the fetched frame with the
        columns Drugbank_ID, name, finger_print, block_type, block_score and
        SMILES.

    Raises:
        ValueError: If a fingerprint contains a non-digit, non-whitespace
            character.
    """
    pol_matched = suitability.from_db(f"""
    SELECT Drugbank_ID, name, finger_print, block_type, block_score, SMILES
    FROM pro_carrier_suitability_assessment.block_type_library 
    WHERE block_type = {polarity};
    """)

    pol_matched = pol_matched[['Drugbank_ID',
                               'name',
                               'finger_print',
                               'block_type',
                               'block_score',
                               'SMILES']]

    # Each fingerprint is stored as a string of digits; every character becomes
    # one element. Whitespace is skipped, as the original np.fromstring parse did.
    bit_rows = [
        [int(bit) for bit in fp if not bit.isspace()]
        for fp in pol_matched['finger_print']
    ]
    # Build one ndarray first: torch.tensor on a list of arrays is a slow path.
    carrier_fp = torch.tensor(np.array(bit_rows, dtype=np.float32))
    return carrier_fp, pol_matched

In [17]:
import DDI_runner

def ddi(drug_fp: torch.Tensor, carrier_fp: torch.Tensor) -> object:
    """Predict the drug-drug interaction between the drug and each carrier.

    Args:
        drug_fp: Fingerprint tensor of the input drug.
        carrier_fp: Fingerprint tensor of the carrier candidates.

    Returns:
        The output of ``DDI_runner.run`` detached from the autograd graph,
        converted to a NumPy array and split into a Python list along its
        first axis.
    """
    raw_prediction = DDI_runner.run(drug_fp, carrier_fp)
    prediction_array = np.array(raw_prediction.detach())
    return list(prediction_array)


In [None]:
# Based on the DDI result, extract the carrier candidates for which BA is computed
import pandas as pd

def pick_candidate(ddi_pred: object, pol_matched: object, inter_type: object = None) -> tuple:
    """Keep the carriers whose predicted interaction has the wanted type.

    Args:
        ddi_pred: Predicted interaction classes, one per row of
            ``pol_matched``. They are used as positional row indices into the
            notebook-level ``inter_dict`` frame.
        pol_matched: Carrier frame returned by ``carrier_encoding``.
        inter_type: ``interaction_type`` value a carrier must have to be kept.
            When omitted, a notebook-level variable named ``inter_type`` is
            used, which is what the original body read.

    Returns:
        ``(carrier_fp, ddi_type, ddi_matched_type, ddi_matched)``: the
        fingerprint tensor of the kept carriers, the label/interaction_type
        frame for every prediction, the boolean mask of kept rows, and the
        kept rows of ``pol_matched`` (Drugbank_ID, finger_print, SMILES).

    Raises:
        ValueError: If no ``inter_type`` is given and none is defined at
            notebook level.
    """
    if inter_type is None:
        inter_type = globals().get('inter_type')
        if inter_type is None:
            raise ValueError(
                "inter_type is not set: pass it to pick_candidate() or define "
                "a notebook-level `inter_type`"
            )

    # Fetch label and interaction type for every predicted class.
    ddi_type = pd.DataFrame(
        np.array(inter_dict[['label', 'interaction_type']].iloc[ddi_pred]),
        columns=['label', 'interaction_type'],
    )

    # Select only the rows with the wanted interaction type. ddi_type has a
    # fresh 0..n-1 index, so its matching labels are row positions in pol_matched.
    ddi_matched_type = ddi_type['interaction_type'] == inter_type
    matched_positions = list(ddi_type.index[ddi_matched_type])
    ddi_matched = pol_matched[['Drugbank_ID', 'finger_print', 'SMILES']].iloc[matched_positions]

    carrier_fp = candidate_fp(ddi_matched)

    return carrier_fp, ddi_type, ddi_matched_type, ddi_matched

In [16]:
# Carrier-candidate fingerprint extraction
def candidate_fp(ddi_matched: object) -> torch.Tensor:
    """Convert the ``finger_print`` column of a carrier frame into a tensor.

    Args:
        ddi_matched: Frame with a ``finger_print`` column whose values are
            strings of digits, one character per fingerprint bit.

    Returns:
        A float32 tensor with one row per frame row and one column per
        fingerprint character. An empty frame gives an empty tensor.

    Raises:
        ValueError: If a fingerprint contains a non-digit, non-whitespace
            character.
    """
    # Every character becomes one element; whitespace is skipped.
    bit_rows = [
        [int(bit) for bit in fp if not bit.isspace()]
        for fp in ddi_matched['finger_print']
    ]
    # Build one ndarray first: torch.tensor on a list of arrays is a slow path.
    return torch.tensor(np.array(bit_rows, dtype=np.float32))


# The function was originally defined under this misspelled name; keep it
# available so existing references keep working.
candicate_fp = candidate_fp

In [6]:
# Target protein encoding
def target_encoding(sequence: str, n_copies: int = 1) -> torch.Tensor:
    """Encode a protein sequence as a fixed-length integer-code tensor.

    Each residue letter is mapped to its position in
    ``'ACDEFGHIKLMNOPQRSTVWXY'``. The codes are cut or zero-padded to 2048
    entries. Padding uses 0, which is also the code of ``'A'``.

    Args:
        sequence: Amino-acid sequence; case is ignored.
        n_copies: Number of identical rows to return, for example one per
            carrier candidate. The original body took this count from a
            ``carrier_fp`` variable that is not defined in this scope.

    Returns:
        A float32 tensor of shape ``(n_copies, 2048)``.

    Raises:
        KeyError: If ``sequence`` contains a letter outside the alphabet above.
    """
    embed_len = 2048
    amino_dict = {s: i for i, s in enumerate('ACDEFGHIKLMNOPQRSTVWXY')}

    # Convert the whole sequence so an unknown letter is rejected wherever it occurs.
    codes = [amino_dict[c] for c in sequence.upper()]
    codes = codes[:embed_len]
    codes.extend([0] * (embed_len - len(codes)))

    sequence_embd = torch.tensor(codes, dtype=torch.float32).view(1, embed_len)
    # One repeat call replaces the per-row torch.cat loop.
    target_embd = sequence_embd.repeat(n_copies, 1)

    return target_embd
    

In [7]:
# Binding-affinity prediction
import BA_runner
def ba(carrier_fp: torch.Tensor, target_embd: torch.Tensor) -> object:
    """Predict binding affinity between each carrier and the target protein.

    Args:
        carrier_fp: Fingerprint tensor of the carrier candidates, one row each.
        target_embd: Encoded target protein. A single-row encoding is repeated
            so that it has one row per carrier.

    Returns:
        A NumPy array of the ``BA_runner.run`` output, rounded to 4 decimals,
        with every non-positive value replaced by 0.0001.
    """
    n_carriers = len(carrier_fp)

    # main() encodes the target before the final candidate count is known, so
    # a single-row encoding is tiled here to line up with the carriers.
    if target_embd.dim() == 2 and target_embd.shape[0] == 1 and n_carriers != 1:
        target_embd = target_embd.repeat(n_carriers, 1)

    # The original passed `sequence_embd`, a name that does not exist here.
    ba_pred = BA_runner.run(carrier_fp, target_embd, n_carriers)
    ba_pred = np.round(np.array(ba_pred.detach()), 4)
    # to_result() takes math.log of these values, so zero must be floored too:
    # rounding can turn a small positive prediction into exactly 0.0.
    ba_pred = np.where(ba_pred <= 0, 0.0001, ba_pred)
    return ba_pred

In [8]:
# Build the DataFrame that is inserted into the DB result table

def make_df(ba_pred: object, ddi_type: object, ddi_matched_type: object, ddi_matched: object, pol_matched: object) -> object:
    """Assemble one result row per selected carrier.

    Reads the notebook-level ``inter_dict`` frame and ``req_id`` value.

    Args:
        ba_pred: Two-column array of binding-affinity predictions; the columns
            are labelled ``ic50`` and ``ec50``.
        ddi_type: Frame with ``label`` and ``interaction_type`` for every
            predicted interaction.
        ddi_matched_type: Boolean mask over ``ddi_type`` marking kept rows.
        ddi_matched: Kept carrier rows; only ``Drugbank_ID`` is used.
        pol_matched: Carrier frame that is merged in to supply ``name``,
            ``block_type`` and ``block_score``.

    Returns:
        A frame with the columns req_id, Drugbank_ID, name, block_type,
        block_score, interaction, interaction_type, ic50 and ec50.
    """
    result = pd.DataFrame(ddi_matched['Drugbank_ID'], columns=['Drugbank_ID'])

    # label / interaction_type of the kept predictions
    ddi_result1 = ddi_type[ddi_matched_type]

    # Interaction text of the kept predictions; labels are used as positional
    # row indices into inter_dict. Rebuilding the frame resets the index to 0..n-1.
    ddi_result2 = inter_dict.iloc[list(ddi_result1['label'])][['label', 'interaction']]
    ddi_result2 = pd.DataFrame(np.array(ddi_result2), columns=['label', 'interaction'])

    ba_result = pd.DataFrame(ba_pred, columns=['ic50', 'ec50'])

    request_id = pd.DataFrame({'req_id': [req_id] * len(result)})

    # The first join aligns on the original row labels; merge() then resets the
    # index, so the remaining joins line up by row order.
    result = (
        result
        .join(ddi_result1)
        .merge(pol_matched)
        .join(ba_result)
        .join(ddi_result2[['interaction']])
        .join(request_id)
    )
    result = result[['req_id', 'Drugbank_ID', 'name', 'block_type', 'block_score', 'interaction', 'interaction_type', 'ic50', 'ec50']]

    return result

In [18]:
import math

def to_result(result: object, drug_name: str = None):
    """Insert one row per carrier into the ``result`` table.

    For each row the placeholders ``A`` and ``B`` in the interaction text are
    replaced by the drug name and the carrier name. The number of
    adverse-effect abstracts mentioning the carrier is counted, a total score
    is computed and the row is inserted.

    Args:
        result: Frame with the columns req_id, Drugbank_ID, name, block_type,
            block_score, interaction, interaction_type, ic50 and ec50.
        drug_name: Name substituted for ``A``. Defaults to the ``drug_name``
            of the notebook-level ``request`` row; the original body read a
            ``drug_name`` variable that is not defined in this scope.

    Raises:
        ValueError: From ``math.log`` if ``ic50`` or ``ec50`` is not positive.
    """
    if drug_name is None:
        drug_name = request.loc[0, 'drug_name']

    def sql_text(value):
        # Double single quotes so a value such as "St. John's wort" does not
        # end the SQL literal. This is a stop-gap, not full injection
        # protection; prefer bound parameters if CallDB supports them.
        return str(value).replace("'", "''")

    for idx, row in result.iterrows():
        name = row['name']
        block_score = row['block_score']
        interaction_type = row['interaction_type']
        ic50 = row['ic50']
        ec50 = row['ec50']

        # Substitute in a single pass so that an 'A' or 'B' inside the
        # inserted drug name is not replaced again.
        placeholders = {'A': drug_name, 'B': name}
        interaction = ''.join(placeholders.get(ch, ch) for ch in row['interaction'])

        n_ref_ae = suitability.from_db(f"""
        SELECT COUNT(*)
        FROM pro_carrier_suitability_assessment.abstract_adverse_effect
        WHERE abstract like '%{sql_text(name)}%'
        """
                                      ).iloc[0,0]

        total_score = (20 * block_score) + (10 * interaction_type) + (-math.log(ic50)) + (-math.log(ec50))

        suitability.query_db(f"""
        INSERT INTO pro_carrier_suitability_assessment.result(req_id, 
                                                              idx, 
                                                              Drugbank_ID, 
                                                              name, 
                                                              block_type, 
                                                              block_score, 
                                                              interaction, 
                                                              interaction_type, 
                                                              ic50, ec50, 
                                                              n_ref_da, 
                                                              n_ref_ae,
                                                              total_score)
        VALUES ('{sql_text(row['req_id'])}', 
                 {idx}, 
                 '{sql_text(row['Drugbank_ID'])}', 
                 '{sql_text(name)}', 
                 {row['block_type']}, 
                 {round(block_score, 3)}, 
                 '{sql_text(interaction)}', 
                 {interaction_type}, 
                 {round(ic50, 3)}, 
                 {round(ec50, 3)},
                 0, 
                 {n_ref_ae},
                 {total_score});    
        """)


In [19]:
def to_result_ref(result: object):
    """Copy the adverse-effect abstracts that mention each carrier.

    For every row of ``result``, the title, year and summary of each abstract
    whose text contains the carrier name are inserted into
    ``result_adverse_effects_ref``, tagged with the row's request id and index.

    Args:
        result: Frame with at least the columns ``req_id`` and ``name``.
    """
    for idx, row in result[['req_id', 'name']].iterrows():
        # Double single quotes so a name such as "St. John's wort" does not
        # end the SQL literal. This is a stop-gap; prefer bound parameters if
        # CallDB supports them.
        req_id_sql = str(row['req_id']).replace("'", "''")
        name_sql = str(row['name']).replace("'", "''")

        suitability.query_db(f"""
        INSERT INTO pro_carrier_suitability_assessment.result_adverse_effects_ref (req_id, index_id, reference_title, year, summary)
        (
        SELECT '{req_id_sql}', {idx}, title, year, summary
        FROM pro_carrier_suitability_assessment.abstract_adverse_effect
        WHERE abstract like '%{name_sql}%'
        );
        """)

In [12]:
# Save the molecule images
from rdkit.Chem import Draw
import os

def to_img_dir(req_id: str, ddi_matched: object,
               img_root: str = '/BiO/projects/polarity/carrier_suitability_assessment/img_result'):
    """Draw every selected carrier to ``<img_root>/<req_id>/<Drugbank_ID>.png``.

    Args:
        req_id: Request identifier; used as the name of the output directory.
        ddi_matched: Frame with the columns ``Drugbank_ID`` and ``SMILES``.
        img_root: Directory under which the per-request directory is created.
            Defaults to the location the original code hard-coded.
    """
    out_dir = f'{img_root}/{req_id}'
    # os.makedirs replaces the `mkdir` shell call: no shell interpolation of
    # req_id, missing parents are created, and a re-run does not fail.
    os.makedirs(out_dir, exist_ok=True)

    for _, (drugbank_id, smi) in ddi_matched[['Drugbank_ID', 'SMILES']].iterrows():
        m = Chem.MolFromSmiles(smi)
        Draw.MolToFile(m, f'{out_dir}/{drugbank_id}.png')