In [None]:
import pandas as pd
import os
import numpy as np

def merge_gene_drug_files(file1_path, file2_path, output_filename="old.csv", column_order=None):
    """
    –û–±—ä–µ–¥–∏–Ω—è–µ—Ç –¥–≤–∞ —Ñ–∞–π–ª–∞ –¥–∞–Ω–Ω—ã—Ö –æ –≥–µ–Ω–∞—Ö –∏ –ø—Ä–µ–ø–∞—Ä–∞—Ç–∞—Ö —Å —É—á–µ—Ç–æ–º –≤—Å–µ—Ö —Ç—Ä–µ–±–æ–≤–∞–Ω–∏–π:
    - –û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –ø–æ Gene, UniProt ID –∏ Drug CHEMBL ID
    - –ö–æ—Ä—Ä–µ–∫—Ç–Ω–∞—è –æ–±—Ä–∞–±–æ—Ç–∫–∞ Protein name
    - –ü—Ä–∞–≤–∏–ª—å–Ω–æ–µ –∑–∞–ø–æ–ª–Ω–µ–Ω–∏–µ Drug CHEMBL ID
    - –ú–∞—Ä–∫–∏—Ä–æ–≤–∫–∞ –∏—Å—Ç–æ—á–Ω–∏–∫–∞ –¥–∞–Ω–Ω—ã—Ö
    - –í–æ–∑–º–æ–∂–Ω–æ—Å—Ç—å –Ω–∞—Å—Ç—Ä–æ–∏—Ç—å –ø–æ—Ä—è–¥–æ–∫ —Å—Ç–æ–ª–±—Ü–æ–≤ —Å –ø–æ–º–æ—â—å—é column_order
    """
    try:
        # 1. –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö
        df_chembl = pd.read_csv(file1_path, sep=None, engine='python', encoding='utf-8')
        df_ttd = pd.read_csv(file2_path, sep=None, engine='python', encoding='utf-8')
        
        # 2. –°—Ç–∞–Ω–¥–∞—Ä—Ç–∏–∑–∞—Ü–∏—è –Ω–∞–∑–≤–∞–Ω–∏–π —Å—Ç–æ–ª–±—Ü–æ–≤
        df_chembl.columns = df_chembl.columns.str.strip()
        df_ttd.columns = df_ttd.columns.str.strip()

        # 3. –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ Drug CHEMBL ID
        def get_drug_chembl_id(df):
            for col in ['Drug ChEMBL ID', 'Drug_ID_ChEMBL', 'Drug_CHEMBL_ID', 'Drug CHEMBL ID']:
                if col in df.columns:
                    return df[col].astype(str).replace('nan', 'N/A')
            return pd.Series(['N/A'] * len(df))

        df_chembl['Drug_CHEMBL_ID'] = get_drug_chembl_id(df_chembl)
        df_ttd['Drug_CHEMBL_ID'] = get_drug_chembl_id(df_ttd)

        # 4. –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ Protein name
        # –ü–µ—Ä–µ–∏–º–µ–Ω–æ–≤—ã–≤–∞–µ–º —Å—Ç–æ–ª–±—Ü—ã –¥–ª—è —è—Å–Ω–æ—Å—Ç–∏
        if 'Protein name' in df_chembl.columns:
            df_chembl['Protein_name_chembl'] = df_chembl['Protein name']
        if 'Protein name' in df_ttd.columns:
            df_ttd['Protein_name_ttd'] = df_ttd['Protein name']

        # 5. –î–æ–±–∞–≤–ª–µ–Ω–∏–µ –≤—Ä–µ–º–µ–Ω–Ω—ã—Ö –º–µ—Ç–æ–∫ –∏—Å—Ç–æ—á–Ω–∏–∫–æ–≤
        df_chembl['_source_chembl'] = True
        df_ttd['_source_ttd'] = True

        # 6. –û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö –ø–æ –∫–ª—é—á–µ–≤—ã–º –ø–æ–ª—è–º
        merge_keys = ['Gene', 'UniProt ID', 'Drug_CHEMBL_ID']
        merged = pd.merge(
            df_chembl, 
            df_ttd, 
            on=merge_keys, 
            how='outer', 
            suffixes=('_ChEMBL', '_TTD')
        )

        # 7. –û–±—Ä–∞–±–æ—Ç–∫–∞ Protein name
        merged['Protein name'] = merged['Protein_name_chembl'].combine_first(merged['Protein_name_ttd'])
        
        # 8. –û–±—Ä–∞–±–æ—Ç–∫–∞ Drug CHEMBL ID
        # –£–∂–µ –ø—Ä–∞–≤–∏–ª—å–Ω–æ –æ–±—ä–µ–¥–∏–Ω–µ–Ω–æ –ø–æ merge_keys

        # 9. –û–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –∏—Å—Ç–æ—á–Ω–∏–∫–∞ –¥–∞–Ω–Ω—ã—Ö
        conditions = [
            (merged['_source_chembl'].notna() & merged['_source_ttd'].notna()),  # –û–±–µ –±–∞–∑—ã
            (merged['_source_chembl'].notna()),  # –¢–æ–ª—å–∫–æ ChEMBL
            (merged['_source_ttd'].notna())      # –¢–æ–ª—å–∫–æ TTD
        ]
        choices = ['ChEMBL & TTD', 'ChEMBL', 'TTD']
        merged['Database'] = np.select(conditions, choices, default='Unknown')

        # 10. –£–¥–∞–ª–µ–Ω–∏–µ –≤—Ä–µ–º–µ–Ω–Ω—ã—Ö –∏ –ª–∏—à–Ω–∏—Ö —Å—Ç–æ–ª–±—Ü–æ–≤
        cols_to_drop = ['_source_chembl', '_source_ttd', 'Protein_name_chembl', 'Protein_name_ttd', 
                        'Drug TTD ID', 'Protein TTD ID', 'Protein ChEMBL ID', 'Protein name_ChEMBL', 'Protein name_TTD']
        
        merged.drop(cols_to_drop, axis=1, inplace=True, errors='ignore')

        # 11. –£–¥–∞–ª–µ–Ω–∏–µ –ª–∏—à–Ω–µ–≥–æ —Å—Ç–æ–ª–±—Ü–∞ Drug CHEMBL ID (–µ—Å–ª–∏ –ø–æ—è–≤–∏–ª—Å—è –ª–∏—à–Ω–∏–π)
        merged = merged.loc[:, ~merged.columns.str.contains('Drug CHEMBL ID', case=False)].copy()

        # 12. –ü–µ—Ä–µ–∏–º–µ–Ω–æ–≤–∞–Ω–∏–µ —Å—Ç–æ–ª–±—Ü–æ–≤
        merged.rename(columns={
            'Drug_CHEMBL_ID': 'Drug ChEMBL ID',
            'Drug name_ChEMBL': 'Drug name ChEMBL',
            'Drug name_TTD': 'Drug name TTD'
        }, inplace=True)

        # 13. –ï—Å–ª–∏ –ø–µ—Ä–µ–¥–∞–Ω —Å–ø–∏—Å–æ–∫ column_order, –ø—Ä–∏–º–µ–Ω—è–µ–º –µ–≥–æ
        if column_order:
            # –ü—Ä–æ–≤–µ—Ä–∏–º, —á—Ç–æ –≤—Å–µ —Å—Ç–æ–ª–±—Ü—ã –∏–∑ column_order –ø—Ä–∏—Å—É—Ç—Å—Ç–≤—É—é—Ç –≤ –¥–∞–Ω–Ω—ã—Ö
            if all(col in merged.columns for col in column_order):
                merged = merged[column_order]
            else:
                print("‚ùå –ù–µ–∫–æ—Ç–æ—Ä—ã–µ —Å—Ç–æ–ª–±—Ü—ã –∏–∑ column_order –Ω–µ –Ω–∞–π–¥–µ–Ω—ã –≤ –¥–∞–Ω–Ω—ã—Ö, –∏—Å–ø–æ–ª—å–∑—É–µ–º —Å—Ç–∞–Ω–¥–∞—Ä—Ç–Ω—ã–π –ø–æ—Ä—è–¥–æ–∫")

        # 14. –°–æ—Ä—Ç–∏—Ä–æ–≤–∫–∞ –ø–æ –∑–Ω–∞—á–µ–Ω–∏—é –≤ —Å—Ç–æ–ª–±—Ü–µ 'Drug Mechanism ChEMBL'
        # –î–ª—è –Ω–∞—á–∞–ª–∞ —Å–æ–∑–¥–∞–µ–º –ø–æ—Ä—è–¥–æ–∫ —Å–æ—Ä—Ç–∏—Ä–æ–≤–∫–∏ –¥–ª—è –∑–Ω–∞—á–µ–Ω–∏–π "–î–∞", "–ù–µ—Ç" –∏ –¥—Ä—É–≥–∏—Ö
        def sort_drug_mechanism(row):
            if row == "–î–∞":
                return 0
            elif row == "–ù–µ—Ç":
                return 1
            else:
                return 2

        # –ü—Ä–∏–º–µ–Ω—è–µ–º —Å–æ—Ä—Ç–∏—Ä–æ–≤–∫—É –ø–æ "Drug Mechanism ChEMBL"
        merged['Drug Mechanism ChEMBL_sort'] = merged['Drug Mechanism ChEMBL'].apply(sort_drug_mechanism)
        merged = merged.sort_values(by=['Gene', 'Drug Mechanism ChEMBL_sort']).drop(columns=['Drug Mechanism ChEMBL_sort'])

        # 15. –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞
        output_path = os.path.join(os.getcwd(), output_filename)
        merged.to_csv(output_path, sep=',', index=False, encoding='utf-8')
        
        # 16. –í–∞–ª–∏–¥–∞—Ü–∏—è —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞
        print(f"‚úÖ –§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: {output_path}")
        print(f"–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: {len(merged)}")
        print("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:")
        print(merged['Database'].value_counts())
        
        return merged

    except Exception as e:
        print(f"‚ùå –û—à–∏–±–∫–∞: {str(e)}")
        if 'df_chembl' in locals():
            print("\n–°—Ç–æ–ª–±—Ü—ã –≤ —Ñ–∞–π–ª–µ ChEMBL:", df_chembl.columns.tolist())
        if 'df_ttd' in locals():
            print("–°—Ç–æ–ª–±—Ü—ã –≤ —Ñ–∞–π–ª–µ TTD:", df_ttd.columns.tolist())
        return None

# –ü—Ä–∏–º–µ—Ä –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è
if __name__ == '__main__':
    # –£–∫–∞–∂–∏—Ç–µ –ø—É—Ç–∏ –∫ –≤–∞—à–∏–º —Ñ–∞–π–ª–∞–º
    chembl_file = r"../chembl/extracted_chembl_gene_drug_data.csv"
    ttd_file = r"../ttd/extracted_ttd_gene_drug_data.csv"
    
    # –û–ø—Ä–µ–¥–µ–ª–∏—Ç–µ –ø–æ—Ä—è–¥–æ–∫ —Å—Ç–æ–ª–±—Ü–æ–≤, –µ—Å–ª–∏ –Ω—É–∂–Ω–æ –∏–∑–º–µ–Ω–∏—Ç—å
    custom_column_order = [
        'Gene', 'UniProt ID', 'Protein name',
        'Drug ChEMBL ID', 'Drug name ChEMBL', 'Drug name TTD',
        'Drug Mechanism ChEMBL', 'pChEMBL value',
        'Activity type ChEMBL', 'Activity value ChEMBL',
        'Activity type TTD', 'Activity value TTD',
        'Max phase ChEMBL', 'Max phase TTD',
        'Action type ChEMBL', 'Mechanism of Action ChEMBL', 
        'Action type TTD', 'Database'
    ]
    
    result = merge_gene_drug_files(chembl_file, ttd_file, column_order=custom_column_order)
    
    if result is not None:
        print("\n–ü–µ—Ä–≤—ã–µ 5 —Å—Ç—Ä–æ–∫ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞:")
        print(result.head())


‚úÖ –§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\–ù–æ–≤—ã–µ\–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ\old.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 981
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          905
TTD              68
ChEMBL & TTD      8
Name: count, dtype: int64

–ü–µ—Ä–≤—ã–µ 5 —Å—Ç—Ä–æ–∫ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞:
   Gene UniProt ID                  Protein name Drug ChEMBL ID  \
2   ALK     Q9UM73  ALK tyrosine kinase receptor  CHEMBL1983268   
13  ALK     Q9UM73  ALK tyrosine kinase receptor  CHEMBL2403108   
35  ALK     Q9UM73  ALK tyrosine kinase receptor  CHEMBL3286830   
40  ALK     Q9UM73  ALK tyrosine kinase receptor  CHEMBL3545311   
41  ALK     Q9UM73  ALK tyrosine kinase receptor  CHEMBL3545360   

   Drug name ChEMBL Drug name TTD Drug Mechanism ChEMBL  pChEMBL value  \
2       ENTRECTINIB   Entrectinib                    –î–∞           9.00   
13        CERITINIB     Ceritinib                    –î–∞       

In [1]:
import pandas as pd
import os
import numpy as np
from chembl_webresource_client.new_client import new_client
from typing import List, Dict, Set, Optional, Tuple
import re

# –§—É–Ω–∫—Ü–∏–∏ –¥–ª—è –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏–∏ –∏ —Å–æ–ø–æ—Å—Ç–∞–≤–ª–µ–Ω–∏—è –Ω–∞–∑–≤–∞–Ω–∏–π –ø—Ä–µ–ø–∞—Ä–∞—Ç–æ–≤
def normalize(text: str) -> str:
    """–†–∞–¥–∏–∫–∞–ª—å–Ω–∞—è –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏—è —Ç–µ–∫—Å—Ç–∞"""
    return re.sub(r"[-‚Äì‚Äî'\",.(){}\[\]:;!?\\/+\s]", "", text.upper())

def basic_clean(text: str) -> str:
    """–ú—è–≥–∫–∞—è –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏—è"""
    return re.sub(r"[-‚Äì‚Äî'\",.(){}\[\]:;!?\\/]", "", text.upper())

def get_chembl_family_mapping(chembl_ids: List[str]) -> Dict[str, str]:
    """–°–æ–∑–¥–∞–µ–º mapping: –ª—é–±–æ–π ChEMBL ID -> –±–ª–∏–∂–∞–π—à–∏–π —Ä–æ–¥–∏—Ç–µ–ª—å—Å–∫–∏–π ID –∏–∑ –Ω–∞—à–µ–≥–æ —Å–ø–∏—Å–∫–∞"""
    molecule_form = new_client.molecule_form
    mapping = {}
    
    for chembl_id in chembl_ids:
        mapping[chembl_id] = chembl_id  # –°–∞–º —Å–µ–±–µ —Ä–æ–¥–∏—Ç–µ–ª—å
        
        try:
            # –ü–æ–ª—É—á–∞–µ–º –≤—Å–µ—Ö —á–ª–µ–Ω–æ–≤ —Å–µ–º–µ–π—Å—Ç–≤–∞
            family_members = list(molecule_form.filter(parent_chembl_id=chembl_id).only('molecule_chembl_id'))
            for member in family_members:
                if 'molecule_chembl_id' in member:
                    mapping[member['molecule_chembl_id']] = chembl_id
        except Exception as e:
            print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –ø–æ–ª—É—á–µ–Ω–∏–∏ —Å–µ–º–µ–π—Å—Ç–≤–∞ –¥–ª—è {chembl_id}: {e}")
    
    return mapping

def find_drug_matches(ttd_drug_names: List[str], chembl_ids: List[str]) -> Dict[str, str]:
    """–ù–∞—Ö–æ–¥–∏—Ç —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤–∏—è –º–µ–∂–¥—É –Ω–∞–∑–≤–∞–Ω–∏—è–º–∏ –ø—Ä–µ–ø–∞—Ä–∞—Ç–æ–≤ –∏–∑ TTD –∏ ChEMBL ID"""
    molecule = new_client.molecule
    matches = {}
    family_mapping = get_chembl_family_mapping(chembl_ids)
    all_related_ids = set(family_mapping.keys())
    
    for drug in ttd_drug_names:
        if pd.isna(drug) or drug.strip() == "":
            continue
            
        found = False
        # 1Ô∏è‚É£ –ü–µ—Ä–≤–∞—è —Å—Ç–∞–¥–∏—è: —Ç–æ—á–Ω—ã–π –ø–æ–∏—Å–∫ –ø–æ —Å–∏–Ω–æ–Ω–∏–º–∞–º
        for chembl_id in all_related_ids:
            try:
                mol = molecule.get(chembl_id)
                synonyms = mol.get("molecule_synonyms", [])
                for syn in synonyms:
                    if isinstance(syn, dict) and drug.lower() == syn.get("molecule_synonym", "").lower():
                        matches[drug] = family_mapping[chembl_id]
                        found = True
                        break
                if found:
                    break
            except Exception:
                continue
        
        if found:
            continue
            
        # 2Ô∏è‚É£ –í—Ç–æ—Ä–∞—è —Å—Ç–∞–¥–∏—è: –ø–æ–∏—Å–∫ –≤ –≤–µ—Ä—Ö–Ω–µ–º —Ä–µ–≥–∏—Å—Ç—Ä–µ
        upper_drug = drug.upper()
        for chembl_id in all_related_ids:
            try:
                mol = molecule.get(chembl_id)
                synonyms = mol.get("molecule_synonyms", [])
                for syn in synonyms:
                    if isinstance(syn, dict) and upper_drug == syn.get("molecule_synonym", "").upper():
                        matches[drug] = family_mapping[chembl_id]
                        found = True
                        break
                if found:
                    break
            except Exception:
                continue
        
        if found:
            continue
            
        # 3Ô∏è‚É£ –¢—Ä–µ—Ç—å—è —Å—Ç–∞–¥–∏—è: –º—è–≥–∫–∞—è –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏—è
        cleaned_drug = basic_clean(drug)
        for chembl_id in all_related_ids:
            try:
                mol = molecule.get(chembl_id)
                synonyms = mol.get("molecule_synonyms", [])
                for syn in synonyms:
                    if isinstance(syn, dict) and cleaned_drug == basic_clean(syn.get("molecule_synonym", "")):
                        matches[drug] = family_mapping[chembl_id]
                        found = True
                        break
                if found:
                    break
            except Exception:
                continue
    
    return matches

def merge_gene_drug_files(file1_path, file2_path, output_filename="chembl_ttd_integration_data.csv", column_order=None):
    """
    –û–±—ä–µ–¥–∏–Ω—è–µ—Ç –¥–≤–∞ —Ñ–∞–π–ª–∞ –¥–∞–Ω–Ω—ã—Ö –æ –≥–µ–Ω–∞—Ö –∏ –ø—Ä–µ–ø–∞—Ä–∞—Ç–∞—Ö —Å —É—á–µ—Ç–æ–º –≤—Å–µ—Ö —Ç—Ä–µ–±–æ–≤–∞–Ω–∏–π:
    - –û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –ø–æ Gene, UniProt ID –∏ Drug CHEMBL ID
    - –û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –ø–æ –Ω–∞–∑–≤–∞–Ω–∏—è–º –ø—Ä–µ–ø–∞—Ä–∞—Ç–æ–≤ —Å –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ–º ChEMBL API
    - –ö–æ—Ä—Ä–µ–∫—Ç–Ω–∞—è –æ–±—Ä–∞–±–æ—Ç–∫–∞ Drug name
    - –ú–∞—Ä–∫–∏—Ä–æ–≤–∫–∞ –∏—Å—Ç–æ—á–Ω–∏–∫–∞ –¥–∞–Ω–Ω—ã—Ö
    """
    try:
        # 1. –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö
        df_chembl = pd.read_csv(file1_path, sep=None, engine='python', encoding='utf-8')
        df_ttd = pd.read_csv(file2_path, sep=None, engine='python', encoding='utf-8')
        
        # 2. –°—Ç–∞–Ω–¥–∞—Ä—Ç–∏–∑–∞—Ü–∏—è –Ω–∞–∑–≤–∞–Ω–∏–π —Å—Ç–æ–ª–±—Ü–æ–≤
        df_chembl.columns = df_chembl.columns.str.strip()
        df_ttd.columns = df_ttd.columns.str.strip()

        # 3. –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ Drug CHEMBL ID (–∏—Å–ø–æ–ª—å–∑—É–µ–º —Å—Ç–∞—Ä—É—é –ª–æ–≥–∏–∫—É)
        def get_drug_chembl_id(df):
            for col in ['Drug ChEMBL ID', 'Drug_ID_ChEMBL', 'Drug_CHEMBL_ID', 'Drug CHEMBL ID']:
                if col in df.columns:
                    return df[col].astype(str).replace('nan', 'N/A')
            return pd.Series(['N/A'] * len(df))

        df_chembl['Drug_CHEMBL_ID'] = get_drug_chembl_id(df_chembl)
        df_ttd['Drug_CHEMBL_ID'] = get_drug_chembl_id(df_ttd)

        # 4. –ü–æ–ª—É—á–∞–µ–º –≤—Å–µ —É–Ω–∏–∫–∞–ª—å–Ω—ã–µ ChEMBL ID –¥–ª—è –ø–æ–∏—Å–∫–∞ —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤–∏–π
        all_chembl_ids = list(set(df_chembl['Drug_CHEMBL_ID'].unique().tolist() + 
                                df_ttd['Drug_CHEMBL_ID'].unique().tolist()))
        all_chembl_ids = [x for x in all_chembl_ids if x != 'N/A']

        # 5. –ù–∞—Ö–æ–¥–∏–º —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤–∏—è –º–µ–∂–¥—É –Ω–∞–∑–≤–∞–Ω–∏—è–º–∏ –ø—Ä–µ–ø–∞—Ä–∞—Ç–æ–≤ –∏–∑ TTD –∏ ChEMBL ID
        ttd_drug_names = df_ttd['Drug name'].dropna().unique().tolist()
        drug_matches = find_drug_matches(ttd_drug_names, all_chembl_ids)

        # 6. –°–æ–∑–¥–∞–µ–º –≤—Ä–µ–º–µ–Ω–Ω—ã–µ —Å—Ç–æ–ª–±—Ü—ã –¥–ª—è –æ–±—ä–µ–¥–∏–Ω–µ–Ω–∏—è
        df_ttd['Drug_CHEMBL_ID_from_name'] = df_ttd['Drug name'].map(drug_matches)
        
        # 7. –î–æ–±–∞–≤–ª—è–µ–º –º–µ—Ç–∫–∏ –∏—Å—Ç–æ—á–Ω–∏–∫–æ–≤
        df_chembl['_source_chembl'] = True
        df_ttd['_source_ttd'] = True

        # 8. –û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö –ø–æ –∫–ª—é—á–µ–≤—ã–º –ø–æ–ª—è–º (ChEMBL ID)
        merge_keys = ['Gene', 'UniProt ID', 'Drug_CHEMBL_ID']
        merged = pd.merge(
            df_chembl, 
            df_ttd, 
            on=merge_keys, 
            how='outer', 
            suffixes=('_ChEMBL', '_TTD'),
            indicator='Database'
        )

        # 9. –û–±–Ω–æ–≤–ª—è–µ–º —Å—Ç–æ–ª–±–µ—Ü Database –¥–ª—è –ø–æ–Ω—è—Ç–Ω–æ–≥–æ –æ—Ç–æ–±—Ä–∞–∂–µ–Ω–∏—è –∏—Å—Ç–æ—á–Ω–∏–∫–æ–≤
        merged['Database'] = merged['Database'].map({
            'left_only': 'ChEMBL',
            'right_only': 'TTD',
            'both': 'ChEMBL & TTD'
        })

        # 10. –û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –ø–æ –Ω–∞–∑–≤–∞–Ω–∏—è–º –ø—Ä–µ–ø–∞—Ä–∞—Ç–æ–≤ (–≥–¥–µ ChEMBL ID –Ω–µ —Å–æ–≤–ø–∞–ª–∏)
        for idx, row in df_ttd.iterrows():
            if pd.notna(row['Drug_CHEMBL_ID_from_name']) and row['Drug_CHEMBL_ID_from_name'] != 'N/A':
                # –ò—â–µ–º —Å—Ç—Ä–æ–∫–∏ –≤ df_chembl —Å —Ç–∞–∫–∏–º ChEMBL ID
                chembl_rows = df_chembl[df_chembl['Drug_CHEMBL_ID'] == row['Drug_CHEMBL_ID_from_name']]
                
                for _, chembl_row in chembl_rows.iterrows():
                    # –ü—Ä–æ–≤–µ—Ä—è–µ–º, —á—Ç–æ —Ç–∞–∫–∞—è —Å—Ç—Ä–æ–∫–∞ –µ—â–µ –Ω–µ –æ–±—ä–µ–¥–∏–Ω–µ–Ω–∞
                    merge_condition = (
                        (merged['Gene'] == chembl_row['Gene']) & 
                        (merged['UniProt ID'] == chembl_row['UniProt ID']) & 
                        (merged['Drug_CHEMBL_ID'] == row['Drug_CHEMBL_ID_from_name'])
                    )
                    
                    if not merged[merge_condition].empty:
                        continue
                        
                    # –°–æ–∑–¥–∞–µ–º –Ω–æ–≤—É—é –æ–±—ä–µ–¥–∏–Ω–µ–Ω–Ω—É—é —Å—Ç—Ä–æ–∫—É
                    new_row = {
                        'Gene': chembl_row['Gene'],
                        'UniProt ID': chembl_row['UniProt ID'],
                        'Drug_CHEMBL_ID': row['Drug_CHEMBL_ID_from_name'],
                        'Database': '–û–±–∞'
                    }
                    
                    # –ó–∞–ø–æ–ª–Ω—è–µ–º –æ—Å—Ç–∞–ª—å–Ω—ã–µ –ø–æ–ª—è –∏–∑ ChEMBL
                    for col in df_chembl.columns:
                        if col not in merge_keys and col != 'Drug_CHEMBL_ID' and col != '_source_chembl':
                            new_col = f"{col}_ChEMBL" if col in df_ttd.columns else col
                            new_row[new_col] = chembl_row[col]
                    
                    # –ó–∞–ø–æ–ª–Ω—è–µ–º –æ—Å—Ç–∞–ª—å–Ω—ã–µ –ø–æ–ª—è –∏–∑ TTD
                    for col in df_ttd.columns:
                        if col not in merge_keys and col != 'Drug_CHEMBL_ID' and col != '_source_ttd' and col != 'Drug_CHEMBL_ID_from_name':
                            new_col = f"{col}_TTD" if col in df_chembl.columns else col
                            new_row[new_col] = row[col]
                    
                    merged = pd.concat([merged, pd.DataFrame([new_row])], ignore_index=True)

        # 11. –û–±—Ä–∞–±–æ—Ç–∫–∞ Protein name
        if 'Protein name_ChEMBL' in merged.columns and 'Protein name_TTD' in merged.columns:
            merged['Protein name'] = merged['Protein name_ChEMBL'].combine_first(merged['Protein name_TTD'])
            merged.drop(['Protein name_ChEMBL', 'Protein name_TTD'], axis=1, inplace=True, errors='ignore')
        elif 'Protein name_ChEMBL' in merged.columns:
            merged.rename(columns={'Protein name_ChEMBL': 'Protein name'}, inplace=True)
        elif 'Protein name_TTD' in merged.columns:
            merged.rename(columns={'Protein name_TTD': 'Protein name'}, inplace=True)

        # 12. –û–±—Ä–∞–±–æ—Ç–∫–∞ Drug name
        if 'Drug name_ChEMBL' in merged.columns and 'Drug name_TTD' in merged.columns:
            merged['Drug name'] = merged['Drug name_ChEMBL'].combine_first(merged['Drug name_TTD'])
            merged.drop(['Drug name_ChEMBL', 'Drug name_TTD'], axis=1, inplace=True, errors='ignore')
        elif 'Drug name_ChEMBL' in merged.columns:
            merged.rename(columns={'Drug name_ChEMBL': 'Drug name'}, inplace=True)
        elif 'Drug name_TTD' in merged.columns:
            merged.rename(columns={'Drug name_TTD': 'Drug name'}, inplace=True)

        # 13. –£–¥–∞–ª–µ–Ω–∏–µ –≤—Ä–µ–º–µ–Ω–Ω—ã—Ö –∏ –ª–∏—à–Ω–∏—Ö —Å—Ç–æ–ª–±—Ü–æ–≤
        cols_to_drop = ['_source_chembl', '_source_ttd', 'Drug_CHEMBL_ID_from_name',
                       'Drug TTD ID', 'Protein TTD ID', 'Protein ChEMBL ID']
        
        merged.drop([col for col in cols_to_drop if col in merged.columns], axis=1, inplace=True, errors='ignore')

        # 14. –£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤ —Å—Ç–æ–ª–±—Ü–æ–≤ Drug CHEMBL ID (–∏–∑ —Å—Ç–∞—Ä–æ–π –≤–µ—Ä—Å–∏–∏)
        merged = merged.loc[:, ~merged.columns.duplicated()].copy()
        merged = merged.loc[:, ~merged.columns.str.contains('^Drug CHEMBL ID$', case=False, regex=True)].copy()
        
        # 15. –ü–µ—Ä–µ–∏–º–µ–Ω–æ–≤—ã–≤–∞–µ–º Drug_CHEMBL_ID –≤ Drug ChEMBL ID (–∏–∑ —Å—Ç–∞—Ä–æ–π –≤–µ—Ä—Å–∏–∏)
        merged.rename(columns={'Drug_CHEMBL_ID': 'Drug ChEMBL ID'}, inplace=True)

        # 16. –ï—Å–ª–∏ –ø–µ—Ä–µ–¥–∞–Ω —Å–ø–∏—Å–æ–∫ column_order, –ø—Ä–∏–º–µ–Ω—è–µ–º –µ–≥–æ
        if column_order:
            # –ü—Ä–æ–≤–µ—Ä–∏–º, —á—Ç–æ –≤—Å–µ —Å—Ç–æ–ª–±—Ü—ã –∏–∑ column_order –ø—Ä–∏—Å—É—Ç—Å—Ç–≤—É—é—Ç –≤ –¥–∞–Ω–Ω—ã—Ö
            existing_cols = [col for col in column_order if col in merged.columns]
            other_cols = [col for col in merged.columns if col not in existing_cols]
            merged = merged[existing_cols + other_cols]

        # 17. –°–æ—Ä—Ç–∏—Ä–æ–≤–∫–∞ –ø–æ –∑–Ω–∞—á–µ–Ω–∏—é –≤ —Å—Ç–æ–ª–±—Ü–µ 'Drug Mechanism ChEMBL'
        if 'Drug Mechanism ChEMBL' in merged.columns:
            def sort_drug_mechanism(row):
                if row == "–î–∞":
                    return 0
                elif row == "–ù–µ—Ç":
                    return 1
                else:
                    return 2

            merged['Drug Mechanism ChEMBL_sort'] = merged['Drug Mechanism ChEMBL'].apply(sort_drug_mechanism)
            merged = merged.sort_values(by=['Gene', 'Drug Mechanism ChEMBL_sort'])
            merged.drop('Drug Mechanism ChEMBL_sort', axis=1, inplace=True)

        # 18. –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞
        output_path = os.path.join(os.getcwd(), output_filename)
        merged.to_csv(output_path, sep=',', index=False, encoding='utf-8')
        
        # 19. –í–∞–ª–∏–¥–∞—Ü–∏—è —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞
        print(f"‚úÖ –§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: {output_path}")
        print(f"–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: {len(merged)}")
        print("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:")
        print(merged['Database'].value_counts())
        
        return merged

    except Exception as e:
        print(f"‚ùå –û—à–∏–±–∫–∞: {str(e)}")
        if 'df_chembl' in locals():
            print("\n–°—Ç–æ–ª–±—Ü—ã –≤ —Ñ–∞–π–ª–µ ChEMBL:", df_chembl.columns.tolist())
        if 'df_ttd' in locals():
            print("–°—Ç–æ–ª–±—Ü—ã –≤ —Ñ–∞–π–ª–µ TTD:", df_ttd.columns.tolist())
        return None

# –ü—Ä–∏–º–µ—Ä –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è
if __name__ == '__main__':
    # –£–∫–∞–∂–∏—Ç–µ –ø—É—Ç–∏ –∫ –≤–∞—à–∏–º —Ñ–∞–π–ª–∞–º
    chembl_file = r"../chembl/extracted_chembl_gene_drug_data.csv"
    ttd_file = r"../ttd/extracted_ttd_gene_drug_data.csv"
    
    # –û–ø—Ä–µ–¥–µ–ª–∏—Ç–µ –ø–æ—Ä—è–¥–æ–∫ —Å—Ç–æ–ª–±—Ü–æ–≤, –µ—Å–ª–∏ –Ω—É–∂–Ω–æ –∏–∑–º–µ–Ω–∏—Ç—å
    custom_column_order = [
        'Gene', 'UniProt ID', 'Protein name',
        'Drug ChEMBL ID', 'Drug name',
        'Drug Mechanism ChEMBL', 'pChEMBL value',
        'Activity type ChEMBL', 'Activity value ChEMBL',
        'Activity type TTD', 'Activity value TTD',
        'Max phase ChEMBL', 'Max phase TTD',
        'Action type ChEMBL', 'Mechanism of Action ChEMBL', 
        'Action type TTD', 'Database'
    ]
    
    result = merge_gene_drug_files(chembl_file, ttd_file, column_order=custom_column_order)
    
    if result is not None:
        print("\n–ü–µ—Ä–≤—ã–µ 5 —Å—Ç—Ä–æ–∫ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞:")
        print(result.head())

‚úÖ –§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\chembl_ttd_integration_data.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 211
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          183
TTD              23
ChEMBL & TTD      5
Name: count, dtype: int64

–ü–µ—Ä–≤—ã–µ 5 —Å—Ç—Ä–æ–∫ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞:
   Gene UniProt ID                  Protein name Drug ChEMBL ID    Drug name  \
2   ALK     Q9UM73  ALK tyrosine kinase receptor  CHEMBL1983268  ENTRECTINIB   
13  ALK     Q9UM73  ALK tyrosine kinase receptor  CHEMBL2403108    CERITINIB   
35  ALK     Q9UM73  ALK tyrosine kinase receptor  CHEMBL3286830   LORLATINIB   
40  ALK     Q9UM73  ALK tyrosine kinase receptor  CHEMBL3545311   BRIGATINIB   
41  ALK     Q9UM73  ALK tyrosine kinase receptor  CHEMBL3545360     ASP-3026   

   Drug Mechanism ChEMBL  pChEMBL value Activity type ChEMBL  \
2                     –î–∞           9.00     

In [6]:
import pandas as pd
import os
import numpy as np
from chembl_webresource_client.new_client import new_client
from typing import List, Dict, Set, Optional, Tuple
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache
from tqdm import tqdm
import time
import warnings

# –ù–∞—Å—Ç—Ä–æ–π–∫–∏
warnings.filterwarnings('ignore')
API_DELAY = 0.2
MAX_WORKERS = 2
CACHE_SIZE = 1000
BATCH_SIZE = 50

# –§—É–Ω–∫—Ü–∏–∏ –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏–∏
def normalize(text: str) -> str:
    return re.sub(r"[-‚Äì‚Äî'\",.(){}\[\]:;!?\\/+\s]", "", text.upper())

def basic_clean(text: str) -> str:
    return re.sub(r"[-‚Äì‚Äî'\",.(){}\[\]:;!?\\/]", "", text.upper())

# –ö—ç—à–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ –∑–∞–ø—Ä–æ—Å—ã
@lru_cache(maxsize=CACHE_SIZE)
def get_molecule_cached(chembl_id: str) -> Optional[dict]:
    time.sleep(API_DELAY)
    try:
        return new_client.molecule.get(chembl_id)
    except Exception as e:
        print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –ø–æ–ª—É—á–µ–Ω–∏–∏ {chembl_id}: {str(e)}")
        return None

def get_molecules_batch(chembl_ids: List[str]) -> Dict[str, Optional[dict]]:
    results = {}
    for i in range(0, len(chembl_ids), BATCH_SIZE):
        batch = chembl_ids[i:i+BATCH_SIZE]
        try:
            molecules = new_client.molecule.filter(molecule_chembl_id__in=batch).only([
                'molecule_chembl_id', 'molecule_synonyms', 'parent_chembl_id'
            ])
            for mol in molecules:
                results[mol['molecule_chembl_id']] = mol
            time.sleep(API_DELAY * 2)
        except Exception as e:
            print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –ø–æ–ª—É—á–µ–Ω–∏–∏ –ø–∞—á–∫–∏: {str(e)}")
            for chembl_id in batch:
                results[chembl_id] = get_molecule_cached(chembl_id)
    return results

def get_chembl_family_mapping_safe(chembl_ids: List[str]) -> Dict[str, str]:
    mapping = {}
    for chembl_id in tqdm(chembl_ids, desc="Building family map"):
        mapping[chembl_id] = chembl_id
        try:
            time.sleep(API_DELAY)
            family_members = list(new_client.molecule_form.filter(parent_chembl_id=chembl_id).only('molecule_chembl_id'))
            for member in family_members:
                if 'molecule_chembl_id' in member:
                    mapping[member['molecule_chembl_id']] = chembl_id
        except Exception as e:
            print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –ø–æ–ª—É—á–µ–Ω–∏–∏ —Å–µ–º–µ–π—Å—Ç–≤–∞ –¥–ª—è {chembl_id}: {str(e)}")
    return mapping

def find_drug_matches_safe(ttd_drug_names: List[str], chembl_ids: List[str]) -> Dict[str, str]:
    if not chembl_ids or not ttd_drug_names:
        return {}
    
    matches = {}
    chunk_size = 100
    
    for i in tqdm(range(0, len(chembl_ids), chunk_size), desc="Processing ChEMBL IDs"):
        chunk_ids = chembl_ids[i:i+chunk_size]
        molecules_data = get_molecules_batch(chunk_ids)
        family_mapping = get_chembl_family_mapping_safe(chunk_ids)
        
        synonyms_index = {}
        for chembl_id, mol_data in molecules_data.items():
            if not mol_data or 'molecule_synonyms' not in mol_data:
                continue
                
            for syn in mol_data['molecule_synonyms']:
                if not isinstance(syn, dict):
                    continue
                    
                syn_text = syn.get("molecule_synonym", "")
                if not syn_text:
                    continue
                    
                variants = {syn_text.lower(), syn_text.upper(), basic_clean(syn_text)}
                
                for variant in variants:
                    if variant not in synonyms_index:
                        synonyms_index[variant] = chembl_id
        
        for drug in ttd_drug_names:
            if pd.isna(drug) or not str(drug).strip() or drug in matches:
                continue
                
            for variant in [str(drug).lower(), str(drug).upper(), basic_clean(str(drug))]:
                if variant in synonyms_index:
                    chembl_id = synonyms_index[variant]
                    matches[drug] = family_mapping.get(chembl_id, chembl_id)
                    break
    
    return matches

def remove_duplicates(df: pd.DataFrame) -> pd.DataFrame:
    """–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤ —Å –ø—Ä–∏–æ—Ä–∏—Ç–µ—Ç–æ–º –Ω–∞ 'ChEMBL & TTD'"""
    if df.empty:
        return df
    
    # –°–æ–∑–¥–∞–µ–º —Å—Ç–æ–ª–±–µ—Ü –¥–ª—è —Å–æ—Ä—Ç–∏—Ä–æ–≤–∫–∏ (–ø—Ä–∏–æ—Ä–∏—Ç–µ—Ç: 'ChEMBL & TTD' > 'ChEMBL' > 'TTD')
    df['_sort_priority'] = df['Database'].map({
        'ChEMBL & TTD': 0,
        'ChEMBL': 1,
        'TTD': 2
    })
    
    # –°–æ—Ä—Ç–∏—Ä—É–µ–º –ø–æ –ø—Ä–∏–æ—Ä–∏—Ç–µ—Ç—É –∏ —É–¥–∞–ª—è–µ–º –¥—É–±–ª–∏–∫–∞—Ç—ã
    df = df.sort_values(by=['Gene', 'Drug ChEMBL ID', '_sort_priority'])
    df = df.drop_duplicates(subset=['Gene', 'Drug ChEMBL ID'], keep='first')
    
    # –£–¥–∞–ª—è–µ–º –≤—Ä–µ–º–µ–Ω–Ω—ã–π —Å—Ç–æ–ª–±–µ—Ü
    df = df.drop(columns=['_sort_priority'])
    
    return df

def safe_merge_data(df_chembl: pd.DataFrame, df_ttd: pd.DataFrame) -> pd.DataFrame:
    try:
        merge_keys = ['Gene', 'UniProt ID', 'Drug_CHEMBL_ID']
        
        df_chembl['_source_chembl'] = True
        df_ttd['_source_ttd'] = True
        
        merged = pd.merge(
            df_chembl, 
            df_ttd, 
            on=merge_keys, 
            how='outer', 
            suffixes=('_ChEMBL', '_TTD'),
            indicator='Database'
        )
        
        merged['Database'] = merged['Database'].map({
            'left_only': 'ChEMBL',
            'right_only': 'TTD',
            'both': 'ChEMBL & TTD'
        })
        
        return merged
        
    except Exception as e:
        print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –æ–±—ä–µ–¥–∏–Ω–µ–Ω–∏–∏ –¥–∞–Ω–Ω—ã—Ö: {str(e)}")
        return pd.DataFrame()

def merge_gene_drug_files_safe(file1_path, file2_path, output_filename, column_order=None):
    try:
        # 1. –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö
        dtype_spec = {
            'Gene': 'string',
            'UniProt ID': 'string',
            'Drug ChEMBL ID': 'string',
            'Drug_ID_ChEMBL': 'string',
            'Drug_CHEMBL_ID': 'string',
            'Drug name': 'string',
            'Protein name': 'string'
        }
        
        df_chembl = pd.read_csv(file1_path, sep=None, engine='python', 
                              encoding='utf-8', dtype=dtype_spec, na_values=['NA', 'N/A', ''])
        df_ttd = pd.read_csv(file2_path, sep=None, engine='python',
                           encoding='utf-8', dtype=dtype_spec, na_values=['NA', 'N/A', ''])

        # 2. –°—Ç–∞–Ω–¥–∞—Ä—Ç–∏–∑–∞—Ü–∏—è —Å—Ç–æ–ª–±—Ü–æ–≤
        df_chembl.columns = df_chembl.columns.str.strip()
        df_ttd.columns = df_ttd.columns.str.strip()

        # 3. –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ Drug CHEMBL ID
        def get_drug_chembl_id(df):
            for col in ['Drug ChEMBL ID', 'Drug_ID_ChEMBL', 'Drug_CHEMBL_ID', 'Drug CHEMBL ID']:
                if col in df.columns:
                    return df[col].astype('string').replace(['nan', 'NA', 'N/A'], pd.NA)
            return pd.Series([pd.NA] * len(df), dtype='string')

        df_chembl['Drug_CHEMBL_ID'] = get_drug_chembl_id(df_chembl)
        df_ttd['Drug_CHEMBL_ID'] = get_drug_chembl_id(df_ttd)

        # 4. –ü–æ–ª—É—á–∞–µ–º —É–Ω–∏–∫–∞–ª—å–Ω—ã–µ ChEMBL ID
        chembl_ids = pd.concat([
            df_chembl['Drug_CHEMBL_ID'],
            df_ttd['Drug_CHEMBL_ID']
        ]).dropna().unique().tolist()
        
        # 5. –ü–æ–∏—Å–∫ —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤–∏–π
        ttd_drug_names = df_ttd['Drug name'].dropna().unique().tolist()
        if chembl_ids and ttd_drug_names:
            print("–ù–∞—á–∞–ª–æ –ø–æ–∏—Å–∫–∞ —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤–∏–π –ø—Ä–µ–ø–∞—Ä–∞—Ç–æ–≤...")
            drug_matches = find_drug_matches_safe(ttd_drug_names, chembl_ids)
            df_ttd['Drug_CHEMBL_ID_from_name'] = df_ttd['Drug name'].map(drug_matches)
        else:
            df_ttd['Drug_CHEMBL_ID_from_name'] = pd.NA
        
        # 6. –û—Å–Ω–æ–≤–Ω–æ–µ –æ–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ
        print("–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...")
        merged = safe_merge_data(df_chembl, df_ttd)
        if merged.empty:
            print("–û—à–∏–±–∫–∞: –Ω–µ —É–¥–∞–ª–æ—Å—å –æ–±—ä–µ–¥–∏–Ω–∏—Ç—å –¥–∞–Ω–Ω—ã–µ")
            return None

        # 7. –î–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω–æ–µ –æ–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –ø–æ –Ω–∞–∑–≤–∞–Ω–∏—è–º –ø—Ä–µ–ø–∞—Ä–∞—Ç–æ–≤
        if 'Drug_CHEMBL_ID_from_name' in df_ttd.columns:
            name_matches = df_ttd[df_ttd['Drug_CHEMBL_ID_from_name'].notna()]
            if not name_matches.empty:
                chembl_matches = df_chembl[df_chembl['Drug_CHEMBL_ID'].isin(name_matches['Drug_CHEMBL_ID_from_name'])]
                
                if not chembl_matches.empty:
                    temp_merged = pd.merge(
                        chembl_matches,
                        name_matches,
                        left_on=['Gene', 'UniProt ID', 'Drug_CHEMBL_ID'],
                        right_on=['Gene', 'UniProt ID', 'Drug_CHEMBL_ID_from_name'],
                        how='inner',
                        suffixes=('_ChEMBL', '_TTD')
                    )
                    temp_merged['Database'] = 'ChEMBL & TTD'
                    merged = pd.concat([merged, temp_merged], ignore_index=True)

        # 8. –£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤ –ø–µ—Ä–µ–¥ –æ–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ–º —Å—Ç–æ–ª–±—Ü–æ–≤
        print("–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...")
        merged = remove_duplicates(merged)

        # 9. –û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ —Å—Ç–æ–ª–±—Ü–æ–≤
        def combine_columns(prefix):
            col1, col2 = f'{prefix}_ChEMBL', f'{prefix}_TTD'
            if col1 in merged.columns and col2 in merged.columns:
                merged[prefix] = merged[col1].combine_first(merged[col2])
                merged.drop([col1, col2], axis=1, inplace=True)
            elif col1 in merged.columns:
                merged.rename(columns={col1: prefix}, inplace=True)
            elif col2 in merged.columns:
                merged.rename(columns={col2: prefix}, inplace=True)

        for col in ['Protein name', 'Drug name']:
            combine_columns(col)

        # 10. –£–¥–∞–ª–µ–Ω–∏–µ –≤—Ä–µ–º–µ–Ω–Ω—ã—Ö —Å—Ç–æ–ª–±—Ü–æ–≤
        cols_to_drop = ['_source_chembl', '_source_ttd', 'Drug_CHEMBL_ID_from_name',
                       'Drug TTD ID', 'Protein TTD ID', 'Protein ChEMBL ID']
        merged.drop(columns=[col for col in cols_to_drop if col in merged.columns], inplace=True)

        # 11. –ü–µ—Ä–µ–∏–º–µ–Ω–æ–≤–∞–Ω–∏–µ –∏ —Å–æ—Ä—Ç–∏—Ä–æ–≤–∫–∞
        merged.rename(columns={'Drug_CHEMBL_ID': 'Drug ChEMBL ID'}, inplace=True)

        if column_order:
            existing_cols = [col for col in column_order if col in merged.columns]
            other_cols = [col for col in merged.columns if col not in existing_cols]
            merged = merged[existing_cols + other_cols]

        if 'Drug Mechanism ChEMBL' in merged.columns:
            sort_order = {"Yes": 0, "No": 1, None: 2}
            merged['_sort_temp'] = merged['Drug Mechanism ChEMBL'].map(sort_order)
            merged.sort_values(['Gene', '_sort_temp'], inplace=True)
            merged.drop('_sort_temp', axis=1, inplace=True)

        # 12. –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞
        os.makedirs(os.path.dirname(output_filename) or '.', exist_ok=True)
        merged.to_csv(output_filename, index=False, encoding='utf-8')
        print(f"–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: {output_filename}")
        return merged

    except Exception as e:
        print(f"‚ùå –ö—Ä–∏—Ç–∏—á–µ—Å–∫–∞—è –æ—à–∏–±–∫–∞ –ø—Ä–∏ –æ–±—Ä–∞–±–æ—Ç–∫–µ —Ñ–∞–π–ª–æ–≤: {str(e)}")
        return None

def process_files_safely(file_pairs: List[Tuple[str, str, str]], column_order: List[str]):
    for chembl_path, ttd_path, output_filename in file_pairs:
        print(f"\nüîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:")
        print(f"ChEMBL: {chembl_path}")
        print(f"TTD: {ttd_path}")
        
        result = merge_gene_drug_files_safe(
            chembl_path, 
            ttd_path, 
            output_filename, 
            column_order=column_order
        )
        
        if result is not None:
            print(f"‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ {output_filename}")
            print(f"–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: {len(result)}")
            print("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:")
            print(result['Database'].value_counts())
        else:
            print(f"‚ùå –ù–µ —É–¥–∞–ª–æ—Å—å –æ–±—Ä–∞–±–æ—Ç–∞—Ç—å —Ñ–∞–π–ª—ã: {chembl_path}, {ttd_path}")

def process_all_files():
    chembl_dir = r"C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞"
    ttd_dir = r"C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\ttd\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã"
    output_dir = os.path.join(os.getcwd(), "Results")
    
    os.makedirs(output_dir, exist_ok=True)
    
    chembl_files = {f for f in os.listdir(chembl_dir) if f.endswith('.csv')}
    ttd_files = {f for f in os.listdir(ttd_dir) if f.endswith('.csv')}
    common_files = chembl_files & ttd_files
    
    if not common_files:
        print("‚ùå –ù–µ—Ç –æ–±—â–∏—Ö —Ñ–∞–π–ª–æ–≤ –¥–ª—è –æ–±—Ä–∞–±–æ—Ç–∫–∏")
        return
    
    column_order = [
        'Gene', 'UniProt ID', 'Protein name',
        'Drug ChEMBL ID', 'Drug name',
        'Drug Mechanism ChEMBL', 'pChEMBL value',
        'Activity type ChEMBL', 'Activity value ChEMBL',
        'Activity type TTD', 'Activity value TTD',
        'Max phase ChEMBL', 'Max phase TTD',
        'Action type ChEMBL', 'Mechanism of Action ChEMBL', 
        'Action type TTD', 'Database'
    ]
    
    file_pairs = []
    for filename in common_files:
        chembl_path = os.path.join(chembl_dir, filename)
        ttd_path = os.path.join(ttd_dir, filename)
        output_filename = os.path.join(output_dir, f"{os.path.splitext(filename)[0]}.integrated.csv")
        file_pairs.append((chembl_path, ttd_path, output_filename))
    
    process_files_safely(file_pairs, column_order)

if __name__ == '__main__':
    process_all_files()


üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\ALL_depscore_-0.15_zscore_-1.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\ttd\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã\ALL_depscore_-0.15_zscore_-1.csv
–ù–∞—á–∞–ª–æ –ø–æ–∏—Å–∫–∞ —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤–∏–π –ø—Ä–µ–ø–∞—Ä–∞—Ç–æ–≤...


Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.71it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.71it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.69it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.70it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.73it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.69it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.70it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.72it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.69it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.70it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.72it/s]

–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.15_zscore_-1.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.15_zscore_-1.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 3094
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          2953
ChEMBL & TTD      87
TTD               54
Name: count, dtype: int64

üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\ALL_depscore_-0.15_zscore_-0.5.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_

Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:22<00:00,  4.50it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.57it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.68it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.55it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.64it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.70it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.65it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.68it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.59it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.70it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.62it/s]

–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.15_zscore_-0.5.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.15_zscore_-0.5.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 10355
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          9985
ChEMBL & TTD     202
TTD              168
Name: count, dtype: int64

üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\AML_depscore_-0.15_zscore_-0.5.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uni

Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.60it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.72it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.69it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.66it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.73it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.59it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.64it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.72it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.72it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.73it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.60it/s]

–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.15_zscore_-0.5.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.15_zscore_-0.5.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 4587
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          4453
ChEMBL & TTD      78
TTD               56
Name: count, dtype: int64

üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\ALL_depscore_-0.2_zscore_-0.5.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_unipr

Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.74it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.74it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]

–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.2_zscore_-0.5.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.2_zscore_-0.5.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 7704
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          7421
ChEMBL & TTD     154
TTD              129
Name: count, dtype: int64

üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\AML_depscore_-0.15_zscore_-1.5.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_unipro

Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.72it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 45/45 [00:09<00:00,  4.75it/s]
Processing ChEMBL IDs: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [01:15<00:00, 18.92s/it]


–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.15_zscore_-1.5.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.15_zscore_-1.5.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 344
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          326
ChEMBL & TTD     11
TTD               7
Name: count, dtype: int64

üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\AML_depscore_-0.15_zscore_-1.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_tt

Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.74it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:20<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.73it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 51/51 [00:10<00:00,  4.75it/s]]
Processing ChEMBL IDs: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [03:28<00:00, 20.89s/it]


–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.15_zscore_-1.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.15_zscore_-1.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 973
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          940
ChEMBL & TTD     19
TTD              14
Name: count, dtype: int64

üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\ALL_depscore_-0.15_zscore_-1.5.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\

Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.74it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 93/93 [00:19<00:00,  4.75it/s]
Processing ChEMBL IDs: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7/7 [02:31<00:00, 21.70s/it]


–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.15_zscore_-1.5.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.15_zscore_-1.5.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 689
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          652
ChEMBL & TTD     25
TTD              12
Name: count, dtype: int64

üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\AML_depscore_-0.2_zscore_-1.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd

Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.73it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 88/88 [00:18<00:00,  4.76it/s]
Processing ChEMBL IDs: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8/8 [02:52<00:00, 21.60s/it]


–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.2_zscore_-1.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.2_zscore_-1.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 799
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          773
ChEMBL & TTD     14
TTD              12
Name: count, dtype: int64

üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\ALL_depscore_-0.2_zscore_-1.5.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\ttd

Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.74it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.74it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 67/67 [00:14<00:00,  4.75it/s]
Processing ChEMBL IDs: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7/7 [02:26<00:00, 20.91s/it]


–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.2_zscore_-1.5.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.2_zscore_-1.5.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 662
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          627
ChEMBL & TTD     24
TTD              11
Name: count, dtype: int64

üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\ALL_depscore_-0.2_zscore_-1.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\t

Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.74it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:20<00:00,  4.76it/s]

–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.2_zscore_-1.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\ALL_depscore_-0.2_zscore_-1.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 2676
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          2544
ChEMBL & TTD      84
TTD               48
Name: count, dtype: int64

üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\AML_depscore_-0.2_zscore_-0.5.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd

Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]

–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.2_zscore_-0.5.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.2_zscore_-0.5.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 3358
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          3276
ChEMBL & TTD      42
TTD               40
Name: count, dtype: int64

üîç –ù–∞—á–∞—Ç–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤:
ChEMBL: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\chembl\–†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Ä–∞—Å—á—ë—Ç–∞\AML_depscore_-0.2_zscore_-1.5.csv
TTD: C:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot

Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.75it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.76it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:21<00:00,  4.72it/s]
Building family map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 45/45 [00:09<00:00,  4.74it/s]
Processing ChEMBL IDs: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [01:15<00:00, 18.90s/it]

–û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö...
–£–¥–∞–ª–µ–Ω–∏–µ –¥—É–±–ª–∏–∫–∞—Ç–æ–≤...
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω: c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.2_zscore_-1.5.integrated.csv
‚úÖ –£—Å–ø–µ—à–Ω–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω –∫–∞–∫ c:\Users\rusla\OneDrive\–†–∞–±–æ—á–∏–π —Å—Ç–æ–ª\–î–∏–ø–ª–æ–º\–ì–µ–Ω—ã\chembl_uniprot_ttd\data_integration\Results\AML_depscore_-0.2_zscore_-1.5.integrated.csv
–í—Å–µ–≥–æ —Å—Ç—Ä–æ–∫: 344
–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –ø–æ –∏—Å—Ç–æ—á–Ω–∏–∫–∞–º:
Database
ChEMBL          326
ChEMBL & TTD     11
TTD               7
Name: count, dtype: int64



