<a href="https://colab.research.google.com/github/KoliaUS/F7PMIPSMB-S/blob/main/Ceabin_uloha1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd


# Load the compound table (semicolon-separated CSV)
df = pd.read_csv('/content/compounds.csv', sep=';')

# Pull the SMILES column out as a plain Python list
smiles_list = df.loc[:, 'smiles'].to_list()

print(df.head(5))

                                                name  \
0    Ethyl 2-amino-3-(3,4-dihydroxyphenyl)propanoate   
1  (S)-Methyl 2-amino-3-(3,4-dihydroxyphenyl)prop...   
2                                        Etilevodopa   
3                                        Foslevodopa   
4                                         Melevodopa   

                                  smiles  \
0         CCOC(=O)C(CC1=CC(=C(C=C1)O)O)N   
1       COC(=O)C(CC1=CC(=C(C=C1)O)O)N.Cl   
2         CCOC(=O)C(CC1=CC(=C(C=C1)O)O)N   
3  C1=CC(=C(C=C1CC(C(=O)O)N)O)OP(=O)(O)O   
4          COC(=O)C(CC1=CC(=C(C=C1)O)O)N   

                                              pubmed  
0  https://pubchem.ncbi.nlm.nih.gov/compound/1179...  
1  https://pubchem.ncbi.nlm.nih.gov/compound/1013...  
2   https://pubchem.ncbi.nlm.nih.gov/compound/170345  
3   https://pubchem.ncbi.nlm.nih.gov/compound/127766  
4    https://pubchem.ncbi.nlm.nih.gov/compound/23497  


In [5]:
!pip install rdkit-pypi

Collecting rdkit-pypi
  Downloading rdkit_pypi-2022.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Downloading rdkit_pypi-2022.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.4/29.4 MB[0m [31m38.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit-pypi
Successfully installed rdkit-pypi-2022.9.5


In [6]:
from rdkit import Chem
from rdkit.Chem import AllChem

# Convert the SMILES strings to RDKit molecules
molecules = [Chem.MolFromSmiles(smiles) for smiles in smiles_list]

# Chem.MolFromSmiles returns None (it does not raise) for a string it cannot
# parse. Stop here and name the offending entries instead of failing later
# inside the fingerprint call with an opaque argument error.
invalid_smiles = [smiles for smiles, mol in zip(smiles_list, molecules) if mol is None]
if invalid_smiles:
    raise ValueError(
        f"Could not parse {len(invalid_smiles)} SMILES string(s): {invalid_smiles}"
    )

# Morgan fingerprint (radius 2, 1024-bit vector) for every molecule
fingerprints = [AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024) for mol in molecules]

In [7]:
from rdkit import DataStructs

# Reference compound (Levodopa) and its Morgan fingerprint (radius 2, 1024 bits)
reference_smiles = "C1=CC(=C(C=C1CC(C(=O)O)N)O)O"
reference_mol = Chem.MolFromSmiles(reference_smiles)
reference_fp = AllChem.GetMorganFingerprintAsBitVect(reference_mol, 2, nBits=1024)

# Tanimoto similarity of the reference against every compound, in one bulk call;
# the result is a list of floats in the same order as `fingerprints`
tanimoto_similarities = DataStructs.BulkTanimotoSimilarity(reference_fp, fingerprints)

# Store the similarity score next to each compound
df['Tanimoto Similarity'] = tanimoto_similarities

# Show the table including the new score column
print(df)

                                                name  \
0    Ethyl 2-amino-3-(3,4-dihydroxyphenyl)propanoate   
1  (S)-Methyl 2-amino-3-(3,4-dihydroxyphenyl)prop...   
2                                        Etilevodopa   
3                                        Foslevodopa   
4                                         Melevodopa   
5                                    L-DOPA (sodium)   
6                      Tyrosine, 3-hydroxy-O-methyl-   
7                                  3-Methoxytyrosine   

                                  smiles  \
0         CCOC(=O)C(CC1=CC(=C(C=C1)O)O)N   
1       COC(=O)C(CC1=CC(=C(C=C1)O)O)N.Cl   
2         CCOC(=O)C(CC1=CC(=C(C=C1)O)O)N   
3  C1=CC(=C(C=C1CC(C(=O)O)N)O)OP(=O)(O)O   
4          COC(=O)C(CC1=CC(=C(C=C1)O)O)N   
5      C1=CC(=C(C=C1CC(C(=O)O)N)O)O.[Na]   
6          COC1=C(C=C(C=C1)CC(C(=O)O)N)O   
7          COC1=C(C=CC(=C1)CC(C(=O)O)N)O   

                                              pubmed  Tanimoto Similarity  
0  https://pubchem.ncb

In [8]:
# Rank the compounds from most to least similar to the reference
df_sorted = df.sort_values('Tanimoto Similarity', ascending=False)

# Keep and show the three best-scoring compounds
top_3_hits = df_sorted.iloc[:3]
print(top_3_hits)

                            name                             smiles  \
5                L-DOPA (sodium)  C1=CC(=C(C=C1CC(C(=O)O)N)O)O.[Na]   
4                     Melevodopa      COC(=O)C(CC1=CC(=C(C=C1)O)O)N   
6  Tyrosine, 3-hydroxy-O-methyl-      COC1=C(C=C(C=C1)CC(C(=O)O)N)O   

                                              pubmed  Tanimoto Similarity  
5  https://pubchem.ncbi.nlm.nih.gov/compound/1386...             0.964286  
4    https://pubchem.ncbi.nlm.nih.gov/compound/23497             0.676471  
6  https://pubchem.ncbi.nlm.nih.gov/compound/1340...             0.666667  
