# Pacote de checagem de grupos funcionais 

Importação das bibliotecas

In [10]:
import numpy as np
import pandas as pd
from rdkit import Chem

## Criação da Classe FindFG
Find Functional Groups

In [11]:
class FindFG:
    """Find functional groups in molecules via SMARTS substructure matching.

    A table of functional groups is loaded once at construction time. Of that
    table, this class reads the ``Name`` and ``SMARTS`` columns. Each query
    matches one SMILES string against every SMARTS pattern in the table.
    """

    # Location used when ``FindFG()`` is called without an explicit path
    # (kept so existing no-argument callers continue to work).
    DEFAULT_FG_PATH = r'C:\Users\bores\OneDrive\repositorios_github\Python\Programs\Pos\fg_arrumado.pkl'

    def __init__(self, path=None):
        """Create the finder and load the functional-group table.

        Parameters
        ----------
        path : str or path-like, optional
            Zip-compressed pickle holding the functional-group DataFrame.
            Defaults to ``DEFAULT_FG_PATH``.
        """
        if path is None:
            path = self.DEFAULT_FG_PATH
        # Match counts (one per table row) from the latest findFunctionalGroups call.
        self.match_list = []
        # Not used inside this class; kept because callers may rely on the attribute.
        self.match_dict = {}
        self.path_SMARTS_functional_groups(path)

    def path_SMARTS_functional_groups(self, path):
        """Load the functional-group SMARTS table into ``self.df_fg``.

        Parameters
        ----------
        path : str or path-like
            Zip-compressed pickle containing the DataFrame.
        """
        # NOTE(security): unpickling can execute arbitrary code; only load
        # files that come from a trusted source.
        self.df_fg = pd.read_pickle(path, compression='zip')

    @staticmethod
    def _parse_smiles(smile):
        """Return the parsed molecule, or None when ``smile`` is not a usable SMILES string."""
        if not isinstance(smile, str):
            return None
        return Chem.MolFromSmiles(smile)

    @staticmethod
    def _count_matches(mol, smart):
        """Return how many times the SMARTS pattern ``smart`` occurs in ``mol`` (0 on any failure)."""
        if mol is None or not isinstance(smart, str):
            return 0
        try:
            patt = Chem.MolFromSmarts(smart, mergeHs=True)
            if patt is None:
                return 0
            return len(mol.GetSubstructMatches(patt))
        except Exception:
            # Best-effort contract: a pattern that cannot be evaluated counts
            # as "no match". Catching Exception (instead of a bare except)
            # lets KeyboardInterrupt / SystemExit propagate.
            return 0

    def _frequencies(self, smile):
        """Return a Series (indexed like ``self.df_fg``) with the match count of every pattern."""
        # Parse the molecule once instead of once per SMARTS row.
        mol = self._parse_smiles(smile)
        return self.df_fg['SMARTS'].apply(lambda smart: self._count_matches(mol, smart))

    def testMatch(self, smile: str, smart: str):
        """Count the occurrences of one SMARTS pattern in one molecule.

        Parameters
        ----------
        smile : str
            SMILES string of the molecule.
        smart : str
            SMARTS pattern of the functional group.

        Returns
        -------
        int
            Number of substructure matches; 0 when there is no match or when
            either input cannot be parsed.
        """
        return self._count_matches(self._parse_smiles(smile), smart)

    def findFunctionalGroups(self, smile):
        """Search every functional group of the table in a single SMILES.

        The full vector of match counts (one entry per table row) is also
        stored in ``self.match_list``.

        Parameters
        ----------
        smile : str
            SMILES string of the molecule.

        Returns
        -------
        pandas.DataFrame
            Columns ``Functional Groups`` and ``Frequency``, restricted to the
            groups that occur at least once; the index is that of ``self.df_fg``.
        """
        frequencies = self._frequencies(smile)
        self.match_list = frequencies.values
        found = pd.DataFrame({'Functional Groups': self.df_fg['Name'],
                              'Frequency': frequencies})
        return found.query('Frequency != 0')

    def functionalGroupASbitvector(self, arg=None):
        """Return which functional groups are present as a 0/1 vector.

        Parameters
        ----------
        arg : str, optional
            SMILES string of the molecule. When it is a string the vector is
            computed for that molecule; otherwise the counts cached by the
            latest ``findFunctionalGroups`` call are used.

        Returns
        -------
        numpy.ndarray
            One entry per table row: 1 if the group occurs, 0 otherwise.
            A new array is returned, so ``self.match_list`` keeps the
            original frequencies.
        """
        if isinstance(arg, str):
            counts = self._frequencies(arg).values
        else:
            counts = np.asarray(self.match_list)
        # Clip every count above 1 down to 1 without touching the cached array.
        return np.minimum(counts, 1)

## Demonstração
Gerando um objeto FindFG em uma variável

In [12]:
# Instantiate the finder; the constructor loads the functional-group table.
search = FindFG()


Conferindo se o DataFrame foi importado corretamente

In [13]:
# Display the loaded functional-group table to confirm it was read correctly.
search.df_fg

Unnamed: 0,Name,SMARTS,FullText
0,Primary_carbon,[CX4H3][#6],
1,Secondary_carbon,[CX4H2]([#6])[#6],
2,Tertiary_carbon,[CX4H1]([#6])([#6])[#6],
3,Quaternary_carbon,[CX4]([#6])([#6])([#6])[#6],
4,Alkene,"[CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])]=[CX3;...",sp2 C may be substituted only by C or H - does...
...,...,...,...
260,Bridged_rings,[R;$(*(@*)(@*)@*);!$([D4R;$(*(@*)(@*)(@*)@*)])...,"part of two or more rings, not spiro, not anne..."
261,Conjugated_double_bond,"*=*[*]=,#,:[*]",
262,Conjugated_tripple_bond,"*#*[*]=,#,:[*]",
263,Trifluoromethyl,"[FX1][CX4;!$([H0][Cl,Br,I]);!$([F][C]([F])([F]...","C with three F attached, connected to anything..."


Declaração de alguns SMILES em um dicionário, com o nome do fármaco como chave e o SMILES correspondente como valor.

In [14]:
# Example molecules: drug name -> SMILES string.
smi = dict(
    caffeine='CN1C=NC2=C1C(=O)N(C(=O)N2C)C',
    nimesulide='CS(=O)(=O)NC1=C(C=C(C=C1)[N+](=O)[O-])OC2=CC=CC=C2',
    testosterone='CC12CCC3C(C1CCC2O)CCC4=CC(=O)CCC34C',
    dipyrone='CC1=C(C(=O)N(N1C)C2=CC=CC=C2)N(C)CS(=O)(=O)[O-].[Na+]',
    amoxicillin='CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C',
    escitalopram='CN(C)CCCC1(C2=C(CO1)C=C(C=C2)C#N)C3=CC=C(C=C3)F',
)

In [23]:
# List the functional groups found in nimesulide together with their match counts.
search.findFunctionalGroups(smi['nimesulide'])

Unnamed: 0,Functional Groups,Frequency
18,Diarylether,1
170,Nitro,1
193,Sulfonamide,1
196,Sulfonic_derivative,1
261,Conjugated_double_bond,2


In [18]:
# Binary presence vector for nimesulide, one entry per row of the functional-group table.
search.functionalGroupASbitvector(smi['nimesulide'])

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0], dtype=int64)