# Projeto análise de dados biogênicos

## Instalando a biblioteca principal
---

- *Breve descrição sobre a biblioteca:*
    O RDKIT é um kit de ferramenta de código aberto para quimioformatica, ou seja, a biblioteca auxiliará no processo de operações com moléculas, geração de descrição das moléculas, utilitários para aprendizado de máquinas, entre outros.
    
fonte: https://www.rdkit.org/docs/Overview.html#open-source-toolkit-for-cheminformatics

In [1]:
! pip install rdkit



## Importando bibliotecas

In [2]:
from rdkit import Chem

In [3]:
import pandas as pd 

In [20]:
from rdkit.Chem import Descriptors

In [24]:
from rdkit.Chem import Crippen

In [25]:
from rdkit.Chem import Lipinski

In [26]:
from rdkit.Chem import rdMolDescriptors

### Documentação das bibliotecas
---
- pandas: https://pandas.pydata.org/docs/
- rdkit.Chem: https://www.rdkit.org/docs/source/rdkit.Chem.html
- rdkit.Chem.Descriptors: https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors.html
- rdkit.Chem.Crippen: https://www.rdkit.org/docs/source/rdkit.Chem.Crippen.html
- rdkit.Chem.Lipinski: https://www.rdkit.org/docs/source/rdkit.Chem.Lipinski.html
- rdkit.Chem.rdMolDescriptors: https://www.rdkit.org/docs/source/rdkit.Chem.rdMolDescriptors.html

## Chamando o arquivo 

In [6]:
# utilizamos o pandas para a leitura do arquivo do tipo CSV
df = pd.read_csv('AA+biogenic.csv', sep=',')
df

Unnamed: 0,zinc_id,smiles
0,ZINC000149277742,CO[C@H]([C@H](C)O)[C@H](OC)[C@H](O)C=O
1,ZINC000004403299,C[C@H](O)[C@H](O)[C@H](O)[C@H](O)[C@H](O)CO
2,ZINC000013435545,O=C1C[C@H](O)[C@H]([C@H](O)CO)O1
3,ZINC000256066192,OCCN1C[C@H](O)[C@@H](O)[C@H]1CO
4,ZINC000257519833,N[C@]1(CO)OC[C@H](CO)[C@H]1O
...,...,...
3613,ZINC000003649889,NC(=O)NOCC[C@H](N)C(=O)O
3614,ZINC000379325533,N=C(CO)[C@H](O)[C@H](O)[C@H](O)CO
3615,ZINC000100201838,O=C(CO)[C@H](O)[C@@H](O)C(=O)CO
3616,ZINC000100806197,OC[C@@H]1N[C@H](CO)[C@@H](O)[C@@H](O)[C@@H]1O


<div class="alert alert-block alert-info">
<b> OBS: nosso arquivo tem o 'smiles', que dará a base para calcular todos os atributos necessários </b>
</div>

In [17]:
# Criando uma lista somente com os SMILES 
SMILES = []
for i in range(0,3618):
    separando_smiles = df.smiles[i]
    SMILES.append(separando_smiles) 

In [18]:
SMILES

['CO[C@H]([C@H](C)O)[C@H](OC)[C@H](O)C=O',
 'C[C@H](O)[C@H](O)[C@H](O)[C@H](O)[C@H](O)CO',
 'O=C1C[C@H](O)[C@H]([C@H](O)CO)O1',
 'OCCN1C[C@H](O)[C@@H](O)[C@H]1CO',
 'N[C@]1(CO)OC[C@H](CO)[C@H]1O',
 'N[C@H]1[C@H](O)[C@@H](O)[C@H](CO)O[C@H]1O',
 'NC(=O)N[C@@H]1NC(=O)NC1=O',
 'O=c1[nH]c(O)c(O)c(=O)[nH]1',
 'OC[C@H](O)[C@@H]1O[C@H](O)[C@H](O)[C@H]1O',
 'O=C(O)[C@H](O)[C@@H]1OC[C@@H](O)[C@@H]1O',
 'O[C@@H]1[C@@H](O)[C@H]2[C@@H](O)CCN2C[C@H]1O',
 'O=C[C@@H]1OC(=O)[C@H](O)[C@H](O)[C@H]1O',
 'CC(=O)N1CCN[C@@H](C(N)=O)C1',
 'CO[C@H]1[C@@H](O)[C@@H](CO)[C@@H](O)[C@H]1O',
 'O=C1C[C@@H](O)[C@H](CO)O1',
 'O[C@@H]1CO[C@H]2[C@H]1OC[C@H]2O',
 'COC(=O)[C@H](N)CC(N)=O',
 'COC(=O)NNC(N)=O',
 'O=C1C[C@@H](CO)OC[C@H]1O',
 'CO[C@H]1[C@@H](O)[C@H](O)CO[C@H]1O',
 'C[C@@]1(O)[C@@H](O)[C@H](O)[C@@H](O)[C@H](O)[C@H]1O',
 'C[C@H](O)[C@H](O)[C@H](O)[C@@H](O)[C@H](O)CO',
 'C[S@@](=O)CCNC(=O)[C@@H](O)CN',
 'CN1C(=O)NC(=O)C1=O',
 'O=C1N[C@@H]2NC(=O)N(CCO)[C@H]2N1',
 'NC1=N[C@@H]2N=CN=C2C(=O)N1',
 'N[C@@H](C(=O)O)[C@H

In [19]:
len(SMILES)

3618

## Calculando a fórmula molecular 

In [47]:
Formula_molecular = []
for i in range (0, len(SMILES)):
    Formula_molecular.append(rdMolDescriptors.CalcMolFormula(Chem.MolFromSmiles(SMILES[i])))

In [48]:
Formula_molecular

['C8H16O5',
 'C7H16O6',
 'C6H10O5',
 'C7H15NO4',
 'C6H13NO4',
 'C6H13NO5',
 'C4H6N4O3',
 'C4H4N2O4',
 'C6H12O6',
 'C6H10O6',
 'C8H15NO4',
 'C6H8O6',
 'C7H13N3O2',
 'C7H14O5',
 'C5H8O4',
 'C6H10O4',
 'C5H10N2O3',
 'C3H7N3O3',
 'C6H10O4',
 'C6H12O5',
 'C7H14O6',
 'C7H16O6',
 'C6H14N2O3S',
 'C4H4N2O3',
 'C6H10N4O3',
 'C5H5N5O',
 'C5H9NO6',
 'C6H10O6',
 'C6H10O6',
 'C6H9NO4',
 'C7H13N5O2',
 'C7H14O5',
 'C7H12O5',
 'C6H10O6',
 'C7H16N4O2',
 'C5H11NO5S',
 'C6H12O5',
 'C5H10O5',
 'C6H9NO4',
 'C7H14O5',
 'C5H12O4',
 'C7H12O4',
 'C6H9NO6',
 'C7H13NO3',
 'C4H10NO6P',
 'C4H8O4',
 'C6H13NO5',
 'C8H13NO3',
 'C3H3N3O3',
 'C6H8O7',
 'C4H6N2O4',
 'C8H14O5',
 'C5H8N2O3',
 'C7H12O6',
 'C7H8O5',
 'C7H10O4',
 'C7H14O5',
 'C7H14O6',
 'C6H10O6',
 'C6H13NO5',
 'C7H14N2O4',
 'C7H13NO5',
 'C8H15N3O2',
 'C6H12N2O3',
 'C7H15NO4',
 'C7H8O4',
 'C6H10O7',
 'C8H17NO3',
 'C5H9NO4',
 'C6H12O5',
 'C4H8O4',
 'C5H8O5',
 'C6H11FO5',
 'C4H10O3',
 'C6H14O5',
 'C3H4O6S',
 'C7H15NO4',
 'C11H7NO2',
 'C4H3N3O4',
 'C5H5N5O',
 'C

## Calculando o MolWt de cada molécula

In [21]:
MolWt = []
for i in range (0, len(SMILES)):
     MolWt.append(Descriptors.ExactMolWt(Chem.MolFromSmiles(SMILES[i])))

In [22]:
MolWt

[192.099773612,
 196.094688232,
 162.05282342,
 177.10010796,
 163.084457896,
 179.079372516,
 158.043990052,
 144.017106608,
 180.063388104,
 178.04773803999998,
 189.10010796,
 176.032087976,
 171.100776656,
 178.084123548,
 132.042258736,
 146.0579088,
 146.06914218,
 133.048741084,
 146.0579088,
 164.068473484,
 194.079038168,
 196.094688232,
 194.072513308,
 128.022191988,
 186.07529018,
 151.04940978,
 179.042987008,
 178.04773803999998,
 178.04773804,
 159.053157768,
 199.106924656,
 178.084123548,
 176.068473484,
 178.04773804,
 188.127325752,
 197.035793452,
 164.068473484,
 150.05282342,
 159.053157768,
 178.084123548,
 136.073558864,
 160.073558864,
 191.042987008,
 159.089543276,
 199.02457367,
 120.04225873600001,
 179.079372516,
 171.089543276,
 129.017440956,
 192.02700259600002,
 146.032756672,
 190.084123548,
 144.053492116,
 192.063388104,
 172.037173356,
 158.0579088,
 178.084123548,
 194.079038168,
 178.04773803999998,
 179.079372516,
 190.095356928,
 191.079372516,

## Calculando o MolLogP

In [28]:
MolLogP = []
for i in range (0, len(SMILES)):
    MolLogP.append(Crippen.MolLogP(Chem.MolFromSmiles(SMILES[i])))

In [29]:
MolLogP

[-1.0430000000000004,
 -3.1969000000000007,
 -1.9840000000000002,
 -2.622999999999999,
 -2.3666999999999994,
 -3.2550000000000003,
 -2.1798,
 -1.5256000000000003,
 -3.2214000000000005,
 -2.4474999999999993,
 -2.4820999999999986,
 -2.8066,
 -1.7079999999999989,
 -2.2937999999999996,
 -1.3449,
 -1.4942000000000004,
 -1.6378999999999992,
 -1.0743000000000003,
 -1.3024,
 -1.9281999999999997,
 -3.444500000000001,
 -3.1969000000000007,
 -2.1992999999999983,
 -1.3054000000000001,
 -2.0310999999999995,
 -1.7600999999999998,
 -2.7952999999999997,
 -2.2557000000000005,
 -3.0132,
 -2.5056,
 -2.2991299999999977,
 -1.5381000000000002,
 -1.6790999999999998,
 -3.0132000000000008,
 -1.0924299999999987,
 -1.0630999999999993,
 -2.3480000000000003,
 -2.7381000000000006,
 -1.3373999999999997,
 -1.5397000000000005,
 -1.9171,
 -1.322,
 -1.4261999999999997,
 -1.4475000000000005,
 -1.1023000000000003,
 -2.0989999999999998,
 -3.2533999999999996,
 -1.7966999999999997,
 -2.2485000000000004,
 -3.3276,
 -1.4733999

## Calculando Número de Aceites de Ligação de Hidrogênio

In [31]:
NumHAcceptors = []
for i in range (0, len(SMILES)):
    NumHAcceptors.append(Lipinski.NumHAcceptors(Chem.MolFromSmiles(SMILES[i])))

In [32]:
NumHAcceptors

[5,
 6,
 5,
 5,
 5,
 6,
 3,
 4,
 6,
 5,
 5,
 6,
 3,
 5,
 4,
 4,
 4,
 3,
 4,
 5,
 6,
 6,
 4,
 3,
 3,
 5,
 5,
 5,
 6,
 4,
 4,
 5,
 5,
 6,
 3,
 4,
 5,
 5,
 3,
 5,
 4,
 4,
 4,
 4,
 4,
 4,
 6,
 4,
 3,
 7,
 3,
 5,
 4,
 6,
 4,
 4,
 5,
 6,
 5,
 6,
 4,
 5,
 3,
 3,
 5,
 4,
 6,
 4,
 3,
 5,
 4,
 5,
 5,
 3,
 5,
 4,
 5,
 2,
 4,
 5,
 5,
 4,
 6,
 5,
 6,
 4,
 4,
 5,
 4,
 3,
 3,
 3,
 5,
 3,
 6,
 4,
 4,
 4,
 4,
 3,
 7,
 5,
 6,
 4,
 4,
 5,
 4,
 3,
 4,
 6,
 4,
 4,
 4,
 5,
 5,
 5,
 6,
 4,
 4,
 3,
 6,
 5,
 5,
 5,
 3,
 4,
 5,
 4,
 6,
 4,
 6,
 6,
 5,
 5,
 5,
 4,
 4,
 4,
 5,
 5,
 3,
 4,
 5,
 3,
 6,
 4,
 4,
 5,
 4,
 3,
 5,
 3,
 3,
 4,
 5,
 4,
 5,
 5,
 4,
 6,
 5,
 5,
 4,
 5,
 6,
 4,
 6,
 4,
 5,
 3,
 4,
 5,
 6,
 5,
 4,
 4,
 6,
 4,
 3,
 4,
 6,
 3,
 4,
 4,
 4,
 4,
 5,
 6,
 3,
 4,
 6,
 4,
 4,
 5,
 6,
 4,
 5,
 4,
 3,
 3,
 4,
 5,
 4,
 3,
 4,
 4,
 6,
 4,
 5,
 6,
 3,
 6,
 4,
 5,
 5,
 4,
 7,
 5,
 4,
 4,
 6,
 6,
 6,
 7,
 6,
 4,
 5,
 3,
 4,
 5,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 6,
 3,
 4,
 4,
 4,
 6,
 6,
 4,
 3,
 6,
 4,
 5,
 5,


## Número de doadores de ligações de hidrogênio

In [34]:
NumHDonors = []
for i in range (0, len(SMILES)):
    NumHDonors.append(Lipinski.NumHDonors(Chem.MolFromSmiles(SMILES[i])))

In [35]:
NumHDonors

[2,
 6,
 3,
 4,
 4,
 5,
 4,
 4,
 5,
 4,
 4,
 3,
 2,
 4,
 2,
 2,
 2,
 3,
 2,
 3,
 6,
 6,
 3,
 1,
 4,
 2,
 5,
 4,
 4,
 4,
 4,
 3,
 2,
 4,
 5,
 3,
 4,
 4,
 2,
 3,
 4,
 3,
 4,
 4,
 4,
 3,
 5,
 4,
 3,
 4,
 3,
 3,
 3,
 4,
 3,
 3,
 3,
 5,
 4,
 5,
 4,
 4,
 3,
 3,
 4,
 2,
 5,
 4,
 3,
 3,
 3,
 3,
 4,
 2,
 5,
 2,
 5,
 2,
 3,
 2,
 4,
 4,
 5,
 4,
 5,
 4,
 4,
 5,
 4,
 3,
 3,
 2,
 3,
 3,
 5,
 3,
 2,
 3,
 3,
 4,
 4,
 1,
 5,
 2,
 3,
 3,
 4,
 3,
 3,
 5,
 3,
 3,
 4,
 2,
 3,
 4,
 6,
 4,
 3,
 2,
 6,
 5,
 5,
 3,
 2,
 3,
 4,
 3,
 2,
 4,
 5,
 4,
 4,
 2,
 3,
 3,
 4,
 2,
 5,
 3,
 4,
 4,
 4,
 4,
 6,
 4,
 2,
 5,
 4,
 3,
 3,
 3,
 2,
 3,
 3,
 1,
 2,
 5,
 3,
 6,
 4,
 4,
 3,
 5,
 3,
 6,
 5,
 4,
 3,
 2,
 4,
 4,
 5,
 0,
 3,
 3,
 5,
 4,
 3,
 4,
 6,
 1,
 4,
 3,
 4,
 2,
 2,
 5,
 3,
 2,
 5,
 4,
 3,
 5,
 4,
 4,
 3,
 4,
 3,
 1,
 3,
 5,
 3,
 3,
 4,
 3,
 5,
 3,
 4,
 5,
 5,
 4,
 4,
 3,
 1,
 3,
 0,
 3,
 2,
 3,
 5,
 4,
 2,
 1,
 3,
 2,
 5,
 4,
 4,
 3,
 4,
 3,
 4,
 3,
 3,
 4,
 2,
 4,
 3,
 3,
 2,
 4,
 3,
 5,
 3,
 3,
 5,
 4,
 3,
 3,


## Calculando número de títulos rotativos

In [38]:
NumRotableBonds = []
for i in range (0, len(SMILES)):
    NumRotableBonds.append(Lipinski.NumRotatableBonds(Chem.MolFromSmiles(SMILES[i])))

In [39]:
NumRotableBonds

[6,
 5,
 2,
 3,
 2,
 1,
 1,
 0,
 2,
 2,
 0,
 1,
 1,
 2,
 1,
 0,
 3,
 0,
 1,
 1,
 0,
 5,
 5,
 0,
 2,
 0,
 4,
 5,
 2,
 1,
 1,
 2,
 3,
 0,
 5,
 5,
 5,
 4,
 2,
 1,
 4,
 0,
 5,
 0,
 5,
 3,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 5,
 5,
 2,
 1,
 4,
 3,
 4,
 3,
 1,
 1,
 5,
 2,
 4,
 2,
 3,
 1,
 1,
 3,
 4,
 3,
 3,
 0,
 1,
 0,
 0,
 1,
 2,
 5,
 1,
 3,
 4,
 4,
 5,
 2,
 2,
 1,
 3,
 1,
 5,
 3,
 2,
 4,
 3,
 2,
 0,
 0,
 1,
 1,
 5,
 1,
 1,
 4,
 3,
 3,
 5,
 4,
 0,
 6,
 0,
 2,
 0,
 5,
 1,
 2,
 5,
 0,
 5,
 5,
 3,
 1,
 0,
 1,
 4,
 4,
 5,
 6,
 0,
 4,
 1,
 5,
 3,
 3,
 0,
 2,
 3,
 0,
 1,
 1,
 6,
 1,
 1,
 1,
 6,
 4,
 2,
 2,
 0,
 1,
 1,
 0,
 3,
 1,
 5,
 5,
 1,
 1,
 3,
 0,
 0,
 5,
 5,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 2,
 5,
 0,
 1,
 3,
 3,
 1,
 4,
 3,
 1,
 1,
 6,
 1,
 2,
 2,
 1,
 3,
 0,
 5,
 2,
 1,
 5,
 4,
 3,
 0,
 0,
 2,
 1,
 3,
 7,
 4,
 5,
 2,
 1,
 1,
 5,
 1,
 3,
 1,
 0,
 4,
 0,
 0,
 0,
 4,
 1,
 1,
 3,
 6,
 2,
 0,
 5,
 5,
 4,
 3,
 0,
 3,
 5,
 4,
 0,
 5,
 1,
 6,
 1,
 4,
 0,
 3,
 2,
 1,
 2,
 4,
 1,
 4,
 4,
 1,


## Calculando o número de anéis 

In [41]:
RingCount = []
for i in range (0, len(SMILES)):
    RingCount.append(Lipinski.RingCount(Chem.MolFromSmiles(SMILES[i])))

In [42]:
RingCount

[0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 2,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 2,
 2,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 2,
 0,
 0,
 1,
 3,
 1,
 2,
 0,
 2,
 1,
 2,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 2,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 2,
 2,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 2,
 0,
 2,
 0,
 1,
 0,
 1,
 2,
 0,
 1,
 0,
 2,
 2,
 1,
 2,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 2,
 1,
 1,
 0,
 1,
 1,
 0,
 2,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 2,
 0,
 2,
 0,
 0,
 0,
 2,
 1,
 0,
 2,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 2,
 1,
 1,
 3,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 2,
 2,
 0,
 0,
 1,
 1,
 1,
 2,
 1,
 1,
 2,
 1,
 1,
 0,
 2,
 0,
 0,
 1,
 2,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 2,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 2,
 1,
 1,
 2,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 2,
 0,
 2,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 2,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,


## Calculando  área de superfície polar topológica (TPSA)

In [43]:
TPSA = []
for i in range (0, len(SMILES)):
    TPSA.append(Descriptors.TPSA(Chem.MolFromSmiles(SMILES[i])))

In [44]:
TPSA

[75.99000000000001,
 121.38000000000001,
 86.99000000000001,
 84.16000000000001,
 95.94000000000001,
 116.17000000000002,
 113.32,
 106.18,
 110.38000000000001,
 107.22000000000001,
 84.16,
 104.06000000000002,
 75.43,
 90.15,
 66.76,
 58.92,
 95.41,
 93.44999999999999,
 66.76,
 79.15,
 121.38000000000001,
 121.38000000000001,
 92.42,
 66.48,
 93.7,
 92.19999999999999,
 141.08,
 115.06,
 107.22000000000001,
 103.78000000000002,
 114.80000000000001,
 79.15,
 75.99000000000001,
 107.22000000000001,
 125.22000000000001,
 103.7,
 97.99000000000001,
 97.99000000000001,
 77.84,
 79.15,
 80.92,
 77.75999999999999,
 137.92000000000002,
 72.72,
 130.07999999999998,
 77.76,
 116.17000000000002,
 72.72,
 98.57999999999998,
 116.45000000000002,
 95.50000000000001,
 79.15,
 81.92,
 99.38000000000001,
 94.83000000000001,
 77.76,
 86.99000000000001,
 118.22000000000001,
 107.22000000000001,
 116.17000000000002,
 112.65000000000002,
 101.23,
 84.22,
 92.42,
 81.95,
 70.06,
 135.29,
 72.72,
 86.6300000

In [54]:
zinc_id = []
for i in range (0, len(SMILES)):
    zinc_id.append(df['zinc_id'][i])

In [55]:
dicionario_de_dados = {
    'Zinc_ID': zinc_id,
    'SMILES': SMILES,
    'MolecularFormula': Molecular_formula,
    'MolWt': MolWt,
    'MolLogp': MolLogP,
    'HAcceptors': NumHAcceptors,
    'HDonors': NumHDonors,
    'RotableBonds': NumRotableBonds,
    'RindCount': RingCount,
    'TPSA': TPSA,}

In [56]:
#criando data frame
pd.DataFrame(dicionario_de_dados)

Unnamed: 0,Zinc_ID,SMILES,MolecularFormula,MolWt,MolLogp,HAcceptors,HDonors,RotableBonds,RindCount,TPSA
0,ZINC000149277742,CO[C@H]([C@H](C)O)[C@H](OC)[C@H](O)C=O,C8H16O5,192.099774,-1.04300,5,2,6,0,75.99
1,ZINC000004403299,C[C@H](O)[C@H](O)[C@H](O)[C@H](O)[C@H](O)CO,C7H16O6,196.094688,-3.19690,6,6,5,0,121.38
2,ZINC000013435545,O=C1C[C@H](O)[C@H]([C@H](O)CO)O1,C6H10O5,162.052823,-1.98400,5,3,2,1,86.99
3,ZINC000256066192,OCCN1C[C@H](O)[C@@H](O)[C@H]1CO,C7H15NO4,177.100108,-2.62300,5,4,3,1,84.16
4,ZINC000257519833,N[C@]1(CO)OC[C@H](CO)[C@H]1O,C6H13NO4,163.084458,-2.36670,5,4,2,1,95.94
...,...,...,...,...,...,...,...,...,...,...
3613,ZINC000003649889,NC(=O)NOCC[C@H](N)C(=O)O,C5H11N3O4,177.074956,-1.61170,4,4,5,0,127.67
3614,ZINC000379325533,N=C(CO)[C@H](O)[C@H](O)[C@H](O)CO,C6H13NO5,179.079373,-2.92653,6,6,5,0,125.00
3615,ZINC000100201838,O=C(CO)[C@H](O)[C@@H](O)C(=O)CO,C6H10O6,178.047738,-3.16900,6,4,5,0,115.06
3616,ZINC000100806197,OC[C@@H]1N[C@H](CO)[C@@H](O)[C@@H](O)[C@@H]1O,C7H15NO5,193.095023,-3.60590,6,6,2,1,113.18


In [57]:
#Extraindo arquivo 
pd.DataFrame(dicionario_de_dados).to_csv('biogenic_molecules.csv', sep = ';')