# Crear Dataframe
<hr>

In [1]:
import pandas as pd

In [2]:
B3DB = pd.read_csv("..\Datos\B3DB_classification.tsv", sep="\t")

# Seleccionar columnas
<hr>

In [3]:
B3DB.columns

Index(['NO.', 'compound_name', 'IUPAC_name', 'SMILES', 'CID', 'logBB',
       'BBB+/BBB-', 'Inchi', 'threshold', 'reference', 'group', 'comments'],
      dtype='object')

In [4]:
B3DB_sel = B3DB[['SMILES', 'BBB+/BBB-', 'group']]

In [5]:
B3DB_sel

Unnamed: 0,SMILES,BBB+/BBB-,group
0,O=C(O)c1cc(N=Nc2ccc(S(=O)(=O)Nc3ccccn3)cc2)ccc1O,BBB-,A
1,COC1(NC(=O)C(C(=O)O)c2ccc(O)cc2)C(=O)N2C(C(=O)...,BBB-,A
2,Oc1c(I)cc(Cl)c2cccnc12,BBB-,A
3,CCNC(=NCCSCc1ncccc1Br)NC#N,BBB-,A
4,CN1CC[C@]23c4c5ccc(OC6O[C@H](C(=O)O)[C@@H](O)[...,BBB-,A
...,...,...,...
7802,c1ccc(CN(CC2=NCCN2)c2ccccc2)cc1,BBB-,D
7803,CCOCCn1c(N2CCCN(C)CC2)nc2ccccc21,BBB+,D
7804,CN1CCC(=C2c3ccccc3CC(=O)c3sccc32)CC1,BBB+,D
7805,Cc1[nH]c(=O)c(C#N)cc1-c1ccncc1,BBB-,D


# Calcular las huellas de morgan
<hr>

In [6]:
from rdkit import Chem
from rdkit.Chem import AllChem


In [7]:
def morgan_fingerprint(smiles):
    mol = Chem.MolFromSmiles(smiles)
    fingerprint = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
    fingerprint_list = list(fingerprint)
    return fingerprint_list

In [8]:
B3DB_sel["MorganFP"] = B3DB["SMILES"].apply(morgan_fingerprint)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  B3DB_sel["MorganFP"] = B3DB["SMILES"].apply(morgan_fingerprint)


In [9]:
B3DB_sel

Unnamed: 0,SMILES,BBB+/BBB-,group,MorganFP
0,O=C(O)c1cc(N=Nc2ccc(S(=O)(=O)Nc3ccccn3)cc2)ccc1O,BBB-,A,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,COC1(NC(=O)C(C(=O)O)c2ccc(O)cc2)C(=O)N2C(C(=O)...,BBB-,A,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,Oc1c(I)cc(Cl)c2cccnc12,BBB-,A,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,CCNC(=NCCSCc1ncccc1Br)NC#N,BBB-,A,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,CN1CC[C@]23c4c5ccc(OC6O[C@H](C(=O)O)[C@@H](O)[...,BBB-,A,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...
7802,c1ccc(CN(CC2=NCCN2)c2ccccc2)cc1,BBB-,D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
7803,CCOCCn1c(N2CCCN(C)CC2)nc2ccccc21,BBB+,D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
7804,CN1CCC(=C2c3ccccc3CC(=O)c3sccc32)CC1,BBB+,D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
7805,Cc1[nH]c(=O)c(C#N)cc1-c1ccncc1,BBB-,D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [10]:
# Expandir bits
bits_df = B3DB_sel['MorganFP'].apply(pd.Series)

In [11]:
B3DB_sel = pd.concat((B3DB_sel, bits_df), axis=1)

In [12]:
B3DB_sel

Unnamed: 0,SMILES,BBB+/BBB-,group,MorganFP,0,1,2,3,4,5,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,O=C(O)c1cc(N=Nc2ccc(S(=O)(=O)Nc3ccccn3)cc2)ccc1O,BBB-,A,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,COC1(NC(=O)C(C(=O)O)c2ccc(O)cc2)C(=O)N2C(C(=O)...,BBB-,A,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Oc1c(I)cc(Cl)c2cccnc12,BBB-,A,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,CCNC(=NCCSCc1ncccc1Br)NC#N,BBB-,A,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,CN1CC[C@]23c4c5ccc(OC6O[C@H](C(=O)O)[C@@H](O)[...,BBB-,A,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7802,c1ccc(CN(CC2=NCCN2)c2ccccc2)cc1,BBB-,D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7803,CCOCCn1c(N2CCCN(C)CC2)nc2ccccc21,BBB+,D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7804,CN1CCC(=C2c3ccccc3CC(=O)c3sccc32)CC1,BBB+,D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7805,Cc1[nH]c(=O)c(C#N)cc1-c1ccncc1,BBB-,D,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Guardar los DataFrames resultantes que contienen la información
<hr>

In [13]:
B3DB_sel.to_csv("../Datos tratados/Datos_con_ECFP.csv", index = False)

In [14]:
BBB = B3DB_sel["BBB+/BBB-"]

In [15]:
BBB.to_csv("../Datos tratados/BBB.csv", index = False)

In [16]:
bits_df.to_csv("../Datos tratados/Bits_huella.csv", index = False)