In [1]:
import numpy as np
import rdkit
from rdkit import Chem

In [2]:
# Read the peptide table as strings; text following '!' is treated as a comment
# by np.loadtxt and skipped.
pep_data = np.loadtxt(
    'PepData_417.csv',
    dtype=str,
    delimiter=',',
    comments='!',
)

# Column 0 holds the dot-separated peptide sequences (e.g. 'G.A.I.K.R.D').
# A column slice of the 2-D table is already 1-D, so it converts straight to a list.
pep_list = pep_data[:, 0].tolist()

# Filled by the HELM -> SMILES conversion cell further down.
smi_list = list()

In [3]:
# One-letter residue code -> integer label. The twenty codes below are numbered
# 1..20 in the order written.
ref = {code: label for label, code in enumerate('GAVLIMCTSNQDEFYWHPKR', start=1)}
# 'O' maps to 0 -- presumably an "empty position" marker; TODO confirm with the
# data source.
ref['O'] = 0

In [4]:
# Encode every peptide as a fixed-width row of integer residue labels (see `ref`),
# left-aligned and zero-padded on the right, then write the matrix to CSV.
#
# The matrix is at least 7 columns wide (the width this notebook has always
# written) and grows automatically if a longer peptide is present, instead of
# failing with an IndexError on the first sequence of more than 7 residues.
# An unknown residue code still raises KeyError from the `ref` lookup.
residue_lists = [pep.split('.') for pep in pep_list]
n_columns = max([7] + [len(residues) for residues in residue_lists])

data = np.zeros((len(pep_list), n_columns))
for row, residues in enumerate(residue_lists):
    # Only the leading len(residues) cells are written; the rest stay 0.
    data[row, :len(residues)] = [ref[code] for code in residues]

# fmt='%s' is kept so the file content stays the same as before
# (float cells are written via str(), e.g. '1.0').
np.savetxt('PepAmino_'+str(len(pep_list))+'.csv', data, delimiter=',', fmt='%s')

In [5]:
# Convert every peptide into a SMILES string by way of RDKit's HELM parser.
#
# The list is rebuilt from scratch here so that re-running this cell does not
# append a second copy of the results to an already populated smi_list (which
# would break the later reshape to len(pep_list) rows).
smi_list = []
for pep in pep_list:
    # Wrap the dot-separated residue codes in a single-chain HELM string.
    s = 'PEPTIDE1{'+pep+'}$$$$'
    print(s)
    mol = Chem.MolFromHELM(s)
    if mol is None:
        # RDKit reports a parse failure by returning None; stop here and name
        # the offending input rather than failing inside MolToSmiles.
        raise ValueError('RDKit could not parse HELM string: ' + s)
    smi = Chem.MolToSmiles(mol)
    smi_list.append(smi)

PEPTIDE1{G.A.I.K.R.D}$$$$
PEPTIDE1{V.A.E.N.G}$$$$
PEPTIDE1{L.N.N.A.R.H}$$$$
PEPTIDE1{V.A.E.N.I.P}$$$$
PEPTIDE1{S.V.N.N}$$$$
PEPTIDE1{V.A.E.N.M}$$$$
PEPTIDE1{S.V.N.D}$$$$
PEPTIDE1{M.K.I.I.I.P}$$$$
PEPTIDE1{M.L.L.P.K.L}$$$$
PEPTIDE1{V.A.N.I}$$$$
PEPTIDE1{M.P.K.I.I.L}$$$$
PEPTIDE1{V.A.F.L.E}$$$$
PEPTIDE1{I.A.V.N.E}$$$$
PEPTIDE1{I.A.E.N.I}$$$$
PEPTIDE1{A.R.E.N.P.I.E}$$$$
PEPTIDE1{T.A.W.A.A.V}$$$$
PEPTIDE1{I.A.N.N.L.F}$$$$
PEPTIDE1{V.A.E.N.L}$$$$
PEPTIDE1{G.A.I.K.R.R}$$$$
PEPTIDE1{V.A.I.N.E}$$$$
PEPTIDE1{S.V.N.Y}$$$$
PEPTIDE1{I.A.L.N.E}$$$$
PEPTIDE1{V.A.E.N.I.M}$$$$
PEPTIDE1{M.I.I.V.M}$$$$
PEPTIDE1{V.A.E.N.I.W}$$$$
PEPTIDE1{V.T.G.V.W.A}$$$$
PEPTIDE1{M.M.V.I.I.V}$$$$
PEPTIDE1{G.F.S}$$$$
PEPTIDE1{K.V.S.V.V.N}$$$$
PEPTIDE1{D.D.R.N.W}$$$$
PEPTIDE1{D.D.R.N.D}$$$$
PEPTIDE1{M.V.I.I.M}$$$$
PEPTIDE1{I.V.V.N.N.N}$$$$
PEPTIDE1{L.P.R.F}$$$$
PEPTIDE1{K.V.V.A.S.E}$$$$
PEPTIDE1{A.V.W.G.T.V}$$$$
PEPTIDE1{A.V.I.F.E.V}$$$$
PEPTIDE1{V.F.M.M.I.A}$$$$
PEPTIDE1{P.I.I.K.M.L}$$$$
PEPTIDE1{G.L}$$$$
PEPTIDE1{A.M.T.I

In [6]:
# Write the SMILES strings as a one-column CSV, one peptide per row.
# The explicit row count in the reshape fails loudly if smi_list and pep_list
# have drifted out of sync.
smiles_path = 'Smiles_' + str(len(pep_list)) + '.csv'
out = np.array(smi_list).reshape((len(pep_list), 1))
np.savetxt(smiles_path, out, fmt='%s', delimiter=',')

In [7]:
# Write a two-column table with a 'smiles,values' header row, one peptide per
# row, where every entry in the 'values' column is the constant 1.0.
#
# The SMILES column is built directly from smi_list rather than from whatever
# the global `out` currently holds, so this cell gives the same result when it
# is re-run or executed after other cells have reassigned `out`.
smiles_column = np.array(smi_list).reshape(len(pep_list), 1)
ones_column = np.zeros((len(pep_list), 1)) + 1
header_row = np.array(['smiles', 'values']).reshape(1, 2)

# Stacking the string column with the float column yields a string array, so
# the constant is written to the file as '1.0'.
out = np.vstack((header_row, np.hstack((smiles_column, ones_column))))
np.savetxt('PepDataCMF_'+str(len(pep_list))+'.csv', out, delimiter=',', fmt='%s')

In [8]:
# Export columns 1-7 of pep_data, each to its own one-column CSV named
# 'Values_<label>_<number of peptides>.csv'.
# The (column index, label) pairs are listed once here instead of repeating the
# same two statements per column.
value_columns = [
    (1, 'SelPos3'),
    (2, 'SelPos2'),
    (3, 'SelPos1'),
    (4, 'ActPos3'),
    (5, 'ActPos2'),
    (6, 'ActPos1'),
    (7, 'ActTotal'),
]
for column_index, label in value_columns:
    # Values are written exactly as the strings read from the input file.
    out = pep_data[:, column_index].reshape(len(pep_list), 1)
    np.savetxt('Values_'+label+'_'+str(len(pep_list))+'.csv', out, delimiter=',', fmt='%s')

In [9]:
# Write the peptide sequences themselves as a one-column CSV, one per row.
names_path = 'PepName_' + str(len(pep_list)) + '.csv'
out = np.array(pep_list).reshape((len(pep_list), 1))
np.savetxt(names_path, out, fmt='%s', delimiter=',')