In [None]:
from IPython.display import clear_output

In [None]:
import os
import numpy as np
import pandas as pd
import csv
from tqdm import tqdm

In [None]:
# Base directory for this notebook's data: the input tables are listed and read
# from `{path}/Cell_tables_10_test_1`, and the voxel grids are written to
# `{path}/Dataset`.
# Presumably a mounted Google Drive folder (Colab) — confirm.
# NOTE: the value already ends with '/', and the f-strings that use it add
# another '/', so the resulting paths contain a double slash.
path = 'drive/MyDrive/'

In [None]:
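# Legend of the per-atom feature types covered by the lookup tables below:
# 0: protein 1
# 1: protein 2
# 2: hbond acceptor
# 3: hbond donor
# 4: weak hbond acceptor?
# 5: weak hbond donor
# 6: pos ionisable
# 7: neg ionisable
# 8: hydrophobe
# 9: carbonyl oxygen
# 10: carbonyl carbon
# 11: aromatic
#
# NOTE: these numbers are NOT the channel indices of the saved array. The
# voxelisation loop further down writes only 10 channels: there are no separate
# "protein 1" / "protein 2" channels, the chain is encoded by the sign of the
# value instead (-1 for Chain_num == 0, +1 otherwise), and the channel order
# for the hbond / charge features differs between the two chains.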


# Lookup tables: residue name -> atom names that carry a given feature.
# A residue that is absent from a table has no atom with that feature.

# The twenty residue names used as keys of the backbone-wide tables below.
_RESIDUES_20 = (
    "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS", "ILE",
    "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL",
)

# Hydrogen-bond acceptor atoms.
hbond_acceptor = {
    "ALA": ["O", "OXT"],
    "ARG": ["O", "OXT"],
    "ASN": ["ND2", "O", "OD1", "OXT"],
    "ASP": ["OD1", "OD2", "O", "OXT"],
    "CYS": ["SG", "O", "OXT"],
    "GLN": ["NE2", "O", "OE1", "OXT"],
    "GLU": ["OE1", "OE2", "O", "OXT"],
    "GLY": ["O", "OXT"],
    "HIS": ["ND1", "NE2", "CE1", "CD2", "O", "OXT"],
    "ILE": ["O", "OXT"],
    "LEU": ["O", "OXT"],
    "LYS": ["O", "OXT"],
    "MET": ["SD", "O", "OXT"],
    "PHE": ["O", "OXT"],
    "PRO": ["O", "OXT"],
    "SER": ["OG", "O", "OXT"],
    "THR": ["OG1", "O", "OXT"],
    "TRP": ["O", "OXT"],
    "TYR": ["OH", "O", "OXT"],
    "VAL": ["O", "OXT"],
}

# Hydrogen-bond donor atoms (this table has no PRO entry).
hbond_donor = {
    "ALA": ["N"],
    "ARG": ["N", "NE", "NH1", "NH2"],
    "ASN": ["N", "ND2", "OD1"],
    "ASP": ["N"],
    "CYS": ["N", "SG"],
    "GLN": ["N", "NE2", "OE1"],
    "GLU": ["N"],
    "GLY": ["N"],
    "HIS": ["N", "ND1", "CE1", "NE2", "CD2"],
    "ILE": ["N"],
    "LEU": ["N"],
    "LYS": ["N", "NZ"],
    "MET": ["N"],
    "PHE": ["N"],
    "SER": ["N", "OG"],
    "THR": ["N", "OG1"],
    "TRP": ["N", "NE1"],
    "TYR": ["N", "OH"],
    "VAL": ["N"],
}

# Weak hydrogen-bond acceptor atoms. Same atoms as `hbond_acceptor`; only the
# order inside the ASN list differs.
weak_hbond_acceptor = {
    "ALA": ["O", "OXT"],
    "ARG": ["O", "OXT"],
    "ASN": ["O", "OD1", "ND2", "OXT"],
    "ASP": ["OD1", "OD2", "O", "OXT"],
    "CYS": ["SG", "O", "OXT"],
    "GLN": ["NE2", "O", "OE1", "OXT"],
    "GLU": ["OE1", "OE2", "O", "OXT"],
    "GLY": ["O", "OXT"],
    "HIS": ["ND1", "NE2", "CE1", "CD2", "O", "OXT"],
    "ILE": ["O", "OXT"],
    "LEU": ["O", "OXT"],
    "LYS": ["O", "OXT"],
    "MET": ["SD", "O", "OXT"],
    "PHE": ["O", "OXT"],
    "PRO": ["O", "OXT"],
    "SER": ["OG", "O", "OXT"],
    "THR": ["OG1", "O", "OXT"],
    "TRP": ["O", "OXT"],
    "TYR": ["OH", "O", "OXT"],
    "VAL": ["O", "OXT"],
}

# Weak hydrogen-bond donor atoms (carbon atoms only).
weak_hbond_donor = {
    "ALA": ["CA", "CB"],
    "ARG": ["CA", "CB", "CG", "CD"],
    "ASN": ["CA", "CB"],
    "ASP": ["CA", "CB"],
    "CYS": ["CA", "CB"],
    "GLN": ["CA", "CB", "CG"],
    "GLU": ["CA", "CB", "CG"],
    "GLY": ["CA"],
    "HIS": ["CA", "CB"],
    "ILE": ["CA", "CB", "CG1", "CD1", "CG2"],
    "LEU": ["CA", "CB", "CG", "CD1", "CD2"],
    "LYS": ["CA", "CB", "CG", "CD", "CE"],
    "MET": ["CA", "CB", "CG", "CE"],
    "PHE": ["CA", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ"],
    "PRO": ["CA", "CB", "CG", "CD"],
    "SER": ["CA", "CB"],
    "THR": ["CA", "CB", "CG2"],
    "TRP": ["CA", "CB", "CD1", "CE3", "CZ3", "CH2", "CZ2"],
    "TYR": ["CA", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ"],
    "VAL": ["CA", "CB", "CG1", "CG2"],
}

# Positively ionisable atoms.
pos_ionisable = {
    "ARG": ["NE", "CZ", "NH1", "NH2"],
    "HIS": ["CG", "ND1", "CE1", "NE2", "CD2"],
    "LYS": ["NZ"],
}

# Negatively ionisable atoms.
neg_ionisable = {
    "ASP": ["OD1", "OD2"],
    "GLU": ["OE1", "OE2"],
}

# Hydrophobic atoms (this table has no GLY and no SER entry).
hydrophobe = {
    "ALA": ["CB"],
    "ARG": ["CB", "CG"],
    "ASN": ["CB"],
    "ASP": ["CB"],
    "CYS": ["CB"],
    "GLN": ["CB", "CG"],
    "GLU": ["CB", "CG"],
    "HIS": ["CB"],
    "ILE": ["CB", "CG1", "CD1", "CG2"],
    "LEU": ["CB", "CG", "CD1", "CD2"],
    "LYS": ["CB", "CG", "CD"],
    "MET": ["CB", "CG", "CE", "SD"],
    "PHE": ["CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ"],
    "PRO": ["CB", "CG"],
    "THR": ["CG2"],
    "TRP": ["CB", "CG", "CD2", "CE3", "CZ3", "CH2", "CZ2"],
    "TYR": ["CB", "CG", "CD1", "CD2", "CE1", "CE2"],
    "VAL": ["CB", "CG1", "CG2"],
}

# Backbone carbonyl oxygen / carbon: the same single atom name for every
# residue. Each residue gets its own list object, as in a hand-written literal.
carbonyl_oxygen = {residue: ["O"] for residue in _RESIDUES_20}
carbonyl_carbon = {residue: ["C"] for residue in _RESIDUES_20}

# Atoms that belong to an aromatic ring.
aromatic = {
    "HIS": ["CG", "ND1", "CE1", "NE2", "CD2"],
    "PHE": ["CG", "CD1", "CD2", "CE1", "CE2", "CZ"],
    "TRP": ["CG", "CD1", "CD2", "NE1", "CE2", "CE3", "CZ2", "CZ3", "CH2"],
    "TYR": ["CG", "CD1", "CD2", "CE1", "CE2", "CZ"],
}

In [None]:
# Names of all entries in the input directory; the voxelisation loop reads each
# one as a CSV table and uses the first four characters of the name as the
# output file name.
# NOTE: os.listdir returns entries in arbitrary order and does not filter them,
# so sub-directories or hidden files in that folder would be listed too.
list_str = os.listdir(f'{path}/Cell_tables_10_test_1')
# Bare expression: as the last statement of a notebook cell it displays the
# number of entries found.
len(list_str)

In [None]:
# Voxelise every table named in `list_str`.
#
# For each file `{path}/Cell_tables_10_test_1/<name>` this cell
#   1. flags every atom with nine signed features (added to `Cell_atoms` as
#      columns): -1 when the atom has the feature and Chain_num == 0, +1 when
#      it has the feature and Chain_num is anything else, 0 when it does not
#      have the feature;
#   2. writes those flags into a (10, 81, 81, 41) grid at the voxel obtained by
#      rounding the atom's x_new / y_new / z_new;
#   3. saves the grid to `{path}/Dataset/<first 4 characters of name>.npy`.
#
# Rows are always read by POSITION. The table is loaded with `index_col=0`, so
# its index labels are whatever the first CSV column holds; label-based access
# such as `Cell_atoms['x_new'][a]` raises KeyError or pairs features with the
# wrong coordinates whenever those labels are not exactly 0..n-1.


def _table_feature(table, residues, atom_types, signs):
    """Return one signed flag per atom for a residue -> atom-names table.

    An atom receives its chain sign when ``table`` lists its atom name under
    its residue name, and 0 otherwise (this includes residues that are not a
    key of ``table``).
    """
    return [
        sign if atom in table.get(residue, ()) else 0
        for residue, atom, sign in zip(residues, atom_types, signs)
    ]


def _atom_name_feature(atom_name, atom_types, signs):
    """Return the chain sign for atoms named ``atom_name`` and 0 for the rest."""
    return [sign if atom == atom_name else 0 for atom, sign in zip(atom_types, signs)]


# Grid geometry (loop-invariant): each axis spans 2 * half_size + 1 voxels.
num_channels = 10
high_gr = 20
length_gr = 40
width_gr = 40
grid_shape = (num_channels, length_gr * 2 + 1, width_gr * 2 + 1, high_gr * 2 + 1)

# Feature column stored in channel 0..9, depending on the atom's chain. The
# two layouts swap acceptor <-> donor and positive <-> negative charge, so a
# given channel holds one chain's feature together with the opposite feature
# of the other chain. Channels 6-9 are the same for both chains.
chain_0_layout = (
    'HBond_acceptor', 'HBond_donor', 'Weak_HBond_donor', 'HBond_acceptor',
    'Pos_charge', 'Neg_charge',
    'Hydrophoby', 'Carbocsile_o', 'Carbocsile_c', 'Aromatic',
)
other_chain_layout = (
    'HBond_donor', 'HBond_acceptor', 'HBond_acceptor', 'Weak_HBond_donor',
    'Neg_charge', 'Pos_charge',
    'Hydrophoby', 'Carbocsile_o', 'Carbocsile_c', 'Aromatic',
)

for file_name in tqdm(list_str):
    str_id = file_name[:4]
    Cell_atoms = pd.read_csv(f'{path}/Cell_tables_10_test_1/{file_name}', index_col=0)

    # Pull the needed columns out once as plain Python lists (positional order).
    residues = Cell_atoms['Residue'].tolist()
    atom_types = Cell_atoms['Type'].tolist()
    signs = [-1 if chain == 0 else 1 for chain in Cell_atoms['Chain_num'].tolist()]

    # Insertion order below is the order in which the columns are added.
    features = {
        'HBond_acceptor': _table_feature(hbond_acceptor, residues, atom_types, signs),
        'HBond_donor': _table_feature(hbond_donor, residues, atom_types, signs),
        'Weak_HBond_donor': _table_feature(weak_hbond_donor, residues, atom_types, signs),
        'Pos_charge': _table_feature(pos_ionisable, residues, atom_types, signs),
        'Neg_charge': _table_feature(neg_ionisable, residues, atom_types, signs),
        'Hydrophoby': _table_feature(hydrophobe, residues, atom_types, signs),
        # The carbonyl flags depend on the atom name only, whatever the residue.
        'Carbocsile_o': _atom_name_feature('O', atom_types, signs),
        'Carbocsile_c': _atom_name_feature('C', atom_types, signs),
        'Aromatic': _table_feature(aromatic, residues, atom_types, signs),
    }
    for column, flags in features.items():
        Cell_atoms[column] = flags

    coord_array = np.zeros(grid_shape)
    coordinates = zip(
        Cell_atoms['x_new'].tolist(),
        Cell_atoms['y_new'].tolist(),
        Cell_atoms['z_new'].tolist(),
    )
    for row, (sign, (x, y, z)) in enumerate(zip(signs, coordinates)):
        layout = chain_0_layout if sign == -1 else other_chain_layout
        # All ten channels of the voxel are written for every atom, zeros
        # included, so when several atoms round to the same voxel the last one
        # in the table wins.
        # NOTE(review): a negative rounded coordinate is not rejected; NumPy
        # treats it as an index counted from the end of the axis. Confirm that
        # this matches the coordinate convention of x_new / y_new / z_new.
        coord_array[:, round(x), round(y), round(z)] = [
            features[column][row] for column in layout
        ]

    with open(f'{path}/Dataset/{str_id}.npy', 'wb') as f:
        np.save(f, coord_array)


100%|██████████| 125/125 [01:08<00:00,  1.81it/s]
