# **1) Initial Instructions**

In [None]:
!pip install -q rdkit
!pip install -q tensorflow
!pip install -q opencv-python

In [None]:
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
import glob
import os
import cv2

from rdkit import Chem
from rdkit.Chem.SaltRemover import SaltRemover
from rdkit.Chem.AllChem import GetMorganGenerator

# **2) Model Loading**

In [None]:
# Working directory that is scanned for the serialized model ('.joblib')
# and the input table ('.csv').
path = r"filepath/fCNN_launching_dir"
os.chdir(path)

# Start from None so that a missing file is reported explicitly below instead
# of surfacing later as a NameError (or as a stale object from an earlier run).
fCNN_model = None
data = None

# os.listdir() returns entries in arbitrary order; sorting makes the scan
# deterministic. If several matching files exist, the last one in sorted
# order is the one that is kept.
for elem in sorted(os.listdir(os.getcwd())):
    if elem.endswith('.joblib'):
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code. Only load model files that come from a trusted source.
        fCNN_model = joblib.load(elem)
        print('Model loaded succesfully!')
    elif elem.endswith('.csv'):
        data = pd.read_csv(elem)
        print('Data loaded succesfully!')

if fCNN_model is None:
    raise FileNotFoundError(f"No '.joblib' model file found in {os.getcwd()}")
if data is None:
    raise FileNotFoundError(f"No '.csv' data file found in {os.getcwd()}")

df = pd.DataFrame(data)
df.head()

# **3) Data Preparation**

In [None]:
#Fingerprint generation and salt removal
# Morgan fingerprint generator: 8192-bit fingerprints, radius 5.
morgan_gen = GetMorganGenerator(fpSize=8192, radius=5)
remover = SaltRemover()

fps_list = []
invalid_smiles = []
for smiles in df['smiles']:
  # MolFromSmiles returns None when the SMILES cannot be parsed; non-string
  # cells (e.g. NaN from an empty CSV field) are treated as invalid as well.
  mol = Chem.MolFromSmiles(smiles) if isinstance(smiles, str) else None
  if mol is None:
    invalid_smiles.append(smiles)
    continue
  mol = remover.StripMol(mol)
  fps_list.append(morgan_gen.GetFingerprint(mol))

# Fail with the offending entries listed rather than with an opaque error
# raised from inside the salt remover.
if invalid_smiles:
  raise ValueError(
      f"{len(invalid_smiles)} SMILES could not be parsed: {invalid_smiles[:10]}"
  )

df['fps'] = fps_list

In [None]:
"""This section is responsible for loading images and data intergration (images-fingerprints). Fragment '/**/*.png' is essential for recursive search with glob. """

path_for_data = r"filepath/fCNN_launching_dir/**/*.png"

images = glob.glob(path_for_data, recursive=True)
chembl_id_list = df['id'].to_list()
img_list = []
fps_list = []

for idx, image in enumerate(images):
  for id in chembl_id_list:
    if id in os.path.basename(image):

      img = cv2.imread(image)
      img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
      resized_image = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)
      img_list.append(resized_image)
      fps_list.append(np.array(df[df['id'] == id]['fps'].values[0]))
    
print(len(img_list))
print(len(fps_list))
print()

In [None]:
# Stack the per-sample Python lists into NumPy arrays
img_arr = np.asarray(img_list)
fps_arr = np.asarray(fps_list)

# Cast the pixel values to float32 and rescale them to the 0-1 range
img_arr = img_arr.astype(np.float32, copy=False) / 255.0

# **4) Prediction**

In [None]:
# Run the model on the paired inputs (images first, fingerprints second)
y_scores = fCNN_model.predict([img_arr, fps_arr])

# Binarize the model output: values above 0.5 become 1, everything else 0
y_pred = np.where(y_scores > 0.5, 1, 0)

In [None]:
#Labels Display
# Number of consecutive predictions that are printed for each id.
RECORDS_PER_ID = 7

y_pred = y_pred.flatten()

# The slicing below assumes exactly RECORDS_PER_ID predictions per id, stored
# contiguously in chembl_id_list order. If the counts do not match, the slices
# would silently shift (or come back empty), so fail loudly instead.
expected_count = RECORDS_PER_ID * len(chembl_id_list)
if y_pred.size != expected_count:
    raise ValueError(
        f"Expected {expected_count} predictions "
        f"({RECORDS_PER_ID} per id for {len(chembl_id_list)} ids), got {y_pred.size}"
    )

for idx, elem in enumerate(chembl_id_list):
    start = idx * RECORDS_PER_ID
    end = start + RECORDS_PER_ID
    print(elem)
    print(f'Labels for each record of {elem}: {y_pred[start:end]}')
    print()