In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Dense, Flatten
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Load the pesticide dataset; the code below reads its 'Substance' and
# 'Status' columns.
data = pd.read_csv('ActiveCompoundPersticide.csv')

# Encode the 'Status' labels as integers.
le = LabelEncoder()
data['Status'] = le.fit_transform(data['Status'])

# One-hot encode every distinct 'Substance' value and append a trailing axis so
# the array is laid out as (samples, steps, channels) for Conv1D.
# The dtype is pinned to float32 because recent pandas releases return boolean
# dummy columns by default, which is not a floating-point network input.
# NOTE(review): whole 'Substance' values are treated as categories here, so a
# substance that only occurs in the test split activates a column the model
# never saw during training. Consider a structural featurisation instead.
one_hot = pd.get_dummies(data['Substance'], dtype='float32')
X = one_hot.values.reshape(len(data), one_hot.shape[1], 1)

# Split dataset into training and testing sets. A fixed seed makes the split
# (and therefore the reported test metrics) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, data['Status'], test_size=0.2, random_state=42
)

# Define the CNN model: one convolution + pooling stage followed by a small
# dense head.
# NOTE(review): the single sigmoid unit with binary cross-entropy assumes
# 'Status' has exactly two classes - confirm against the data.
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X.shape[1], X.shape[2])))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the CNN model. `learning_rate` replaces the deprecated `lr` keyword.
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the CNN model
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate the CNN model on the held-out split
loss, accuracy = model.evaluate(X_test, y_test)
print('Test loss:', loss)
print('Test accuracy:', accuracy)


2023-03-21 17:44:42.255703: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-03-21 17:44:42.256323: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-03-21 17:44:42.256386: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (debian): /proc/driver/nvidia/version does not exist
2023-03-21 17:44:42.257301: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(name, **kwargs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.5731362104415894
Test accuracy: 0.5614035129547119


In [3]:
# Save the trained CNN model to a pickle file.
import pickle

# The context manager guarantees the file handle is flushed and closed, even
# if serialisation raises part-way through.
with open('CNNAIPesticide.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers
......conv1d
.........vars
............0
............1
......dense
.........vars
............0
............1
......dense_1
.........vars
............0
............1
......flatten
.........vars
......max_pooling1d
.........vars
...metrics
......mean
.........vars
............0
............1
......mean_metric_wrapper
.........vars
............0
............1
...vars
Keras model archive saving:
File Name                                             Modified             Size
variables.h5                                   2023-03-21 17:47:38      7062456
config.json                                    2023-03-21 17:47:38         2542
metadata.json                                  2023-03-21 17:47:38           64


In [None]:
# Reload the CNN model from its pickle file.
import pickle

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files that were produced by this notebook.
# The context manager closes the file handle once the model is loaded.
with open('CNNAIPesticide.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Render a diagram of the CNN architecture, annotated with layer names and
# tensor shapes.
from keras.utils import plot_model

plot_model(
    model,
    show_layer_names=True,
    show_shapes=True,
)

In [None]:
import pickle
from keras.models import Sequential
from keras.layers import Dense

# Build the generator model: a 100-dimensional input, six ReLU layers that
# double in width from 256 to 8192, and a 10912-way softmax output.
generator = Sequential([
    Dense(256, input_dim=100, kernel_initializer='glorot_normal', activation='relu'),
    *[Dense(width, activation='relu') for width in (512, 1024, 2048, 4096, 8192)],
    Dense(10912, activation='softmax'),
])

# Save the model to a pickle file.
with open('generator_model.pkl', 'wb') as out_file:
    pickle.dump(generator, out_file)


In [None]:
# Render a diagram of the generator architecture, annotated with layer names
# and tensor shapes.
from keras.utils import plot_model

plot_model(
    generator,
    show_layer_names=True,
    show_shapes=True,
)

In [None]:
# Sample random latent vectors, run them through the generator and try to
# parse the decoded strings as molecules.
import pickle
import numpy as np
from rdkit import Chem
from rdkit.Chem import Draw

# Load the generator model.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files that were produced by this notebook.
with open('generator_model.pkl', 'rb') as file:
    generator = pickle.load(file)

# Draw all latent vectors up front and predict them in one batch instead of
# calling predict() once per sample.
z = np.random.normal(0, 1, (10, 100))
predictions = generator.predict(z)
# Width of the binary string follows the generator's output size rather than a
# hard-coded constant.
n_outputs = predictions.shape[1]

smiles = []
invalid_count = 0
for class_index in np.argmax(predictions, axis=1):
    # NOTE(review): the binary representation of the arg-max index is a string
    # of '0'/'1' characters, not SMILES syntax, so RDKit is not expected to
    # parse it. A real decoder (index -> SMILES vocabulary) is needed here.
    generated_smiles = np.binary_repr(class_index, width=n_outputs)
    print(generated_smiles)
    mol = Chem.MolFromSmiles(generated_smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None.
        invalid_count += 1
    smiles.append(mol)

# Print the generated molecules (None marks a string RDKit could not parse).
print(smiles)
print(f'{invalid_count} of {len(smiles)} generated strings were not valid SMILES')

In [None]:
import pandas as pd

# Read the dataset from disk.
data = pd.read_csv('ActiveCompoundPersticide.csv')

# Turn the 'Substance' column into a plain Python list.
smiles_list = data.loc[:, 'Substance'].tolist()

from rdkit import Chem
from rdkit.Chem import AllChem
from keras.preprocessing.sequence import pad_sequences
import numpy as np

def smiles_to_vector(smiles, max_length=100):
    """Convert a SMILES string into a Morgan-fingerprint vector of fixed length.

    Args:
        smiles: SMILES string describing one molecule.
        max_length: Length the fingerprint is padded/truncated to by
            ``pad_sequences``.

    Returns:
        The float32 array produced by ``pad_sequences`` for a single sequence
        (presumably shape ``(1, max_length)`` - TODO confirm).

    Raises:
        ValueError: If ``smiles`` is not a string or RDKit cannot parse it.
    """
    # MolFromSmiles returns None on a parse failure; missing CSV cells arrive
    # as non-string values. Both cases get one clear error instead of an
    # opaque failure inside the fingerprint call.
    mol = Chem.MolFromSmiles(smiles) if isinstance(smiles, str) else None
    if mol is None:
        raise ValueError(f'Invalid SMILES string: {smiles!r}')

    # Radius-2 Morgan fingerprint folded to 2048 bits.
    fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)
    # Placeholder array that ConvertToNumpyArray fills with the fingerprint bits.
    arr = np.zeros((1,))
    Chem.DataStructs.ConvertToNumpyArray(fp, arr)
    # NOTE(review): with max_length smaller than the 2048-bit fingerprint,
    # pad_sequences truncates it, so most bits are discarded - confirm that
    # this is intended.
    return pad_sequences([arr], maxlen=max_length, dtype='float32', padding='post')

def generate_random_input_vector(n_samples, input_dim=100):
    """Draw a batch of standard-normal noise vectors.

    Args:
        n_samples: Number of vectors (rows) to draw.
        input_dim: Length of each vector (columns).

    Returns:
        Array of shape ``(n_samples, input_dim)`` sampled from NumPy's global
        random state.
    """
    batch_shape = (n_samples, input_dim)
    noise = np.random.normal(loc=0.0, scale=1.0, size=batch_shape)
    return noise


In [None]:
# Train the generator model.
from keras.optimizers import Adam

# Hyperparameter configuration.
batch_size = 128
epochs = 10000
input_dim = 100

# Compile the model. `learning_rate` replaces the deprecated `lr` keyword.
generator.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5))

# Training loop.
# NOTE(review): the targets passed to train_on_batch are the generator's own
# predictions for the same inputs, so the loss carries no external training
# signal. Presumably real encoded SMILES (or a discriminator) should provide
# the targets - confirm the intended setup.
for epoch in range(epochs):
    # Generate a batch of random input vectors.
    random_input = generate_random_input_vector(batch_size, input_dim)
    # Generate outputs for the random batch. predict_on_batch handles a single
    # batch without the per-call progress output of predict() inside a loop.
    generated_smiles = generator.predict_on_batch(random_input)
    # Train the model on the outputs it just generated.
    generator.train_on_batch(random_input, generated_smiles)
    # Show progress every 100 iterations.
    if epoch % 100 == 0:
        print(f'Epoch {epoch} / {epochs}')


In [None]:
import pickle

# Serialise the generator model to a pickle file on disk.
# NOTE(review): this writes 'generator_model.pickle', while other cells use
# 'generator_model.pkl' - confirm the two file names are meant to differ.
with open('generator_model.pickle', 'wb') as out_file:
    pickle.dump(generator, out_file)

In [None]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Reshape, Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
# hapus warning 
import warnings
warnings.filterwarnings('ignore')


# Load the data
data = pd.read_csv('ActiveCompoundPersticide.csv')

# Get the SMILES data as a plain Python list
smiles_list = data['Substance'].tolist()

# Define constants: sequence length and per-step feature width. input_dim is
# also the fingerprint size used by smiles_to_vector in this cell.
max_length = 100
input_dim = 2048

# Define the generator model: a dense stack that widens to
# max_length * input_dim units, reshaped into a (max_length, input_dim)
# sequence and passed through two LSTM layers and a per-step softmax.
# NOTE(review): the Dense(max_length*input_dim) layer maps 8192 inputs to
# 204800 units, i.e. about 1.7 billion weights in that layer alone - confirm
# that this size is intended and fits in memory.
# NOTE(review): the model output is (batch, max_length, input_dim), whereas
# train_generator appears to yield targets of shape (batch, input_dim);
# verify that the shapes agree before training.
generator = Sequential()
generator.add(Dense(256, input_dim=input_dim, activation='relu'))
generator.add(Dense(512, activation='relu'))
generator.add(Dense(1024, activation='relu'))
generator.add(Dense(2048, activation='relu'))
generator.add(Dense(4096, activation='relu'))
generator.add(Dense(8192, activation='relu'))
generator.add(Dense(max_length*input_dim, activation='relu'))
generator.add(Reshape((max_length, input_dim)))
generator.add(LSTM(512, return_sequences=True))
generator.add(Dropout(0.2))
generator.add(LSTM(256, return_sequences=True))
generator.add(Dropout(0.2))
generator.add(Dense(input_dim, activation='softmax'))

# `learning_rate` replaces the deprecated `lr` keyword.
generator.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001, beta_1=0.5))

# Define the training data generator
def train_generator(batch_size):
    """Yield endless (inputs, targets) batches of fingerprint vectors.

    Each batch samples ``batch_size`` entries from ``smiles_list`` uniformly
    with replacement and converts them with ``smiles_to_vector``. Inputs and
    targets hold the same values.

    Args:
        batch_size: Number of SMILES strings per yielded batch.

    Yields:
        Tuple ``(x_train, y_train)`` of equal NumPy arrays.
    """
    while True:
        random_indices = np.random.randint(0, len(smiles_list), batch_size)
        # Featurise each sampled SMILES once; inputs and targets are identical,
        # so there is no need to compute every fingerprint twice.
        x_train = np.array([smiles_to_vector(smiles_list[i]) for i in random_indices])
        # Yield a separate copy so x and y remain distinct arrays.
        yield x_train, x_train.copy()

# Define the function to convert SMILES to vectors
def smiles_to_vector(smiles):
    """Convert a SMILES string into a Morgan-fingerprint bit vector.

    Args:
        smiles: SMILES string describing one molecule.

    Returns:
        NumPy array filled with the bits of a radius-2 Morgan fingerprint
        folded to ``input_dim`` bits (``input_dim`` is a module-level
        constant).

    Raises:
        ValueError: If ``smiles`` is not a string or RDKit cannot parse it.
    """
    # MolFromSmiles returns None on a parse failure; missing CSV cells arrive
    # as non-string values. Both cases get one clear error instead of an
    # opaque failure inside the fingerprint call.
    mol = Chem.MolFromSmiles(smiles) if isinstance(smiles, str) else None
    if mol is None:
        raise ValueError(f'Invalid SMILES string: {smiles!r}')

    fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=input_dim)
    # Placeholder array that ConvertToNumpyArray fills with the fingerprint bits.
    arr = np.zeros((1,))
    Chem.DataStructs.ConvertToNumpyArray(fp, arr)
    return arr

# Define the checkpoint to save the model during training. It was previously
# commented out while still being passed to `callbacks`, which raised a
# NameError. No validation data is supplied, so the training loss is monitored
# instead of the default validation loss.
checkpoint = ModelCheckpoint('generator_model.h5', monitor='loss', verbose=1, save_best_only=True)

# Train the generator model. Model.fit accepts Python generators directly;
# fit_generator is deprecated.
generator.fit(train_generator(batch_size=32), steps_per_epoch=100, epochs=100, callbacks=[checkpoint])


2023-03-26 00:40:33.811103: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-26 00:40:34.464651: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-03-26 00:40:34.464676: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-03-26 00:40:36.030938: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-