In [None]:
# Note: I ran this on Google Colab 

import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import set_random_seed

# Tunable settings for this cell, gathered in one place.
SEED = 42                  # shared by the framework RNGs and the train/test split
TEST_FRACTION = 0.2        # share of rows held out for the final evaluation
VALIDATION_FRACTION = 0.1  # share of the *training* rows used to monitor fitting
EPOCHS = 10
BATCH_SIZE = 32

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling are repeatable under Restart Kernel -> Run All.
set_random_seed(SEED)

file_path = 'gdb9_G4MP2_withdata_hydrogenation_clean.csv'
data = pd.read_csv(file_path)


# Tokenize the SMILES strings one character at a time.
# lower=False matters: the Tokenizer lowercases its input by default, but SMILES
# is case-sensitive ('C' is an aliphatic carbon, 'c' an aromatic one), so folding
# case would merge chemically different atoms into a single token.
tokenizer = Tokenizer(char_level=True, lower=False)
tokenizer.fit_on_texts(data['unsat_SMILE'])
sequences = tokenizer.texts_to_sequences(data['unsat_SMILE'])
# Pad all sequences to a common length so they form one 2-D integer array.
X = pad_sequences(sequences)

# Target variable
y = data['delta_H'].values

# Split the data. The test portion is reserved for the evaluation cell and is
# deliberately not shown to the model during training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SEED
)

# Define the model: embedding -> 1D convolution -> max pooling -> dense regressor.
model = Sequential()
# +1 on the vocabulary size because index 0 is reserved for padding.
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128, input_length=X.shape[1]))
model.add(Conv1D(64, 3, activation='relu'))
model.add(MaxPooling1D(3))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
# Single linear output unit for the scalar regression target.
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model. Validation metrics come from a slice of the training data
# (validation_split) rather than from the test set, so the test set stays
# untouched until the final evaluation.
model.fit(
    X_train,
    y_train,
    validation_split=VALIDATION_FRACTION,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

# You can now evaluate the model, make predictions, etc.


In [None]:
# Plotting and metric helpers used by this cell.
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

# Score the trained model on the held-out split; the value reported is the
# loss the model was compiled with.
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Model outputs for every test input.
predictions = model.predict(X_test)

# Parity plot: each point is one test sample, actual value on x, prediction on y.
fig, ax = plt.subplots()
ax.scatter(y_test, predictions)
ax.set(
    xlabel="Actual Hydrogenation Enthalpy",
    ylabel="Predicted Hydrogenation Enthalpy",
    title="Actual vs Predicted Hydrogenation Enthalpy",
)
plt.show()

# Coefficient of determination as a second, scale-free view of the fit quality.
r2 = r2_score(y_test, predictions)
print(f"R^2 Score: {r2}")