In [None]:
# Import necessary libraries
from sklearn.metrics import mean_squared_error, r2_score  # For evaluating the model
from sklearn.model_selection import train_test_split  # To split data into training and testing sets
from sklearn.preprocessing import StandardScaler  # For standardizing input features
from tensorflow.keras.callbacks import EarlyStopping  # Callback for early stopping
from tensorflow.keras.layers import Dense, Dropout  # Layers for the neural network
from tensorflow.keras.models import Sequential  # To create a sequential neural network model
from tensorflow.keras.utils import set_random_seed  # To seed the RNGs for reproducible training

In [None]:
# Pull the regression target out of the combined frame, then keep every
# remaining column as a model input.
Y = concatenated_df['Library IC50']  # Target column
X = concatenated_df.drop('Library IC50', axis=1)  # All other columns are features

In [None]:
# Hold out 30% of the rows as a test set. random_state is fixed so the same
# rows land in each partition on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Standardize the input features.
# The scaler learns its statistics from the training rows only; the test rows
# are then transformed with those same statistics so no test information is
# used during fitting.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)  # Scaled training features
X_test = scaler.transform(X_test)  # Test features scaled with the training statistics

In [None]:
# Define the neural network model
# Seed the Python, NumPy and backend random generators before any layer is
# created, so weight initialisation, dropout masks and batch shuffling start
# from the same state on every Restart & Run All (same seed value as the
# train/test split). NOTE: some GPU kernels may still be non-deterministic.
set_random_seed(42)

model = Sequential()
# First hidden layer: 128 ReLU units; input_shape declares one input per feature column
model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dropout(0.2))  # Dropout with a 20% rate during training to reduce overfitting
model.add(Dense(64, activation='relu'))  # Hidden layer with 64 ReLU units
model.add(Dense(32, activation='relu'))  # Hidden layer with 32 ReLU units
model.add(Dense(1, activation='linear'))  # Single linear output unit for regression

In [None]:
# Compile the model: Adam optimizer, mean squared error as the training loss
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
)

# Early stopping watches the validation loss, waits 10 epochs without
# improvement before halting, and restores the best weights it has seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Train for up to 50 epochs with a batch size of 64; 20% of the training data
# is held out for validation, which is what the early-stopping callback monitors.
history = model.fit(
    X_train,
    Y_train,
    epochs=50,
    batch_size=64,
    validation_split=0.2,
    verbose=1,
    callbacks=[early_stopping],
)

In [None]:
# Score the trained model on the held-out test set.
# The model is compiled with a loss only (no extra metrics), so evaluate()
# returns the mean squared error as its result.
mse = model.evaluate(x=X_test, y=Y_test)
print(f'Mean Squared Error on Test Data: {mse}')  # Report the test-set MSE

In [None]:
# Run the trained model over the scaled test features to get predicted targets
Y_pred = model.predict(x=X_test)

In [None]:
# Coefficient of determination between the true test targets and the predictions
r_squared = r2_score(y_true=Y_test, y_pred=Y_pred)
print(f'R-squared (R²): {r_squared}')  # Report the R-squared value