In [None]:
import gpflow
from gpflow.mean_functions import Constant
from gpflow.utilities import positive, print_summary
# Removed broadcasting_elementwise as standard tf ops are used
# Removed matplotlib import as we won't plot individual fold results by default
# from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from rdkit.Chem import AllChem, Descriptors, MolFromSmiles, SanitizeMol
from sklearn.model_selection import KFold # Import KFold
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import os # For file check
from typing import Tuple, Optional # For type hints

# --- Constants ---
# Path of the CSV file read by load_and_preprocess_data.
# NOTE(review): '/content/...' looks like a Google Colab location — adjust when running elsewhere.
DATA_FILE = '/content/photoswitches.csv'
# Name of the CSV column holding the regression target.
TARGET_COLUMN = 'E isomer pi-pi* wavelength in nm'
# Name of the CSV column holding the SMILES strings.
SMILES_COLUMN = 'SMILES'
# Morgan fingerprint settings passed to the fingerprint generator.
MORGAN_RADIUS = 2
MORGAN_NBITS = 2048
RANDOM_STATE = 15 # Seed for the shuffled KFold split, fixed for reproducibility
OPTIMIZER_MAXITER = 100 # Iteration cap passed to the Scipy optimiser
KERNEL_JITTER = 1e-9 # Lower bound applied to the Tanimoto denominator to avoid division by zero
N_FOLDS = 5 # Number of folds for cross-validation

# --- Custom Tanimoto Kernel ---
class Tanimoto(gpflow.kernels.Kernel):
    """
    Tanimoto similarity kernel for fingerprint vectors.

    k(x, y) = σ² * <x, y> / max(||x||² + ||y||² - <x, y>, ε)

    For binary fingerprints <x, y> counts the bits set in both vectors and
    ||x||² counts the bits set in x. The denominator is clamped from below
    by KERNEL_JITTER (ε) so that the division never hits zero.
    """
    def __init__(self, variance: float = 1.0, active_dims: Optional[list] = None):
        """
        :param variance: Initial value of the signal variance σ².
        :param active_dims: Forwarded unchanged to the gpflow Kernel base class.
        """
        super().__init__(active_dims=active_dims)
        # The positive() transform keeps σ² > 0 while it is being optimised.
        self.variance = gpflow.Parameter(variance, transform=positive())

    def K(self, X: tf.Tensor, X2: Optional[tf.Tensor] = None) -> tf.Tensor:
        """
        Evaluate the kernel between two sets of fingerprints.

        :param X: N x D tensor of fingerprints.
        :param X2: M x D tensor of fingerprints; when None, X is compared with itself.
        :return: N x M kernel matrix.
        """
        # All arithmetic is carried out in float64.
        lhs = tf.cast(X, tf.float64)
        rhs = lhs if X2 is None else tf.cast(X2, tf.float64)

        # Squared norms of every row: shapes (N,) and (M,).
        lhs_sq_norm = tf.reduce_sum(tf.square(lhs), axis=-1)
        rhs_sq_norm = tf.reduce_sum(tf.square(rhs), axis=-1)

        # Pairwise dot products <x, y>, contracted over the feature axis: N x M.
        cross = tf.tensordot(lhs, rhs, axes=[[-1], [-1]])

        # ||x||² + ||y||² - <x, y>, broadcasting (N, 1) against (1, M).
        union = tf.expand_dims(lhs_sq_norm, 1) + tf.expand_dims(rhs_sq_norm, 0) - cross

        # Clamp the denominator so that all-zero rows do not divide by zero.
        union = tf.maximum(union, KERNEL_JITTER)

        return self.variance * (cross / union)

    def K_diag(self, X: tf.Tensor) -> tf.Tensor:
        """
        Return the diagonal of K(X, X) without building the full matrix.

        For a fingerprint with non-zero norm, k(x, x) = σ² * ||x||² / ||x||² = σ²,
        so the diagonal is filled with the variance. An all-zero fingerprint is
        assumed not to occur: K would give 0 for it, whereas this method still
        reports σ².

        :param X: N x D tensor of fingerprints.
        :return: Tensor shaped like X without its last axis (N for an N x D
            input), every entry equal to the variance.
        """
        leading_shape = tf.shape(X)[:-1]
        return tf.fill(leading_shape, self.variance)

# --- Data Loading and Preprocessing ---
def load_and_preprocess_data(
    file_path: str, smiles_col: str, target_col: str, radius: int, nBits: int
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Load the CSV, drop incomplete rows and featurise each molecule.

    Rows whose SMILES cannot be parsed, sanitised or fingerprinted are
    skipped with a printed warning; the matching target values are dropped
    with them so that X and y stay aligned.

    :param file_path: Path to the CSV data file.
    :param smiles_col: Name of the column containing SMILES strings.
    :param target_col: Name of the column containing target property values.
    :param radius: Morgan fingerprint radius.
    :param nBits: Morgan fingerprint number of bits.
    :return: Tuple (X, y): X holds one float64 fingerprint per valid
        molecule, y holds the matching targets reshaped to a single column.
    :raises FileNotFoundError: If the file does not exist.
    :raises ValueError: If a required column is missing, no rows survive the
        NaN filter, or no molecule could be featurised.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Data file not found at: {file_path}")

    df = pd.read_csv(file_path)

    if smiles_col not in df.columns or target_col not in df.columns:
        raise ValueError(f"Required columns '{smiles_col}' or '{target_col}' not found in {file_path}")

    # Keep only the two columns we need and drop rows where either is missing.
    df = df[[smiles_col, target_col]].dropna()
    if df.empty:
        raise ValueError("No valid data remaining after removing NaNs.")

    smiles_list = df[smiles_col].to_list()
    property_vals = df[target_col].to_numpy()

    # Fingerprint each molecule exactly once, remembering which rows succeeded.
    fingerprints = []
    valid_indices = []
    print("Generating fingerprints...")
    for i, smiles in enumerate(smiles_list):
        mol = MolFromSmiles(smiles)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None.
            print(f"Warning: Skipping invalid SMILES: {smiles}")
            continue
        try:
            SanitizeMol(mol)
            fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=nBits)
        except Exception as e:
            # Deliberately broad best-effort: any RDKit failure skips this one
            # molecule instead of aborting the whole load.
            print(f"Warning: Skipping SMILES '{smiles}' due to RDKit error: {e}")
            continue
        fingerprints.append(fp)
        valid_indices.append(i)

    if not valid_indices:
        raise ValueError("No valid molecules could be generated from SMILES strings.")

    # Keep only the targets that belong to successfully featurised molecules.
    y = property_vals[valid_indices]

    # float64 matches the dtype used by the TF/GPflow model downstream.
    X = np.asarray(fingerprints, dtype=np.float64)

    print(f"Generated {X.shape[0]} fingerprints.")
    return X, y.reshape(-1, 1) # Ensure y is N x 1

# --- Main Execution ---
if __name__ == "__main__":
    # Script entry point: load the data, run N_FOLDS-fold cross-validation of a
    # Tanimoto-kernel GP regressor and print per-fold and aggregate metrics.
    # Everything stays at module level so that names such as `model` and
    # `y_scaler` remain visible after the block has run.

    # Load and preprocess data
    try:
        X, y_orig = load_and_preprocess_data(DATA_FILE, SMILES_COLUMN, TARGET_COLUMN, MORGAN_RADIUS, MORGAN_NBITS)
        print(f"Loaded {X.shape[0]} valid samples.")
    except (FileNotFoundError, ValueError) as e:
        print(f"Error loading data: {e}")
        # Raise SystemExit directly rather than calling exit(): the exit helper
        # is an interactive convenience that is not guaranteed to exist or to
        # stop execution in every environment, and continuing past this point
        # would only fail later with a NameError on X.
        raise SystemExit(1)

    # --- K-Fold Cross-Validation Setup ---
    kf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE)

    # Lists to store metrics from each fold that completes successfully
    fold_r2_scores = []
    fold_rmse_scores = []
    fold_mae_scores = []
    fold_train_rmse_scores = []  # Training RMSE per fold, for monitoring fit

    print(f"\nStarting {N_FOLDS}-Fold Cross-Validation...")

    # Loop through each fold
    for fold, (train_index, val_index) in enumerate(kf.split(X, y_orig)):
        print(f"\n--- Fold {fold + 1}/{N_FOLDS} ---")

        # Split data for the current fold
        X_train, X_val = X[train_index], X[val_index]
        y_train_orig, y_val_orig = y_orig[train_index], y_orig[val_index]

        # Standardise the target *within the fold*: the scaler is fitted on the
        # training part only, so no validation statistics leak into training.
        y_scaler = StandardScaler()
        y_train = y_scaler.fit_transform(y_train_orig)
        # Validation targets transformed with the same scaler (metrics below
        # are computed on the original scale, so this is informational only).
        y_val = y_scaler.transform(y_val_orig)

        print(f"Train size: {X_train.shape[0]}, Validation size: {X_val.shape[0]}")

        # --- Define and Train GPR Model for the Fold ---
        # Fresh kernel, mean function and model per fold so that no
        # hyperparameters carry over from the previous fold.
        kernel = Tanimoto()
        # Constant mean initialised at the mean of the scaled training targets
        mean_function = Constant(np.mean(y_train))
        model = gpflow.models.GPR(
            data=(X_train, y_train),
            kernel=kernel,
            mean_function=mean_function,
            noise_variance=0.1  # Initial guess, will be optimised per fold
        )

        # Objective for this fold: negative log marginal likelihood. The closure
        # is redefined every iteration and used immediately, so it always refers
        # to the current fold's `model`.
        @tf.function
        def objective_closure():
            return -model.log_marginal_likelihood()

        # Optimise hyperparameters for the current fold
        print("Optimising hyperparameters for fold...")
        optimizer = gpflow.optimizers.Scipy()
        try:
            result = optimizer.minimize(
                objective_closure,
                model.trainable_variables,
                options=dict(maxiter=OPTIMIZER_MAXITER, disp=False)  # Keep disp=False for cleaner CV output
            )
            if not result.success:
                print(f"Warning: Optimization may not have converged fully in fold {fold+1}: {result.message}")
            # Optional: Print summary for each fold if needed for debugging
            # print_summary(model)
        except tf.errors.InvalidArgumentError as e:
            print(f"Error during optimization in fold {fold+1}: {e}")
            print("Skipping evaluation for this fold due to optimization issues.")
            # No metrics are recorded for a fold whose optimisation failed.
            continue

        # --- Evaluation for the Fold ---
        # Predictive mean on the validation set (scaled); variance is not used
        y_pred_val_scaled, _ = model.predict_f(X_val)
        y_pred_val_scaled = y_pred_val_scaled.numpy()

        # Predictive mean on the training set (scaled), for monitoring training fit
        y_pred_train_scaled, _ = model.predict_f(X_train)
        y_pred_train_scaled = y_pred_train_scaled.numpy()

        # Inverse transform predictions to the original scale;
        # y_val_orig is already in the original scale.
        y_pred_val = y_scaler.inverse_transform(y_pred_val_scaled)

        # Calculate metrics for the current fold (on original scale)
        try:
            r2_fold = r2_score(y_val_orig, y_pred_val)
            rmse_fold = np.sqrt(mean_squared_error(y_val_orig, y_pred_val))
            mae_fold = mean_absolute_error(y_val_orig, y_pred_val)

            # Training RMSE (original scale)
            y_pred_train = y_scaler.inverse_transform(y_pred_train_scaled)
            train_rmse_fold = np.sqrt(mean_squared_error(y_train_orig, y_pred_train))

            # Store metrics only after all of them were computed, so the four
            # lists always stay the same length.
            fold_r2_scores.append(r2_fold)
            fold_rmse_scores.append(rmse_fold)
            fold_mae_scores.append(mae_fold)
            fold_train_rmse_scores.append(train_rmse_fold)

            print(f"Fold {fold + 1} Validation R^2: {r2_fold:.3f}")
            print(f"Fold {fold + 1} Validation RMSE: {rmse_fold:.3f} nm")
            print(f"Fold {fold + 1} Validation MAE: {mae_fold:.3f} nm")
            # Optional: print(f"Fold {fold + 1} Train RMSE: {train_rmse_fold:.3f} nm")

        except ValueError as e:
            print(f"Error calculating metrics in fold {fold+1}: {e}")
            print("Check for NaNs or Infs in predictions or validation data.")
            # Metrics for this fold are simply not recorded.

    # --- Aggregate and Report Cross-Validation Results ---
    print("\n--- Cross-Validation Summary ---")
    if fold_r2_scores:  # At least one fold completed successfully
        avg_r2 = np.mean(fold_r2_scores)
        std_r2 = np.std(fold_r2_scores)
        avg_rmse = np.mean(fold_rmse_scores)
        std_rmse = np.std(fold_rmse_scores)
        avg_mae = np.mean(fold_mae_scores)
        std_mae = np.std(fold_mae_scores)
        avg_train_rmse = np.mean(fold_train_rmse_scores)
        std_train_rmse = np.std(fold_train_rmse_scores)

        print(f"Average Validation R^2: {avg_r2:.3f} ± {std_r2:.3f}")
        print(f"Average Validation RMSE: {avg_rmse:.3f} ± {std_rmse:.3f} nm")
        print(f"Average Validation MAE: {avg_mae:.3f} ± {std_mae:.3f} nm")
        print(f"Average Train RMSE: {avg_train_rmse:.3f} ± {std_train_rmse:.3f} nm")
    else:
        print("No folds completed successfully. Unable to report summary statistics.")

    # Note: We don't typically build one "final" model after CV unless
    # the goal was hyperparameter tuning across folds. Here, the primary
    # goal is robust performance estimation. If you need a final model
    # for deployment, you would usually retrain on the *entire* dataset (X, y_orig)
    # using the hyperparameters found during CV (or just let it optimize again).
    # After this block, `model` and `y_scaler` are those of the last fold that
    # was set up, i.e. fitted on only part of the data.

Generating fingerprints...
Generated 392 fingerprints.
Loaded 392 valid samples.

Starting 5-Fold Cross-Validation...

--- Fold 1/5 ---
Train size: 313, Validation size: 79
Optimising hyperparameters for fold...
Fold 1 Validation R^2: 0.885
Fold 1 Validation RMSE: 21.188 nm
Fold 1 Validation MAE: 14.500 nm

--- Fold 2/5 ---
Train size: 313, Validation size: 79
Optimising hyperparameters for fold...
Fold 2 Validation R^2: 0.922
Fold 2 Validation RMSE: 18.179 nm
Fold 2 Validation MAE: 12.947 nm

--- Fold 3/5 ---
Train size: 314, Validation size: 78
Optimising hyperparameters for fold...
Fold 3 Validation R^2: 0.874
Fold 3 Validation RMSE: 23.005 nm
Fold 3 Validation MAE: 12.920 nm

--- Fold 4/5 ---
Train size: 314, Validation size: 78
Optimising hyperparameters for fold...
Fold 4 Validation R^2: 0.884
Fold 4 Validation RMSE: 22.911 nm
Fold 4 Validation MAE: 13.858 nm

--- Fold 5/5 ---
Train size: 314, Validation size: 78
Optimising hyperparameters for fold...
Fold 5 Validation R^2: 0.851

In [None]:
import numpy as np
from rdkit.Chem import MolFromSmiles, AllChem

# Assumes a trained model `m` and a fitted `y_scaler` already exist.
# For example:
# m = ...   # trained GP model
# y_scaler = ...   # StandardScaler used to standardise the target values

# 1. Define the list of new SMILES strings (may contain invalid SMILES).
# Note: six entries appear twice in this list; they are kept as-is, so each
# duplicate is processed again by the code that consumes the list.
new_smiles = [
    "COC(C=C1)=CC=C1/N=N/C2=CC=C(C#N)C([N+]([O-])=O)=CC2",
    "CCN(CC)C1=CC2=C(C=C1)C=C(COCC[N+]([O-])=O)C=C2/N=N/C3=CC=C(OC)C=C3",
    "C=C([N+]([O-])=O)C=C1/N=N/C2=C(NC(CC)=O)C=CC=C2C1=O",
    "CC=CC=CC=C1/N=N/C2=CC([N+]([O-])=O)=CC(COC)=C2",
    "CC=C(C=C1)/N=N/C2=CC=C(C#N)C2=C1",
    "CCCC=C(C=C1)/N=N/C2=CC=C(C([N+]([O-])=O)O)C2=C1",
    "C/C=C(/N=N/C1=CC=C2C=C(C2)C(=CC=C1)C(=O)OC)N=C",
    "CC(C=C1)/N=N/C2=C(CC=C1)C([N+]([O-])=O)=CC=C2",
    "CC=C1N=NC(C=C1)=NCC2=CC=C(C=C2)F",
    "CC=C1C=C(NC(CO)=O)C=C1N=NNC2=CC=C(Cl)C=C2",
    "CC=C1C=C2C#CC3=[N+]C(=CC3=C2N=N1)C=C4C=C(C=C4)F",
    "CC=C1C=CC=C1N=NNC2=CC=C(C(C)C)C=C2",
    "CC=C1N=CC=C1/N=N/C2=CC=C(OC)C=C2",
    "CC=C1N=NC(C=C1)=NCC2=NC3=C(C=C2)C=CC=C3",
    "CC=C1C=CC=C1CN(N=NC2=CC=C(Br)C=C2)C#N",
    "CC=C1C=NC2=C1C=C(NC(C)S(=O)(=O)C)=C2",
    "CC=C1NC2=CC=C(C2=CC=[N+]3=C4C=CN=C4C=C3)C=C1",
    "CC=C1CN=NC2=C1C=CC=C2",
    "CC=C1N=NC(C=C1)=NCC2=CC=C(C=C2)F",
    "CC=C1C=C(NC(CO)=O)C=C1N=NNC2=CC=C(Cl)C=C2",
    "CC=C1C=C2C#CC3=[N+]C(=CC3=C2N=N1)C=C4C=C(C=C4)F",
    "CC=C1C=CC=C1N=NNC2=CC=C(C(C)C)C=C2",
    "CC=C1N=CC=C1/N=N/C2=CC=C(OC)C=C2",
    "CC=C1NC2=CC=C(C2=CC=[N+]3=C4C=CN=C4C=C3)C=C1",
    "CC=C1N=C(S(=O)(=O)C)C=C1",
    "CC=C(C=C1)/N=N/C2=CC(N)=CC=C2",
    "CC=C1C=N/N=C2C=CC(=C2)N=CC=C1",
    "CC/C=C(/N=N/C1=CC=CC=C1)C2=NC=CC=C2",
    "OC(C=C1)=CC=C1/N=N/C2=CC=C(C#N)C=C2",
    "CC=C1N=CC=C1/N=N/C2=CC=C(F)C=C2",
    "CC/C=C(/N=N/C1=CC=C(C)C(C)=C1)C2=NC=CC=C2",
    "C=C(C=C(C)/N=N/C1=CC=C2C=C(C=C2)C=N1)N=O",
    "C=C(C=C1N=N/C2=CC=C(C)C=C2)C(C)=N",
    "C=C(C=C(N=NC1=CC=C(C)C(C)=C1)F)C(F)=F",
    "CC=CC1=C(C=C/C=N/N=C(C)C=C1)/C",
    "C=C(C=C1OC=C(N=N/C2=CC=C(C=C2)C=[N+])C=C1)=O",
    "O=C(C=C1)/N=N/C2=CC=C(C)C=C2",
    "CC=C1C=C2C=CC=C3=C(N=N1)C(=C2)C(=C3)N",
    "N=N/C1=CC2=C(C=C1)C(C)=CC=C2",
    "CC=C1N=C(S(=O)(=O)CC2)C=C1",
    "CC=C1N=CC=C1/N=N/C2=NNC=C2C#N",
    "CC=C1NC2=C(C#N)C=CCCCC2C=C1",
    "CC=C1C=CN/N=C2C=CCC=C2C=C1",
    "C=CC=C(/N=N/C1=CC(Br)=CC=C1)C(C)=C",
    "CC=C1C=C([N+]([O-])=O)C=C1N=N/C2=CC=C(F)C=C2",
    "CC=C1C=CCC2=CC=C(C=C1)C([N+]([O-])=O)=C2",
    "C=C(C=C1)/N=N/C2=CC=N(N)C=C2",
    "C=C(C=C1[C](N=O)=CC=C1)/N=N/C2=C(N)C",
    "C=C(/C=C/N=N/N3CC=C3)=C1C=CC=C(F)C=C1",
    "CC=C1NC=C2N=N/N=C1C=C(C=CC=C2)=N",
    "CC=C1N=CN=N/C2=CC=C3C=CC=C3C=C2",
    "CC=C1C=C2C(C#N)C=C(N=N1)C2",
    "N=N/C1=CC=C(/C=C2)=CC=N2",
    "CC=C1C=CN/N=C2C=CC(C)=CC=C2",
    "CC(C=C1)/N=N/C2=CC=C(C(=O)O)C=C2",
    "N=C(C=C)/N=N/C1=C2C=CC=C1C(C)=CC=C2",
    "CC=C(C=C1)/N=N/C2=CC(O)=C(C=C2)C#N",
    "C=C(C=C1)/N=N/C2=CC=C(C(N)=O)C=C2",
    "C=C/C=C(/N=N/C1=CC=C(N=O)C=C1)C(C)=C",
    "CC=CC=C1C=C1N=N/N=C2C(=CC)=CC=C2",
    "C=C(C=C(N=NC1=N/C=C(C)#N)C1)=O",
    "C=C(/C=C/N=N/C1=CC=C(C)[N+]=O)C2=CC=C2",
    "C=CC=C(/N=N/C1=CC=C(O)C=C1)C2=CC=CC=C2",
]

# 2. Parse every SMILES string into an RDKit molecule; a failed parse yields None.
new_rdkit_mols = [MolFromSmiles(s) for s in new_smiles]

# 3. Separate the entries that parsed from those that did not.
valid_data = [pair for pair in zip(new_smiles, new_rdkit_mols) if pair[1] is not None]
failed_smiles = [s for s, mol in zip(new_smiles, new_rdkit_mols) if mol is None]

# Report the SMILES strings that are being skipped, if any.
if failed_smiles:
    print("警告：以下 SMILES 解析失败，将被跳过：")
    for s in failed_smiles:
        print("  ", s)

# Unzip into parallel sequences of valid SMILES and their molecules.
valid_smiles, valid_rdkit_mols = zip(*valid_data) if valid_data else ([], [])

# 4. Compute Morgan fingerprints for the valid molecules. The settings must
#    match the ones the model was trained with.
# NOTE(review): radius=3 is used here, while the training cell in this file uses
# MORGAN_RADIUS = 2. If `m` was trained on those features this is a mismatch —
# confirm which radius `m` was actually trained with.
new_X = None
if valid_rdkit_mols:
    new_X = np.asarray(
        [AllChem.GetMorganFingerprintAsBitVect(mol, radius=3, nBits=2048)
         for mol in valid_rdkit_mols]
    ).astype(np.float64)
    print("有效 SMILES:")
    for s in valid_smiles:
        print("  ", s)
    print("计算得到的指纹数组形状为：", new_X.shape)
else:
    print("没有有效的 SMILES 可供计算指纹。")

# 5. Predict with the trained GP model.
if new_X is None:
    print("没有有效 SMILES 进行预测。")
else:
    # predict_f returns the predictive mean and variance; only the mean is reported.
    y_new_pred, y_new_var = m.predict_f(new_X)
    # Convert the mean from a TensorFlow tensor to a numpy array.
    y_new_pred = y_new_pred.numpy()

    # Undo the target standardisation applied at training time.
    # NOTE(review): assumes y_scaler is the scaler `m` was trained with — confirm.
    y_new_pred_rescaled = y_scaler.inverse_transform(y_new_pred)

    # Print one prediction per valid SMILES.
    print("\n新的 SMILES 预测的属性值：")
    for s, pred in zip(valid_smiles, y_new_pred_rescaled):
        print(f"SMILES: {s} -> Predicted Property: {pred[0]:.3f}")


警告：以下 SMILES 解析失败，将被跳过：
   CC=CC=CC=C1/N=N/C2=CC([N+]([O-])=O)=CC(COC)=C2
   CC=C1C=NC2=C1C=C(NC(C)S(=O)(=O)C)=C2
   CC=C1NC2=CC=C(C2=CC=[N+]3=C4C=CN=C4C=C3)C=C1
   CC=C1NC2=CC=C(C2=CC=[N+]3=C4C=CN=C4C=C3)C=C1
   CC=C(C=C1)/N=N/C2=CC(N)=CC=C2
   C=C(C=C1N=N/C2=CC=C(C)C=C2)C(C)=N
   C=C(C=C(N=NC1=CC=C(C)C(C)=C1)F)C(F)=F
   C=C(C=C1OC=C(N=N/C2=CC=C(C=C2)C=[N+])C=C1)=O
   O=C(C=C1)/N=N/C2=CC=C(C)C=C2
   CC=C1C=C2C=CC=C3=C(N=N1)C(=C2)C(=C3)N
   CC=C1N=C(S(=O)(=O)CC2)C=C1
   C=C(C=C1)/N=N/C2=CC=N(N)C=C2
   C=C(C=C1[C](N=O)=CC=C1)/N=N/C2=C(N)C
   C=C(/C=C/N=N/N3CC=C3)=C1C=CC=C(F)C=C1
   CC=C1NC=C2N=N/N=C1C=C(C=CC=C2)=N
   CC=C1N=CN=N/C2=CC=C3C=CC=C3C=C2
   N=N/C1=CC=C(/C=C2)=CC=N2
   CC=C1C=CN/N=C2C=CC(C)=CC=C2
   CC(C=C1)/N=N/C2=CC=C(C(=O)O)C=C2
   CC=C(C=C1)/N=N/C2=CC(O)=C(C=C2)C#N
   C=C(C=C1)/N=N/C2=CC=C(C(N)=O)C=C2
   CC=CC=C1C=C1N=N/N=C2C(=CC)=CC=C2
   C=C(C=C(N=NC1=N/C=C(C)#N)C1)=O
   C=C(/C=C/N=N/C1=CC=C(C)[N+]=O)C2=CC=C2
有效 SMILES:
   COC(C=C1)=CC=C1/N=N/C2=CC=C(C#N)C([N+]([O-])=O)=

[10:14:27] SMILES Parse Error: unclosed ring for input: 'CC=CC=CC=C1/N=N/C2=CC([N+]([O-])=O)=CC(COC)=C2'
[10:14:27] Explicit valence for atom # 8 C, 5, is greater than permitted
[10:14:27] Explicit valence for atom # 11 N, 4, is greater than permitted
[10:14:27] Explicit valence for atom # 11 N, 4, is greater than permitted
[10:14:27] SMILES Parse Error: unclosed ring for input: 'CC=C(C=C1)/N=N/C2=CC(N)=CC=C2'
[10:14:27] SMILES Parse Error: unclosed ring for input: 'C=C(C=C1N=N/C2=CC=C(C)C=C2)C(C)=N'
[10:14:27] Explicit valence for atom # 17 F, 2, is greater than permitted
[10:14:27] Explicit valence for atom # 1 C, 5, is greater than permitted
[10:14:27] SMILES Parse Error: unclosed ring for input: 'O=C(C=C1)/N=N/C2=CC=C(C)C=C2'
[10:14:27] Explicit valence for atom # 8 C, 5, is greater than permitted
[10:14:27] SMILES Parse Error: unclosed ring for input: 'CC=C1N=C(S(=O)(=O)CC2)C=C1'
[10:14:27] SMILES Parse Error: unclosed ring for input: 'C=C(C=C1)/N=N/C2=CC=N(N)C=C2'
[10:14:27] SMIL