<a href="https://colab.research.google.com/github/abdulwasaeee/NovaMol/blob/main/bestmodel%2Bresults.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# =============================================================================
# Project: Multi-Task Generative Chemistry and GNN-based Property Prediction
#
# Final Version - All-in-One Script
#
# Objective:
# 1. Generative Model (RNN): Train a model to generate new, valid molecules.
# 2. Multi-Task Predictive Model (GNN): Train a SINGLE GNN to simultaneously
#    predict four key chemical properties.
# 3. Validation: Use a held-out Test Set to robustly validate the GNN's
#    predictive accuracy against known ground truth values.
# 4. Analysis: For novel molecules, predict their properties, assess
#    manufacturability, and suggest potential industrial applications.
# =============================================================================

# --- Step 1: Setup and Installations ---
import subprocess
import sys

def install_packages():
    """Installs required packages using a robust method for PyTorch Geometric."""
    print("--- Checking and installing dependencies ---")
    standard_packages = [
        "rdkit", "pandas", "scikit-learn", "tqdm", "torch",
        "torchvision", "torchaudio", "kagglehub", "selfies"
    ]
    for package in standard_packages:
        try:
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', package])
        except subprocess.CalledProcessError:
            print(f"ERROR: Failed to install {package}. Please try installing it manually.")
            sys.exit(1)
    try:
        import torch
        TORCH_VERSION = torch.__version__.split('+')[0]
        CUDA_VERSION = torch.version.cuda
        CUDA_STR = f"cu{CUDA_VERSION.replace('.', '')}" if CUDA_VERSION else 'cpu'
        print(f"Detected PyTorch {TORCH_VERSION} and device type {CUDA_STR}.")
        PYG_URL = f'https://data.pyg.org/whl/torch-{TORCH_VERSION}+{CUDA_STR}.html'
        pyg_packages = ['torch-scatter', 'torch-sparse', 'torch-cluster', 'torch-geometric']
        for package in pyg_packages:
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', package, '-f', PYG_URL])
        print("--- All dependencies installed successfully. ---")
    except Exception as e:
        print(f"ERROR: Failed to install PyG packages: {e}")
        sys.exit(1)

install_packages()

# --- Step 2: Imports and Global Configuration ---
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import requests
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GINConv, global_add_pool
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors, Descriptors
from IPython.display import display, HTML
import selfies as sf
import kagglehub

# --- Configuration ---
PROPERTIES_TO_PREDICT = ['dipol_mom', 'Polarizability', 'HL_gap', 'spatial extent']
N_PROPERTIES = len(PROPERTIES_TO_PREDICT)
N_MOLECULES_GNN = 20000
N_MOLECULES_RNN = 50000
BATCH_SIZE = 128
LEARNING_RATE = 1e-3
N_EPOCHS_GNN = 50
N_EPOCHS_RNN = 25
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Step 3: Define Helper Functions ---

def smiles_to_graph(smiles: str):
    """Converts a SMILES string into a PyTorch Geometric Data object."""
    mol = Chem.MolFromSmiles(smiles)
    if mol is None: return None
    atom_features = [[
        atom.GetAtomicNum(), atom.GetFormalCharge(), float(atom.GetHybridization()),
        float(atom.GetIsAromatic()), atom.GetTotalNumHs(), atom.GetTotalValence()
    ] for atom in mol.GetAtoms()]
    x = torch.tensor(atom_features, dtype=torch.float)
    if mol.GetNumBonds() > 0:
        row, col = [], []
        for bond in mol.GetBonds():
            start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
            row.extend([start, end])
            col.extend([end, start])
        edge_index = torch.tensor([row, col], dtype=torch.long)
    else:
        edge_index = torch.empty((2, 0), dtype=torch.long)
    return Data(x=x, edge_index=edge_index)

def evaluate_multitask_gnn(loader, model, scalers):
    """Evaluates the Multi-Task GNN and returns MAE for each property."""
    model.eval()
    predictions, targets = [], []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(DEVICE)
            out = model(batch)
            # Ensure batch.y is correctly shaped [batch_size, N_PROPERTIES]
            targets.append(batch.y.view(-1, N_PROPERTIES).cpu().numpy())
            predictions.append(out.cpu().numpy())


    predictions = np.vstack(predictions)
    targets = np.vstack(targets)

    maes = {}
    for i, prop in enumerate(PROPERTIES_TO_PREDICT):
        pred_real = scalers[prop].inverse_transform(predictions[:, i].reshape(-1, 1)).flatten()
        targ_real = scalers[prop].inverse_transform(targets[:, i].reshape(-1, 1)).flatten()
        maes[prop] = np.mean(np.abs(pred_real - targ_real))

    return maes

def check_pubchem(smiles):
    """Checks if a molecule exists in PubChem. Returns CID if found, else None."""
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/{smiles}/cids/JSON"
    try:
        response = requests.get(url, timeout=5)
        if response.status_code == 200:
            data = response.json()
            return data.get("IdentifierList", {}).get("CID", [None])[0]
    except: return None
    return None

def analyze_novel_molecule_robust(smiles: str):
    """Calculates robust properties and a custom complexity score for a novel molecule."""
    results = {"Complexity_Score": "N/A", "Num_Rings": "N/A", "TPSA": "N/A"}
    try:
        mol = Chem.MolFromSmiles(smiles)
        if not mol: return results
        num_rings = rdMolDescriptors.CalcNumRings(mol)
        num_spiro = rdMolDescriptors.CalcNumSpiroAtoms(mol)
        num_bridgehead = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
        tpsa = Descriptors.TPSA(mol)
        complexity_score = (num_rings * 1.0 + num_spiro * 2.5 + num_bridgehead * 2.5)
        results.update({
            "Complexity_Score": f"{complexity_score:.2f}",
            "Num_Rings": num_rings,
            "TPSA": f"{tpsa:.2f}"
        })
    except: pass
    return results

def suggest_industrial_application(properties: dict):
    """Suggests industrial uses based on predicted molecular properties."""
    apps = []
    if properties['HL_gap'] < 0.18:
        apps.append("Organic Electronics")
    if 20 < properties['Polarizability'] < 180 and properties['spatial extent'] < 8000:
        apps.append("Pharmaceuticals")
    if properties['dipol_mom'] > 4.0:
        apps.append("Cosmetics / High-Polarity Materials")

    if not apps: return "General Chemical Reagent"
    return ", ".join(apps)

# --- Step 4: Define Model Architectures ---

class MultiTaskGNN(nn.Module):
    def __init__(self, in_dim, hidden_dim=128, out_dim=4):
        super().__init__()
        nn1 = nn.Sequential(nn.Linear(in_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim))
        self.conv1 = GINConv(nn1)
        nn2 = nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim))
        self.conv2 = GINConv(nn2)
        self.lin1 = nn.Linear(hidden_dim, hidden_dim)
        self.lin2 = nn.Linear(hidden_dim, out_dim)
    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        x = global_add_pool(x, batch)
        x = F.relu(self.lin1(x))
        return self.lin2(x)

class SELFIES_RNN(nn.Module):
    def __init__(self, vocab_size, emb_size=128, hidden_size=512, num_layers=3):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size, padding_idx=0)
        self.rnn = nn.LSTM(emb_size, hidden_size, num_layers, batch_first=True, dropout=0.2)
        self.fc = nn.Linear(hidden_size, vocab_size)
    def forward(self, x, hidden=None):
        emb = self.embedding(x)
        out, hidden = self.rnn(emb, hidden)
        return self.fc(out), hidden

def sample_selfies(model, token2idx, idx2token, max_len=50, temperature=1.0):
    model.eval()
    start_token = '[C]'
    x = torch.tensor([[token2idx[start_token]]], device=DEVICE)
    hidden = None
    tokens = [start_token]
    for _ in range(max_len):
        out, hidden = model(x, hidden)
        probs = F.softmax(out.squeeze() / temperature, dim=-1)
        idx = torch.multinomial(probs, 1).item()
        if idx == 0: break
        tokens.append(idx2token[idx])
        x = torch.tensor([[idx]], device=DEVICE)
    try: return sf.decoder(''.join(tokens))
    except: return None

# =============================================================================
# Main Execution Block
# =============================================================================
if __name__ == "__main__":
    print(f"\nProject starting on device: {DEVICE}\n")
    path = kagglehub.dataset_download("nikitamanaenkov/qm40-molecular-qm-dataset")
    df_main = pd.read_csv(os.path.join(path, "main.csv")).dropna(subset=PROPERTIES_TO_PREDICT)

    print("\n--- 1. Preparing Data for Multi-Task GNN ---")
    gnn_data_list = []
    subset_df_gnn = df_main.head(N_MOLECULES_GNN)
    for _, row in tqdm(subset_df_gnn.iterrows(), total=subset_df_gnn.shape[0], desc="Creating GNN graphs"):
        graph = smiles_to_graph(row['smile'])
        if graph:
            graph.y = torch.tensor([row[p] for p in PROPERTIES_TO_PREDICT], dtype=torch.float)
            graph.smiles = row['smile']
            gnn_data_list.append(graph)

    train_val_data, test_data = train_test_split(gnn_data_list, test_size=0.15, random_state=42)
    train_data, val_data = train_test_split(train_val_data, test_size=0.17, random_state=42)

    scalers = {}
    for i, prop in enumerate(PROPERTIES_TO_PREDICT):
        targets = np.array([d.y[i].item() for d in train_data]).reshape(-1, 1)
        scalers[prop] = StandardScaler().fit(targets)
        for d in train_val_data + test_data:
            d.y[i] = torch.tensor(scalers[prop].transform([[d.y[i].item()]])[0,0])

    train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=BATCH_SIZE)
    test_loader = DataLoader(test_data, batch_size=BATCH_SIZE)

    print(f"Data split: {len(train_data)} train, {len(val_data)} validation, {len(test_data)} test.")

    print("\n--- 2. Training Multi-Task Predictive GNN ---")
    gnn_model = MultiTaskGNN(in_dim=train_data[0].num_node_features, out_dim=N_PROPERTIES).to(DEVICE)
    optimizer_gnn = torch.optim.Adam(gnn_model.parameters(), lr=LEARNING_RATE)
    loss_fn_gnn = nn.MSELoss()
    best_val_mae_sum = float('inf')

    for epoch in range(1, N_EPOCHS_GNN + 1):
        gnn_model.train()
        for batch in train_loader:
            batch = batch.to(DEVICE)
            optimizer_gnn.zero_grad()
            out = gnn_model(batch)
            # Ensure batch.y is correctly shaped [batch_size, N_PROPERTIES]
            loss = loss_fn_gnn(out, batch.y.view(-1, N_PROPERTIES))
            loss.backward()
            optimizer_gnn.step()

        val_maes = evaluate_multitask_gnn(val_loader, gnn_model, scalers)
        current_mae_sum = sum(val_maes.values())
        if current_mae_sum < best_val_mae_sum:
            best_val_mae_sum = current_mae_sum
            torch.save(gnn_model.state_dict(), 'best_gnn_model.pth')
        print(f"GNN Epoch {epoch:02d} | Val MAEs: " + ", ".join([f"{k}={v:.3f}" for k,v in val_maes.items()]))

    gnn_model.load_state_dict(torch.load('best_gnn_model.pth'))

    print("\n--- 3. Training Generative RNN ---")
    selfies_list = [sf.encoder(smi) for smi in tqdm(df_main['smile'].head(N_MOLECULES_RNN), desc="Encoding to SELFIES") if smi and sf.encoder(smi)]
    all_tokens = set(t for s in selfies_list for t in sf.split_selfies(s))
    token2idx = {t: i + 1 for i, t in enumerate(sorted(all_tokens))}; token2idx['<PAD>'] = 0
    idx2token = {i: t for t, i in token2idx.items()}; vocab_size = len(token2idx)
    max_len = max(len(list(sf.split_selfies(s))) for s in selfies_list)
    selfies_tensor = torch.stack([torch.tensor([token2idx.get(t, 0) for t in list(sf.split_selfies(s))] + [0] * (max_len - len(list(sf.split_selfies(s)))), dtype=torch.long) for s in selfies_list])
    rnn_dataset = torch.utils.data.TensorDataset(selfies_tensor[:, :-1], selfies_tensor[:, 1:])
    rnn_loader = torch.utils.data.DataLoader(rnn_dataset, batch_size=128, shuffle=True)
    rnn_model = SELFIES_RNN(vocab_size).to(DEVICE)
    opt_rnn = torch.optim.Adam(rnn_model.parameters(), lr=1e-3)
    for epoch in range(1, N_EPOCHS_RNN + 1):
        for x, y in rnn_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            opt_rnn.zero_grad()
            out, _ = rnn_model(x)
            loss = F.cross_entropy(out.reshape(-1, vocab_size), y.reshape(-1), ignore_index=0)
            loss.backward()
            opt_rnn.step()
        print(f"RNN Epoch {epoch:02d}, Loss: {loss.item():.4f}")

    print("\n--- 4. Generating and Segregating Novel Molecules ---")
    generated_smiles = [sample_selfies(rnn_model, token2idx, idx2token, temperature=0.9) for _ in tqdm(range(200), desc="Generating Molecules")]
    valid_smiles = [s for s in generated_smiles if s and Chem.MolFromSmiles(s)]
    novel_molecules_smiles = [s for s in tqdm(valid_smiles, desc="Checking Novelty") if not check_pubchem(s)]

    # =============================================================================
    #                         FINAL ANALYSIS AND REPORTING
    # =============================================================================

    print("\n" + "="*80)
    print("                    FINAL PROJECT SUMMARY & ANALYSIS")
    print("="*80)

    # --- Section A: GNN Performance on Test Data ---
    print("\n--- A. GNN PERFORMANCE ON UNSEEN TEST DATA ---")
    test_maes = evaluate_multitask_gnn(test_loader, gnn_model, scalers)
    print("Final GNN Performance on Test Set:")
    for prop, mae in test_maes.items():
        print(f"  - MAE for {prop:<15}: {mae:.4f}")

    print("\n--- PERFORMANCE SPOTLIGHT: PREDICTED VS. ACTUAL (Sample from Test Set) ---")
    sample_size = min(10, len(test_data))
    sample_indices = np.random.choice(len(test_data), sample_size, replace=False)
    sample_data = [test_data[i] for i in sample_indices]
    sample_loader = DataLoader(sample_data, batch_size=sample_size)
    batch = next(iter(sample_loader))
    batch = batch.to(DEVICE)
    predictions_scaled = gnn_model(batch).cpu().detach().numpy()

    spotlight_results = []
    # UPDATED: Create dynamic headers for the multi-task table
    headers = ["SMILES"]
    for prop in PROPERTIES_TO_PREDICT:
        headers.extend([f"Pred_{prop[:10]}", f"Actual_{prop[:10]}"]) # Abbreviate for cleaner table

    for i in range(sample_size):
        row = [sample_data[i].smiles]
        for j, prop in enumerate(PROPERTIES_TO_PREDICT):
            pred_real = scalers[prop].inverse_transform(predictions_scaled[i, j].reshape(1, -1))[0,0]
            actual_real = scalers[prop].inverse_transform(sample_data[i].y[j].reshape(1, -1))[0,0]
            row.extend([f"{pred_real:.2f}", f"{actual_real:.2f}"])
        spotlight_results.append(row)

    df_spotlight = pd.DataFrame(spotlight_results, columns=headers)
    display(HTML(df_spotlight.to_html(index=False, justify='left')))

    # --- Section B: Business Analysis of Novel Molecules ---
    print("\n--- B. BUSINESS ANALYSIS OF NOVEL MOLECULES ---")
    if novel_molecules_smiles:
        novel_graphs = [smiles_to_graph(s) for s in novel_molecules_smiles]
        valid_novel_graphs_data = [(smi, g) for smi, g in zip(novel_molecules_smiles, novel_graphs) if g is not None]

        if valid_novel_graphs_data:
            smiles_for_analysis = [smi for smi, g in valid_novel_graphs_data]
            graphs_for_analysis = [g for smi, g in valid_novel_graphs_data]
            predict_loader = DataLoader(graphs_for_analysis, batch_size=len(graphs_for_analysis))
            batch = next(iter(predict_loader)).to(DEVICE)
            preds_scaled_novel = gnn_model(batch).cpu().detach().numpy()

            analysis_results = []
            for i, smiles in enumerate(smiles_for_analysis):
                predicted_props = {}
                for j, prop in enumerate(PROPERTIES_TO_PREDICT):
                    predicted_props[prop] = scalers[prop].inverse_transform(preds_scaled_novel[i, j].reshape(1, -1))[0,0]
                complexity_data = analyze_novel_molecule_robust(smiles)
                applications = suggest_industrial_application(predicted_props)

                analysis_results.append([
                    smiles,
                    f"{predicted_props['dipol_mom']:.2f}",
                    complexity_data["Complexity_Score"],
                    applications
                ])

            df_analysis = pd.DataFrame(analysis_results, columns=[
                "Novel SMILES", "Predicted Dipole (D)", "Complexity Score", "Suggested Applications"
            ])
            display(HTML(df_analysis.head(15).to_html(index=False, justify='left')))
    else:
        print("No novel molecules were generated to analyze.")

    print("\n" + "="*80)
    print("                              PROJECT COMPLETE")
    print("="*80)

--- Checking and installing dependencies ---
Detected PyTorch 2.8.0 and device type cu126.
--- All dependencies installed successfully. ---

Project starting on device: cuda

Using Colab cache for faster access to the 'qm40-molecular-qm-dataset' dataset.

--- 1. Preparing Data for Multi-Task GNN ---


Creating GNN graphs: 100%|██████████| 20000/20000 [00:11<00:00, 1723.65it/s]


Data split: 14110 train, 2890 validation, 3000 test.

--- 2. Training Multi-Task Predictive GNN ---
GNN Epoch 01 | Val MAEs: dipol_mom=1.209, Polarizability=17.247, HL_gap=0.020, spatial extent=1483.735
GNN Epoch 02 | Val MAEs: dipol_mom=1.150, Polarizability=14.090, HL_gap=0.021, spatial extent=1217.291
GNN Epoch 03 | Val MAEs: dipol_mom=1.178, Polarizability=13.743, HL_gap=0.020, spatial extent=1134.029
GNN Epoch 04 | Val MAEs: dipol_mom=1.143, Polarizability=12.934, HL_gap=0.019, spatial extent=1270.624
GNN Epoch 05 | Val MAEs: dipol_mom=1.127, Polarizability=11.962, HL_gap=0.018, spatial extent=1095.440
GNN Epoch 06 | Val MAEs: dipol_mom=1.135, Polarizability=11.851, HL_gap=0.018, spatial extent=1154.258
GNN Epoch 07 | Val MAEs: dipol_mom=1.107, Polarizability=10.634, HL_gap=0.017, spatial extent=1000.627
GNN Epoch 08 | Val MAEs: dipol_mom=1.113, Polarizability=10.355, HL_gap=0.018, spatial extent=996.842
GNN Epoch 09 | Val MAEs: dipol_mom=1.073, Polarizability=10.762, HL_gap=0.017

Encoding to SELFIES: 100%|██████████| 50000/50000 [00:23<00:00, 2122.78it/s]


RNN Epoch 01, Loss: 1.2777
RNN Epoch 02, Loss: 1.0769
RNN Epoch 03, Loss: 0.9922
RNN Epoch 04, Loss: 0.9193
RNN Epoch 05, Loss: 0.9161
RNN Epoch 06, Loss: 0.8643
RNN Epoch 07, Loss: 0.8428
RNN Epoch 08, Loss: 0.8461
RNN Epoch 09, Loss: 0.8278
RNN Epoch 10, Loss: 0.8004
RNN Epoch 11, Loss: 0.8292
RNN Epoch 12, Loss: 0.8170
RNN Epoch 13, Loss: 0.8155
RNN Epoch 14, Loss: 0.7554
RNN Epoch 15, Loss: 0.7418
RNN Epoch 16, Loss: 0.7805
RNN Epoch 17, Loss: 0.7464
RNN Epoch 18, Loss: 0.7571
RNN Epoch 19, Loss: 0.7210
RNN Epoch 20, Loss: 0.7096
RNN Epoch 21, Loss: 0.7161
RNN Epoch 22, Loss: 0.7180
RNN Epoch 23, Loss: 0.7075
RNN Epoch 24, Loss: 0.6986
RNN Epoch 25, Loss: 0.6757

--- 4. Generating and Segregating Novel Molecules ---


Generating Molecules: 100%|██████████| 200/200 [00:07<00:00, 28.50it/s]
Checking Novelty: 100%|██████████| 200/200 [01:52<00:00,  1.78it/s]


                    FINAL PROJECT SUMMARY & ANALYSIS

--- A. GNN PERFORMANCE ON UNSEEN TEST DATA ---
Final GNN Performance on Test Set:
  - MAE for dipol_mom      : 1.0080
  - MAE for Polarizability : 4.1172
  - MAE for HL_gap         : 0.0107
  - MAE for spatial extent : 532.0174

--- PERFORMANCE SPOTLIGHT: PREDICTED VS. ACTUAL (Sample from Test Set) ---





SMILES,Pred_dipol_mom,Actual_dipol_mom,Pred_Polarizabi,Actual_Polarizabi,Pred_HL_gap,Actual_HL_gap,Pred_spatial ex,Actual_spatial ex
CC[C@H](C)NC(=S)Nc1cn(C)cn1,5.45,2.56,144.72,149.34,-0.18,-0.2,4131.09,4524.86
COC1(C(=O)N[C@H]2CCS[C@H]2C)CC1,2.74,3.82,131.53,136.25,-0.23,-0.24,3571.01,3977.31
O=C(N[C@H]1CCSC1)N1CC[C@H](F)C1,3.21,4.87,129.45,130.02,-0.25,-0.25,4656.62,4990.9
CCC(C[C@H](C)CC)=NN[C@H](C)CC,2.85,1.48,146.05,152.53,-0.26,-0.22,4789.8,4787.75
COC(=O)C12CCC(C(=O)NCC(C)C)(CC1)CC2,2.68,1.44,165.26,173.05,-0.26,-0.25,6648.83,7657.15
O=C(COC1CC1)OCCOCCO,2.2,3.69,115.85,115.76,-0.26,-0.27,5072.14,5504.06
C=C[C@@H](O)C(=O)OCCCS(C)(=O)=O,3.81,5.42,126.4,121.19,-0.24,-0.24,5138.56,5544.82
O=C1CN([C@H](F)c2ccnc(Cl)c2)C1,2.86,2.6,117.47,119.32,-0.21,-0.21,3131.65,3785.09
O=C(Nc1cc[nH]n1)[C@H]1CCCCS1,3.1,3.09,136.99,135.62,-0.2,-0.22,4408.26,4233.22
Cc1ncc(Cl)c(OC2CCCC2)n1,2.88,2.29,136.33,133.73,-0.2,-0.21,4094.27,3560.06



--- B. BUSINESS ANALYSIS OF NOVEL MOLECULES ---


Novel SMILES,Predicted Dipole (D),Complexity Score,Suggested Applications
CC(C)[C@@H1](C)C(=O)N1[C@H1]C[C@@H1](NC(=O)[C@@H1]2CCC3=NN(C)C=C3C2)C1CCO,3.8,3.0,Organic Electronics
C[C@@H1]1C2N(C)CC[C@@H1]1NC(=O)N3CCC[C@@H1](C4=CC=C(F)C=C4)C3(C)C2=O,3.06,9.0,Organic Electronics
COC=C(CN1C(=O)CCOC2=CC=C(OC)C(OC3)=C2)C=C(O)C=CCCC3C1,5.03,13.0,"Organic Electronics, Cosmetics / High-Polarity Materials"
CCC(C1C)CCN(CC2=CSC=N2)C1=O,2.96,2.0,"Organic Electronics, Pharmaceuticals"
CCCN1C2=CC(C(=O)NCCCNC3=CN=CC(C#N)=N3)=C1C(C)CC2=O,5.7,8.0,"Organic Electronics, Cosmetics / High-Polarity Materials"
CCC1C(O)(C(=O)NC2=CC(F)=CC=C2Cl)CC1=O,2.89,2.0,"Organic Electronics, Pharmaceuticals"
CCO[C@H1](CC)C(=O)N1C[C@@H1](C)OC(C)(C)C1(C)CCC(C)=O,2.95,1.0,Organic Electronics
COC[C@@H1](OC(=O)/C=C\SC)C(C)=O,3.16,0.0,"Organic Electronics, Pharmaceuticals"
C1C=CC=C(F)C1(C(=O)NC2C[C@H1]3CN(C(=O)C4CC4)[C@@H1]3C2=C)F,2.44,4.0,Organic Electronics
CN(C(=O)CCOCC)[C@H1]1CCSC1=O,3.49,1.0,"Organic Electronics, Pharmaceuticals"



                              PROJECT COMPLETE
