
# Nevergrad first call
Quick notebook to exercise the Nevergrad optimization step through the cached pipeline (the run cell below executes every stage up to `report`). Adjust the overrides to control the runtime and the target.


In [1]:

# Ensure the repo is on sys.path (works when run from the CLI or inside the notebook directory).
import sys
from pathlib import Path


def find_repo_root(start, marker):
    """Return the nearest directory at or above ``start`` that contains ``marker``.

    Args:
        start: Directory where the upward search begins.
        marker: Relative path whose existence identifies the repository root.

    Returns:
        The first of ``start`` and its parents (nearest first) for which
        ``candidate / marker`` exists, or ``start`` unchanged when none does.
    """
    start = Path(start)
    for candidate in (start, *start.parents):
        if (candidate / marker).exists():
            return candidate
    return start


# Start from the CWD; when __file__ is defined (exported script), start from
# the script's third-level parent instead.
repo_root = Path.cwd()
if '__file__' in globals():
    file_parents = Path(__file__).resolve().parents
    if file_parents:
        # parents[2] raises IndexError for a script that sits fewer than three
        # levels below the filesystem root, so clamp to the highest parent available.
        repo_root = file_parents[min(2, len(file_parents) - 1)]

# Climb parents until we find the top-level nevermore/configs marker.
marker = Path('nevermore/configs/default.yaml')
repo_root = find_repo_root(repo_root, marker)

if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))
print('Repo root1:', repo_root)


Repo root1: /scratch/home/sr3622/Firm-DTI/Firm-DTI2


In [None]:
import json
import pandas as pd
from nevermore.pipeline import NevermorePipeline


# Names of the pipeline stages; the run log in this notebook prints them in this same order.
STAGE_ORDER = [
    "ingest", "features", "optimization", "retrieval",
    "visualization", "docking", "report",
]


# Config and quick overrides.
# config_path points at the default YAML config under the repo root located by the first cell.
config_path = repo_root / 'nevermore' / 'configs' / 'default.yaml'
# NOTE(review): override_sample_index is not read anywhere in the visible cells — confirm whether it is still needed.
override_sample_index = None  # e.g., 10
# Target sequence; when not None it is written to pipe.config.optimization.target_sequence further down this cell.
override_target_sequence="MGLKAAQKTLFPLRSIDDVVRLFAAELGREEPDLVLLSLVLGFVEHFLAVNRVIPTNVPELTFQPSPAPDPPGGLTYFPVADLSIIAALYARFTAQIRGAVDLSLYPREGGVSSRELVKKVSDVIWNSLSRSYFKDRAHIQSLFSFITGTKLDSSGVAFAVVGACQALGLRDVHLALSEDHAWVVFGPNGEQTAEVTWHGKGNEDRRGQTVNAGVAERSWLYLKGSYMRCDRKMEVAFMVCAINPSIDLHTDSLELLQLQQKLLWLLYDLGHLERYPMALGNLADLEELEPTPGRPDPLTLYHKGIASAKTYYRDEHIYPYMYLAGYHCRNRNVREALQAWADTATVIQDYNYCREDEEIYKEFFEVANDVIPNLLKEAASLLEAGEERPGEQSQGTQSQGSALQDPECFAHLLRFYDGICKWEEGSPTPVLHVGWATFLVQSLGRFEGQVRQKVRIVSREAEAAEAEEPWGEEAREGRRRGPRRESKPEEPPPPKKPALDKGLGTGQGAVSGPPRKPPGTVAGTARGPEGGSTAQVPAPTASPPPEGPVLTFQSEKMKGMKELLVATKINSSAIKLQLTAQSQVQMKKQKVSTPSDYTLSFLKRQRKGL"
# Baseline SMILES; when not None it is written to pipe.config.optimization.baseline_smiles further down this cell.
override_baseline_smiles ="CCN(C(C)C)C(=O)C1=CC(F)=CC=C1OC1=CN=CN=C1N1CC2(C1)CCN(C[C@H]1CC[C@@H](CC1)NS(=O)(=O)CC)CC2"

# override_target_sequence="SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ"
# override_baseline_smiles ="CN(C)CCCSC1=CC=CC=C1NC(=O)/C=C/C2=CC=CC=C2"

# Build the pipeline from the YAML config, then apply whichever overrides are set.
pipe = NevermorePipeline(config_path=config_path)
if override_target_sequence is not None:
    pipe.config.optimization.target_sequence = override_target_sequence
if override_baseline_smiles is not None:
    pipe.config.optimization.baseline_smiles = override_baseline_smiles

# Run the pipeline up to the 'report' stage.
results = pipe.run(up_to='report', verbose=True)

# NOTE: despite its name, opt_res holds the *report* stage result; later cells
# read opt_res.outputs['report'], so the name is kept.
opt_res = results.get('report')
if opt_res is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise KeyError("pipeline run returned no result for the 'report' stage")
print('Optimization signature:', opt_res.signature)
print('Outputs:', opt_res.outputs)
# default=str keeps the dump from raising TypeError if details ever holds
# non-JSON values (for example Path objects, as opt_res.outputs does).
print('Details:', json.dumps(opt_res.details, indent=2, default=str))

[ingest] starting...
[ingest] cached (sig=d2f8d2740f3a)
[features] starting...
[features] cached (sig=329d35f96e63)
[optimization] starting...


  import pkg_resources


[optimization] done (sig=cb1a68f242ff)
[retrieval] starting...
[retrieval] cached (sig=5b0418f4a0cd)
[visualization] starting...
[visualization] cached (sig=7dff4aa5e0e2)
[docking] starting...
[docking] cached (sig=f7824e31ab5f)
[report] starting...
[report] done (sig=fff857fb8382)
Optimization signature: fff857fb8382
Outputs: {'report': PosixPath('/scratch/home/sr3622/Firm-DTI/Firm-DTI2/pipeline_output/report/fff857fb8382/nevermore_report.csv')}
Details: {
  "rows": 101
}


In [None]:
# [ingest] starting...
# [ingest] cached (sig=dd9a0696f2c3)
# [features] starting...
# [features] cached (sig=5d19f5fd62f6)
# [optimization] starting...
# /home/sr3622/miniconda3/envs/digress10/lib/python3.10/site-packages/hyperopt/atpe.py:19: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
#   import pkg_resources
# /home/sr3622/miniconda3/envs/digress10/lib/python3.10/site-packages/nevergrad/parametrization/_datalayers.py:107: NevergradRuntimeWarning: Bounds are 1.503239631652832 sigma away from each other at the closest, you should aim for at least 3 for better quality.
#   warnings.warn(
# [optimization] done (sig=c61c610faf53)
# [retrieval] starting...
# [retrieval] done (sig=f23b63df35d8)
# [visualization] starting...
# [visualization] done (sig=71e07607a80b)
# [docking] starting...
# [docking] done (sig=8bbf4bf633e2)
# [report] starting...
# [report] done (sig=9ddc5298a8e2)
# Optimization signature: 9ddc5298a8e2
# Outputs: {'report': PosixPath('/scratch/home/sr3622/Firm-DTI/Firm-DTI2/outputs2/report/9ddc5298a8e2/nevermore_report.csv')}
# Details: {
#   "rows": 32
# }

In [None]:
# [ingest] starting...
# [ingest] cached (sig=dd9a0696f2c3)
# [features] starting...
# [features] cached (sig=5d19f5fd62f6)
# [optimization] starting...
# /home/sr3622/miniconda3/envs/digress10/lib/python3.10/site-packages/hyperopt/atpe.py:19: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
#   import pkg_resources
# /home/sr3622/miniconda3/envs/digress10/lib/python3.10/site-packages/nevergrad/parametrization/_datalayers.py:107: NevergradRuntimeWarning: Bounds are 1.503239631652832 sigma away from each other at the closest, you should aim for at least 3 for better quality.
#   warnings.warn(
# [optimization] done (sig=dfb538ae0ddf)
# [retrieval] starting...
# [retrieval] done (sig=91f93cc996c9)
# [visualization] starting...
# [visualization] done (sig=c094577dc934)
# [docking] starting...
# [docking] done (sig=0b1c19935fce)
# [report] starting...
# [report] done (sig=14763b88dcf3)
# Optimization signature: 14763b88dcf3
# Outputs: {'report': PosixPath('/scratch/home/sr3622/Firm-DTI/Firm-DTI2/outputs2/report/14763b88dcf3/nevermore_report.csv')}
# Details: {
#   "rows": 32
# }

In [None]:
# [ingest] starting...
# [ingest] cached (sig=dd9a0696f2c3)
# [features] starting...
# [features] cached (sig=5d19f5fd62f6)
# [optimization] starting...
# /home/sr3622/miniconda3/envs/digress10/lib/python3.10/site-packages/nevergrad/parametrization/_datalayers.py:107: NevergradRuntimeWarning: Bounds are 1.503239631652832 sigma away from each other at the closest, you should aim for at least 3 for better quality.
#   warnings.warn(
# [optimization] done (sig=7f2a12bf0f42)
# [retrieval] starting...
# [retrieval] done (sig=2961bd835538)
# [visualization] starting...
# [visualization] done (sig=42fa826acfb1)
# [docking] starting...
# [docking] done (sig=0b77bfb49954)
# [report] starting...
# [report] done (sig=87ac4205a277)
# Optimization signature: 87ac4205a277
# Outputs: {'report': PosixPath('/scratch/home/sr3622/Firm-DTI/Firm-DTI2/outputs2/report/87ac4205a277/nevermore_report.csv')}
# Details: {
#   "rows": 32
# }

In [None]:
# [ingest] starting...
# [ingest] cached (sig=dd9a0696f2c3)
# [features] starting...
# [features] cached (sig=5d19f5fd62f6)
# [optimization] starting...
# [optimization] cached (sig=a491887c982e)
# [retrieval] starting...
# [retrieval] cached (sig=30741816701a)
# [visualization] starting...
# [visualization] cached (sig=21bdaf11ffcd)
# [docking] starting...
# [docking] vina:  97%|█████████▋| 30/31 [16:48<00:33, 33.60s/it]
# [docking] done (sig=916e902a2af8)
# [report] starting...
# [report] done (sig=fca49a33e605)
# Optimization signature: fca49a33e605
# Outputs: {'report': PosixPath('/scratch/home/sr3622/Firm-DTI/Firm-DTI2/outputs2/report/fca49a33e605/nevermore_report.csv')}
# Details: {
#   "rows": 46
# }

In [None]:
# [ingest] starting...
# [ingest] cached (sig=dd9a0696f2c3)
# [features] starting...
# [features] cached (sig=5d19f5fd62f6)
# [optimization] starting...
# [optimization] cached (sig=e8651d4ea0c6)
# [retrieval] starting...
# [retrieval] cached (sig=4e2c791656aa)
# [visualization] starting...
# [visualization] cached (sig=479e90499275)
# [docking] starting...
# [docking] vina:  97%|█████████▋| 30/31 [16:48<00:33, 33.62s/it]
# [docking] done (sig=f0f0532e0796)
# [report] starting...
# [report] done (sig=0d9b6c1d7038)
# Optimization signature: 0d9b6c1d7038
# Outputs: {'report': PosixPath('/scratch/home/sr3622/Firm-DTI/Firm-DTI2/outputs2/report/0d9b6c1d7038/nevermore_report.csv')}
# Details: {
#   "rows": 46
# }

In [None]:

# Inspect the report CSV written by the pipeline run (first 20 rows).
from pathlib import Path
summary_path = Path(opt_res.outputs.get('report', ''))
# Use is_file(), not exists(): the '' fallback becomes Path('.'), which always
# exists, and read_csv would then be handed a directory.
if summary_path.is_file():
    display(pd.read_csv(summary_path).head(20))
else:
    print(f'Report CSV not found: {summary_path}')


In [None]:

# Load the full report CSV from summary_path (set by the inspection cell).
df = pd.read_csv(summary_path)

# Ascending order (the sort_values default) puts the smallest distance_L1 rows first.
df_sorted = df.sort_values(by="distance_L1")
df_sorted

In [None]:
# Display the outputs mapping of the stage result held in opt_res
# (the run cell's recorded output shows a single 'report' CSV path).
opt_res.outputs


In [None]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

# Scatter of distance_L1 against predicted_affinity for the sorted report rows.
ax = df_sorted.plot(
    kind="scatter",
    x="predicted_affinity",
    y="distance_L1",
    title="Distance_L1 vs Predicted Affinity",
)
plt.show()





In [None]:
import pandas as pd
from pathlib import Path

# Point to your features run (replace with your signature if you know it).
# NOTE: this path is relative to the kernel's working directory.
feat_root = Path("../../outputs/features")
sig = "97df27707ad5"  # set to your features signature
admet_path = feat_root / sig / "admet_features.csv"

df = pd.read_csv(admet_path)

# Drop the metadata columns, then keep only numeric/boolean columns so that a
# stray text column cannot break the aggregation.
metadata_cols = ["dataset_index", "smiles"]
numeric_cols = (
    df.drop(columns=metadata_cols, errors="ignore")
    .select_dtypes(include=["number", "bool"])
    .columns.tolist()
)

# One row per feature with columns mean / min / max; the transpose of the
# aggregation already carries those column names, so no rename is needed.
stats = df[numeric_cols].agg(["mean", "min", "max"]).T

display(stats)


In [None]:
# This is the SARS-CoV-2 Mpro run info; kept here as a record of which model I used.

# [ingest] starting...
# [ingest] cached (sig=d2f8d2740f3a)
# [features] starting...
# [features] cached (sig=329d35f96e63)
# [optimization] starting...
# [optimization] cached (sig=ab8322096ede)
# [retrieval] starting...
# [retrieval] cached (sig=205d2ba7a6d6)
# [visualization] starting...
# [visualization] cached (sig=060992496574)
# [docking] starting...
# [docking] cached (sig=3a7ed5596107)
# [report] starting...
# [report] cached (sig=b7ad180a2493)
# Optimization signature: b7ad180a2493
# Outputs: {'report': PosixPath('/scratch/home/sr3622/Firm-DTI/Firm-DTI2/pipeline_output/report/b7ad180a2493/nevermore_report.csv')}
# Details: {
#   "rows": 101
# }


# The commands below are shell commands, not Python: left bare they make this
# code cell a SyntaxError. They are kept as comments for reference; run them in
# a terminal, or prefix each with `!` to run them from a notebook cell.

# 1) Structure
# curl -L -o 7SI9.cif https://files.rcsb.org/download/7SI9.cif
# curl -L -o 7SI9.pdb https://files.rcsb.org/download/7SI9.pdb

# 2) FASTA sequence from the structure entry
# curl -L -o 7SI9.fasta https://www.rcsb.org/fasta/entry/7SI9

# 3) Baseline ligand page (for SMILES/InChIKey via PubChem CID 155903259)
# (open in browser) https://pubchem.ncbi.nlm.nih.gov/compound/155903259
